#!/usr/bin/perl -w
#
# Usage:
#   cd /home/xiangyang/Desktop/P.stutzeri_genome/Blast_A1501
#   perl /home/xiangyang/Desktop/P.stutzeri_genome/Blast_A1501/perl_bin/hash_extract.pl \
#        /home/xiangyang/Desktop/P.stutzeri_genome/Blast_A1501/3793_genome.shared \
#        3780 \
#        /home/xiangyang/Desktop/P.stutzeri_genome/PROTEIN_1-3793
#
# Arguments:
#   1. cluster file: one cluster per line, members separated by whitespace
#   2. genome number: only clusters with exactly this many members are kept
#   3. directory holding the reference sequence files (two lines per record:
#      a header line followed by one sequence line)
#
# Steps:
#   1. Write every kept cluster as one column of core_cluster.xls.
#   2. Read the workbook back and write each column to $path1/split/<name>.
#   3. Intersect the trailing "[...]" labels of all split files.
#   4. Rewrite each split file, restricted to and ordered by that intersection,
#      as $path1/split/MODIFY/<name>.MODIFY and concatenate them into
#      $path1/filted_sequence.tablelist.
#   5. Collect the listed records from the reference files into
#      $path1/filted_sequence.fasta.
#   6. Write one FASTA file per .MODIFY list into $path1/split/MODIFY/MODIFY/.

use strict;
use Data::Dumper;
use List::Compare;
use Spreadsheet::WriteExcel;
use Spreadsheet::ParseExcel;
use Bio::DB::Fasta;

##########################################################################
# Helpers
##########################################################################

# Return every line of $file with the trailing newline removed.
sub read_lines {
    my ($file) = @_;
    open my $fh, '<', $file or die "Cannot open $file for reading: $!\n";
    my @lines = <$fh>;
    close $fh;
    chomp @lines;
    return @lines;
}

# Return the sorted names of the plain files in $dir. Directories (including
# "." and "..") are left out so they are never opened as data files.
sub list_files {
    my ($dir) = @_;
    opendir my $dh, $dir or die "Cannot open directory $dir: $!\n";
    my @files = sort grep { -f "$dir/$_" } readdir $dh;
    closedir $dh;
    return @files;
}

# Create $dir unless it already exists.
sub ensure_dir {
    my ($dir) = @_;
    return if -d $dir;
    mkdir $dir or die "Cannot create directory $dir: $!\n";
    return;
}

# Concatenate @sources, in the given order, into $target.
sub concat_files {
    my ($target, @sources) = @_;
    open my $out, '>', $target or die "Cannot open $target for writing: $!\n";
    for my $source (@sources) {
        open my $in, '<', $source or die "Cannot open $source for reading: $!\n";
        while (my $line = <$in>) {
            print {$out} $line;
        }
        close $in;
    }
    close $out or die "Cannot close $target: $!\n";
    return;
}

# Reduce a list entry to the text of its last "[...]" field: everything up to
# and including the last ";[" is dropped, then the closing "]".
sub label_of {
    my ($entry) = @_;
    my $label = $entry;
    $label =~ s/(.+);\[//g;
    $label =~ s/\]$//g;
    return $label;
}

# Read a file made of header/sequence line pairs and return a hash reference
# mapping each header line to the line that follows it. A trailing header
# without a sequence line is ignored.
sub read_fasta_pairs {
    my ($file) = @_;
    my @lines = read_lines($file);
    my %sequence_for;
    for (my $i = 0; $i + 1 < @lines; $i += 2) {
        $sequence_for{ $lines[$i] } = $lines[ $i + 1 ];
    }
    return \%sequence_for;
}

##########################################################################
# Arguments and fixed paths
##########################################################################

die "Usage: $0 <cluster_file> <genome_number> <protein_dir>\n"
    unless @ARGV >= 3;

my $path1         = "/home/xiangyang/Desktop/P.stutzeri_genome/Blast_A1501";
my $infile        = $ARGV[0];
my $Genome_number = $ARGV[1];
my $path2         = $ARGV[2];

die "Genome number must be a non-negative integer, got '$Genome_number'\n"
    unless $Genome_number =~ /\A\d+\z/;

##########################################################################
# Step 1: create the Excel file, one kept cluster per column
##########################################################################

my $workbook = Spreadsheet::WriteExcel->new('core_cluster.xls');
die "Cannot create core_cluster.xls: $!\n" unless defined $workbook;
my $worksheet = $workbook->add_worksheet("sheet1");

open my $cluster_fh, '<', $infile or die "Cannot open $infile for reading: $!\n";
my $vol = 0;    # number of clusters kept so far == next free column
while (my $line = <$cluster_fh>) {
    chomp $line;

    # Drop the members of this one hard-coded genome. \S+ keeps the match
    # inside a single whitespace-delimited member, so two such members on one
    # line cannot swallow what lies between them.
    $line =~ s/XD38_\S+;\[Pseudomonas_sp\._63_8\];\[GCA_001507875\.1_ASM150787v1\]//g;

    my @members = split " ", $line;
    next unless @members == $Genome_number;

    for my $row (0 .. $#members) {
        my $status = $worksheet->write($row, $vol, "$members[$row]");

        # -2 means the cell is outside what the .xls format can hold; going
        # on would silently lose the cluster.
        die "Cell ($row, $vol) is out of bounds for core_cluster.xls\n"
            if defined $status && $status == -2;
    }
    $vol++;
}
close $cluster_fh;
$workbook->close();

# print "$vol\n";

##########################################################################
# Step 2: split the workbook into one list file per column
##########################################################################

my $parser    = Spreadsheet::ParseExcel->new();
my $workbook1 = $parser->parse('core_cluster.xls');
if ( !defined $workbook1 ) {
    die $parser->error(), ".\n";
}

my $path3 = "$path1/split";
ensure_dir($path3);

for my $worksheet1 ( $workbook1->worksheets() ) {
    for my $col (0 .. $vol - 1) {
        my $first_cell = $worksheet1->get_cell(0, $col);
        die "Missing cell (0, $col) in core_cluster.xls\n"
            unless defined $first_cell;

        # The file name is the first member with everything from its first
        # ";" onwards removed.
        my $name = $first_cell->value();
        $name =~ s/;(.+)//g;
        $name =~ s/\]$//g;

        my $output = "$path3/$name";
        open my $out_fh, '>', $output
            or die "Cannot open $output for writing: $!\n";
        for my $row (0 .. $Genome_number - 1) {
            my $cell = $worksheet1->get_cell($row, $col);
            die "Missing cell ($row, $col) in core_cluster.xls\n"
                unless defined $cell;
            print {$out_fh} $cell->value(), "\n";
        }
        close $out_fh or die "Cannot close $output: $!\n";
    }
}

my @split_files = list_files($path3);

# Concatenation of all visible split files (looks like a debugging artefact;
# the file name is kept unchanged).
concat_files(
    "$path1/filted_sequence.tablelist1111111111111111111111111111111111111",
    map { "$path3/$_" } grep { !/^\./ } @split_files
);

##########################################################################
# Step 3: intersect the labels found in all split files
##########################################################################

my @to_be_compared = ();
for my $file3 (@split_files) {
    my @labels = map { label_of($_) } read_lines("$path3/$file3");
    print $file3, "--", scalar @labels;
    print "\n";
    push @to_be_compared, \@labels if @labels > 0;
}

my @intersection1;
if (@to_be_compared >= 2) {
    my $lc = List::Compare->new({ lists => \@to_be_compared });
    @intersection1 = $lc->get_intersection;
}
elsif (@to_be_compared == 1) {
    # With a single list the intersection is its sorted set of unique labels.
    my %seen;
    @intersection1 = sort grep { !$seen{$_}++ } @{ $to_be_compared[0] };
}
print scalar(@intersection1)."\n";

##########################################################################
# Step 4: rewrite every split file in intersection order
##########################################################################

my $modify_dir = "$path3/MODIFY";
ensure_dir($modify_dir);

for my $file33 (@split_files) {
    # Scoped per file so that entries of a previous file cannot leak in.
    my %entry_for;
    for my $entry (read_lines("$path3/$file33")) {
        $entry_for{ label_of($entry) } = $entry;
    }

    my $output_1 = "$modify_dir/$file33.MODIFY";
    open my $out_fh, '>', $output_1
        or die "Cannot open $output_1 for writing: $!\n";
    for my $label (@intersection1) {
        next unless exists $entry_for{$label};
        print {$out_fh} "$entry_for{$label}\n";
    }
    close $out_fh or die "Cannot close $output_1: $!\n";
}

my $tablelist = "$path1/filted_sequence.tablelist";
concat_files(
    $tablelist,
    map { "$modify_dir/$_" }
    grep { !/^\./ && /\.MODIFY\z/ } list_files($modify_dir)
);

##########################################################################
# Step 5: extract the listed records from the reference sequence files
##########################################################################

my $output_2 = "$path1/filted_sequence.fasta";

# Read the list once instead of once per reference file.
my @table_entries = read_lines($tablelist);

open my $fasta_out, '>', $output_2
    or die "Cannot open $output_2 for writing: $!\n";
for my $file1 (list_files($path2)) {    # iterate over each reference sequence file
    my $sequence_for = read_fasta_pairs("$path2/$file1");

    # print Dumper($sequence_for);    # dump this hash
    for my $entry (@table_entries) {
        my $header = ">" . $entry;
        next unless exists $sequence_for->{$header};
        print {$fasta_out} "$header\n$sequence_for->{$header}\n";
    }
}
close $fasta_out or die "Cannot close $output_2: $!\n";

##########################################################################
# Step 6: extract sequences according to each list
##########################################################################

my $final = read_fasta_pairs($output_2);

my $path4 = "$modify_dir/MODIFY";
ensure_dir($path4);

for my $file4 (list_files($modify_dir)) {
    my $output_3 = "$path4/$file4.FASTA";
    open my $out_fh, '>', $output_3
        or die "Cannot open $output_3 for writing: $!\n";
    for my $entry (read_lines("$modify_dir/$file4")) {
        my $header = ">" . $entry;
        next unless exists $final->{$header};
        print {$out_fh} "$header\n$final->{$header}\n";
    }
    close $out_fh or die "Cannot close $output_3: $!\n";
}
##########################################################################