#!/usr/bin/perl -w
# Example usage:
#   cd /home/xiangyang/Desktop/P.stutzeri_genome/Blast_A1501/split/FASTA/
#   perl /home/xiangyang/Desktop/P.stutzeri_genome/Blast_A1501/process.pl GCF_000282375.1_Pseudomonas.strGM50_v1.0.FASTA GCF_000474765.1_CMAA1215_1.0.FASTA
#
# Reads every FASTA file named on the command line, stores the parsed
# sequences (one hash per file) and prints each sequence on its own line
# to STDOUT.

use strict;
use warnings;

my %HoH               = ();
my %seqid_HoH         = ();
my %seqid_count_hash  = ();      # key:seqid, val:count
my $term              = $/;      # input record separator
my $space             = "\t";    # spacer for aligned print
my $nchar             = 0;       # nchar for phyml header.
my $nseq;                        # nseq for phyml header. Do not initialize!
my $first_name        = q{};     # First name in matrix.
my $fasta             = 0;       # Print phyml format by default
my $man               = 0;       # Manual
my $help              = 0;       # Help
my $dontprint         = 0;       # Do not print the concatenation
my $relaxed_phylip    = 0;       # Print relaxed phylip format
my $lwidth            = 60;      # default line width for fasta
my %nseq_hash         = ();      # key:infile, val:nseq
my @hash_ref_array    = ();      # array with hash references
my $nfiles            = 0;       # count number of files
my $verbose           = 0;       # Verbose

#---------------------------------------------------------------------------
#  Read all infiles to count sequences
#---------------------------------------------------------------------------
print STDERR "\nChecking sequences in infiles...\n" if ($verbose);

foreach my $arg (@ARGV) {
    my $infile   = $arg;
    my %seq_hash = parse_fasta($infile);    # key: seqid, value:sequence
    $nfiles++;

    ## Save sequences in array with hash references.
    ## Does this work for really large number of fasta files?
    ## %seq_hash is a fresh lexical on every iteration, so each stored
    ## reference points at its own hash.
    my $hash_ref = \%seq_hash;
    push(@hash_ref_array, $hash_ref);

    ## Add nseqs to global nseq_hash:
    $nseq_hash{$infile} = scalar(keys(%seq_hash));

    ## Print every sequence of this file, one per line (hash order).
    foreach my $sequence (values(%seq_hash)) {
        print "$sequence\n";
    }
}

#---------------------------------------------------------------------------
#  parse_fasta($infile)
#
#  Reads a FASTA file and returns its content as a hash (list of
#  key/value pairs).
#
#  Parameters:
#    $infile - path of the FASTA file to read.
#
#  Returns:
#    A hash where the key is the complete header line (everything after
#    ">" up to the end of that line) and the value is the sequence with
#    all its lines joined together.
#    - Records without any sequence line are not included.
#    - Records sharing an identical header line are concatenated.
#    - Carriage returns are removed, so CRLF files parse the same as LF
#      files.
#
#  Dies:
#    If the file cannot be opened.
#---------------------------------------------------------------------------
sub parse_fasta {
    my ($infile) = @_;

    my %seq_hash = ();    # key:seqid, val:seq

    open my $INFILE, "<", $infile
        or die "could not open infile '$infile' : $!\n";

    # Read one FASTA record per iteration. "local" restores the caller's
    # record separator automatically on every exit path.
    local $/ = ">";

    while (my $record = <$INFILE>) {
        chomp $record;          # removes the trailing ">" (current $/)
        $record =~ tr/\r//d;    # tolerate Windows (CRLF) line endings
        next if ($record eq '');    # text before the first ">" is empty

        my ($id, @sequencelines) = split /\n/, $record;

        # A header without sequence lines does not create an entry.
        next if (!@sequencelines);

        $seq_hash{$id} .= join('', @sequencelines);
    }

    close $INFILE;

    return (%seq_hash);
}    # end of parse_fasta