#!/usr/bin/perl -w
#
# Print the length of the longest sequence found in each FASTA file named on
# the command line.  When no file is given, the data are read from STDIN.
# The output is a single line holding one number per input, separated by
# single spaces.

use strict;
use warnings;

use Getopt::Std;
use List::Util qw(max);

my $usage = "Usage: $0 [-e] [fastaFile ...]\n"
    . "  Prints the maximum sequence length found in each FASTA file,\n"
    . "  separated by spaces.  Reads STDIN when no file is given.\n"
    . "  -e: accepted for backward compatibility; it has no effect";

# Internal separator placed between a sequence name and its data in the
# "name<sep>data" records that ReadInFASTA() returns.
my $sep = "\t";

# Run the command-line driver only when executed as a script, so that the
# helper subs below can be loaded (e.g. via require) without side effects.
main() unless caller;

# Command-line entry point: parse options, read every input and print the
# maximum sequence length of each one on a single line.
sub main {
    my %opt;
    getopts('e', \%opt) or die "$usage\n";

    @ARGV = ('-') unless @ARGV;    # '-' means STDIN

    my @max_lengths;
    for my $file (@ARGV) {
        push @max_lengths, MaxSeqLen(ReadInFASTA($file));
    }

    print join(' ', @max_lengths), "\n";
    return;
}

# Read a FASTA file and return a list with one element per sequence, each of
# the form "name<sep>data".
#
# Argument: name of the input file; '-' reads from STDIN.
# Returns:  list of records in file order.  Names have the leading '>' and
#           surrounding whitespace removed.  Data lines are concatenated
#           with all whitespace removed, U/u replaced by T, and upper-cased.
# Dies:     when the file cannot be opened, when a name contains the
#           separator, or when sequence data precede the first name line.
sub ReadInFASTA {
    my $infile = shift;

    my $is_stdin = $infile eq '-';
    my $fh;
    if ($is_stdin) {
        $fh = \*STDIN;
    }
    else {
        open($fh, '<', $infile) or die "Can't open $infile: $!\n";
    }

    my @names;
    my @seqs;
    while (my $line = <$fh>) {
        chomp $line;

        if ($line =~ /^>/) {    # name line in FASTA format
            $line =~ s/^>\s*//;
            $line =~ s/\s+$//;

            # The separator must not occur inside a name, otherwise the
            # records could not be split back into name and data.
            die("ERROR: \"$sep\" is an internal separator.  Line $. of "
                . "the input FASTA file contains this charcter. Make sure this "
                . "separator character is not used in your data file or modify "
                . "variable \$sep in this script to some other character.\n")
                if $line =~ /\Q$sep\E/;

            push @names, $line;
            push @seqs,  '';
        }
        else {
            $line =~ s/\s+//g;         # get rid of any whitespace
            next if $line eq '';       # skip empty lines

            die "ERROR: sequence data found before the first '>' name line "
                . "(line $. of $infile)\n"
                unless @names;

            $line =~ tr/uU/TT/;        # change U to T
            $seqs[-1] .= uc $line;
        }
    }
    close($fh) unless $is_stdin;

    return map { $names[$_] . $sep . $seqs[$_] } 0 .. $#names;
}

# Given a list of "name<sep>data" records, return the list of data parts.
# A record with an empty data part yields the empty string.
sub GetSeqDat {
    my @data = @_;

    my @result;
    for my $record (@data) {
        # Limit 2 keeps a trailing empty data field instead of dropping it.
        my (undef, $seq) = split /\Q$sep\E/, $record, 2;
        push @result, defined $seq ? $seq : '';
    }
    return @result;
}

# Given a list of "name<sep>data" records, return the list of name parts.
sub GetSeqName {
    my @data = @_;

    my @result;
    for my $record (@data) {
        my ($name) = split /\Q$sep\E/, $record, 2;
        push @result, $name;
    }
    return @result;
}

# Return the number of characters in a string (0 for undef).
sub CharLen {
    my $string = shift;
    return defined $string ? length $string : 0;
}

# Given a list of "name<sep>data" records, return the length of the longest
# data part, or 0 when the list is empty.
sub MaxSeqLen {
    return max(0, map { CharLen($_) } GetSeqDat(@_));
}