#!/usr/bin/perl -w

use strict;
use warnings;

# perl /home/xiangyang/Desktop/assembly/delete_contatament.pl /home/xiangyang/Desktop/assembly/workplace_test/final_contig

# Command-line entry point.
#   $ARGV[0] - directory containing the FASTA files to process (required)
#   $ARGV[1] - optional second argument, handed to trans() unchanged
my $file_dir = $ARGV[0];
my $list = $ARGV[1];

# Fail early with a usage message instead of letting an undefined or bogus
# path reach the directory-reading code.
die "Usage: $0 <fasta_dir> [list]\n"
    unless defined $file_dir && -d $file_dir;

trans($file_dir, $list);

# Reformat every FASTA file in a directory and report per-file assembly
# statistics.
#
# For each plain file in directory $f (names starting with "." are skipped)
# the sequences are read with parse_fasta(), every header is cut at its first
# space, and the records are rewritten 60 bases per line to
# "$f/<file>.fasta". One tab-separated line per input file is printed to
# STDOUT with the columns:
#
#     file name, total length, contig count, shortest, longest, N50
#
# Parameters:
#   $f - directory holding the input FASTA files
#   $l - accepted for backward compatibility; nothing is read from it
#
# Dies if the directory cannot be read or an output file cannot be
# opened or closed.
sub trans {
    my ($f, $l) = @_;

    opendir(my $dir_handle, $f)
        or die "could not open directory '$f' : $!\n";
    # Sorted so that the report order does not depend on readdir's order.
    my @entries = sort grep { $_ !~ /^\./ && -f "$f/$_" } readdir $dir_handle;
    closedir $dir_handle;

    # Reference note (translated from the original file) on the built-in that
    # print_sequence_into_file() uses for line wrapping:
    #   substr(string, index, length, replacement)
    #     string      - the string the substring is extracted from
    #     index       - start index of the substring
    #     length      - length of the substring
    #     replacement - string that replaces the substring (if given)

    foreach my $fe (@entries) {
        my %h  = parse_fasta("$f/$fe");
        my $of = "$f/$fe.fasta";
        open(my $out, '>', $of)
            or die "could not open outfile '$of' : $!\n";

        # Per-file accumulators; they must not carry over between files.
        my @lengths;
        my $total = 0;

        foreach my $header (sort keys %h) {
            (my $id = $header) =~ s/ .*//;    # keep only the first word

            my $wrapped = print_sequence_into_file($h{$header}, 60);
            $wrapped = '' unless defined $wrapped;
            # The wrapped text normally ends with a newline already; only
            # add one when it is missing so records are not separated by
            # blank lines.
            $wrapped .= "\n" unless $wrapped =~ /\n\z/;
            print {$out} ">$id\n$wrapped";

            push @lengths, length($h{$header});
            $total += length($h{$header});
        }

        close($out) or die "could not close outfile '$of' : $!\n";

        # Longest first: this is the order the N50 definition requires.
        my @by_size    = sort { $b <=> $a } @lengths;
        my $contig_num = scalar @by_size;
        my ($shortest, $longest) =
            $contig_num ? ($by_size[-1], $by_size[0]) : (0, 0);
        my $n50 = _n50_of_lengths($total, @by_size);

        # The report goes to STDOUT; no report file is opened in this script.
        print "$fe\t$total\t$contig_num\t$shortest\t$longest\t$n50\n";
    }

    return;
}

# Return the N50 for contig lengths given from longest to shortest: the
# length of the contig at which the running total first reaches at least
# half of $total. Returns 0 when no lengths are given.
sub _n50_of_lengths {
    my ($total, @lengths_desc) = @_;

    my $running = 0;
    foreach my $contig_length (@lengths_desc) {
        $running += $contig_length;
        # Compare doubled sum against the total to avoid rounding the half.
        return $contig_length if $running * 2 >= $total;
    }
    return 0;
}


# Read a FASTA file into a hash.
#
# Parameters:
#   $infile - path of the FASTA file to read
#
# Returns a hash (as a flat list) that maps each full header line, without
# the leading ">", to its sequence with all line breaks removed. A header
# that has no sequence lines gets no entry; a header that occurs more than
# once has its sequences concatenated.
#
# Dies if $infile cannot be opened.
sub parse_fasta {

    my ($infile) = @_;
    my %seq_hash = (); # key:seqid, val:seq

    # Three-argument open: the path is never interpreted as a mode or a pipe.
    open(my $INFILE, '<', $infile)
        or die "could not open infile '$infile' : $! \n";

    {
        # Read one ">"-delimited record at a time. "local" restores the
        # caller's input record separator when this block is left, even if
        # an exception is thrown.
        local $/ = ">";
        while (my $record = <$INFILE>) {
            chomp $record;
            $record =~ tr/\r//d;    # tolerate CRLF line endings
            next if $record eq '';
            my ($id, @sequencelines) = split /\n/, $record;
            next unless @sequencelines;
            $seq_hash{$id} .= join '', @sequencelines;
        }
    }

    close $INFILE;
    return(%seq_hash);

} # end of parse_fasta


# Wrap a sequence into lines of at most $length characters.
#
# Despite its name this sub does not write to a file: it returns the wrapped
# text and leaves printing to the caller.
#
# Parameters:
#   $sequence - the sequence string to wrap (undef is treated as empty)
#   $length   - maximum number of characters per line; must be positive
#
# Returns the wrapped text, every line (including the last) terminated by
# "\n", or the empty string for an empty sequence.
#
# Dies if $length is undefined or not greater than zero, because the loop
# below could never advance with such a value.
sub print_sequence_into_file{

    my($sequence, $length) = @_;

    die "line length must be a positive number\n"
        unless defined $length && $length > 0;

    $sequence = '' unless defined $sequence;

    # Start from an empty string so an empty sequence yields '' and not undef.
    my $string = '';
    for (my $pos = 0; $pos < length($sequence); $pos += $length){
        $string .= substr($sequence, $pos, $length) . "\n";
    }

    return $string;

}