use strict;
use warnings;
use Getopt::Long;
use Bio::SeqIO;


# cd '/home/xiangyang/assembly_file/modified_fasta'
# perl /home/xiangyang/assembly_file/trim_comtamination.pl NJXKYY24.genome.fasta NJXKYY24.genome.fasta.txt > /home/xiangyang/assembly_file/final/NJXKYY24.genome.fasta


# Trim contaminated regions out of an assembly and print the cleaned contigs
# to STDOUT in FASTA format.
#
# Usage:
#   perl trim_comtamination.pl <assembly.fasta> <contamination.txt> > <cleaned.fasta>
#
# <contamination.txt> holds one tab-separated row per contaminated contig:
#   column 1: contig id (must equal the id returned by parse_fasta)
#   column 2: contig length (ignored; the real sequence length is used)
#   column 3: contaminated span(s), 1-based and inclusive, written
#             "start..end"; several spans may be comma-separated
# Rows may repeat a contig id; all of its spans are removed together.
#
# A contig with N contaminated spans is replaced by N+1 pieces named
# "<id>_1" .. "<id>_<N+1>" (the clean stretches before, between and after the
# spans, in order). Only sequences longer than 200 bases are printed.
my $input = $ARGV[0];
my $error = $ARGV[1];

die "Usage: perl $0 <assembly.fasta> <contamination.txt>\n"
    unless defined $input && defined $error;

my %seq_hash = parse_fasta($input);    # key: seqid, value: sequence

open my $ERROR, '<', $error
    or die "could not open contamination file '$error' : $!\n";

# Gather every span first, so that a contig listed on several rows (or with
# several spans on one row) is cut once with all spans taken into account.
my %spans_of;    # key: contig id, value: array ref of [start, end] pairs
while (my $row = <$ERROR>) {
    $row =~ s/[\r\n]+\z//;    # tolerate both LF and CRLF line endings
    next if $row !~ /\S/;     # skip blank lines

    my ($contig_id, undef, $span_field) = split /\t/, $row;
    next if !defined $span_field;             # fewer than three columns
    next if !exists $seq_hash{$contig_id};    # row for an unknown contig

    for my $span (split /,/, $span_field) {
        if ($span =~ /\A\s*(\d+)\s*\.\.\s*(\d+)\s*\z/) {
            push @{ $spans_of{$contig_id} }, [ $1, $2 ];
        }
        else {
            warn "skipping unparsable span '$span' for contig '$contig_id'\n";
        }
    }
}
close $ERROR;

# Cut every contaminated contig into its clean pieces.
my %piece_of;    # key: "<contig id>_<n>", value: clean sub-sequence
for my $contig_id (keys %spans_of) {
    my $piece_num = 0;
    for my $piece (_cut_spans($seq_hash{$contig_id}, @{ $spans_of{$contig_id} })) {
        $piece_num++;
        $piece_of{ $contig_id . "_" . $piece_num } = $piece;
    }
}

# Add the pieces, then drop the original contaminated contigs.
@seq_hash{ keys %piece_of } = values %piece_of;
delete @seq_hash{ keys %spans_of };

for my $seq_id (sort keys %seq_hash) {
    next unless length($seq_hash{$seq_id}) > 200;
    print ">$seq_id\n$seq_hash{$seq_id}\n";
}

# Return the clean stretches of $sequence that remain once every span in
# @spans has been removed.
#   $sequence : the contig sequence
#   @spans    : array refs [start, end], 1-based inclusive coordinates
# Returns a list of scalar(@spans) + 1 strings (some possibly empty): the
# stretch before each span in coordinate order, followed by the tail after
# the last span. Reversed spans are swapped, coordinates are clamped to the
# sequence, and overlapping spans yield an empty piece between them.
sub _cut_spans {
    my ($sequence, @spans) = @_;
    my $seq_len = length $sequence;

    my @ordered =
        sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] }
        map  { $_->[0] <= $_->[1] ? [ $_->[0], $_->[1] ] : [ $_->[1], $_->[0] ] }
        @spans;

    my @pieces;
    my $cursor = 0;    # 0-based offset of the first base not yet consumed
    for my $span (@ordered) {
        my ($start, $end) = @{$span};
        $start = 1        if $start < 1;
        $end   = $seq_len if $end > $seq_len;

        # Clean bases lie between the cursor and the base before $start.
        my $clean_len = ($start - 1) - $cursor;
        push @pieces, $clean_len > 0 ? substr($sequence, $cursor, $clean_len) : '';

        $cursor = $end if $end > $cursor;
    }
    push @pieces, substr($sequence, $cursor);

    return @pieces;
}

# Read a FASTA file into a hash of sequences.
#   $infile : path of the FASTA file
# Returns a hash (as a flat list): key = sequence id, value = the sequence
# with all of its lines concatenated.
# The id is the header text up to the first whitespace, with any "lcl|"
# removed, e.g. ">lcl|3839_Contig1 Citrobacter freundii ..." gives
# "3839_Contig1". Records sharing an id are concatenated. Lines before the
# first header and headers without sequence lines produce no entry.
# Dies if the file cannot be opened.
sub parse_fasta {

    my ($infile) = @_;
    my %seq_hash = ();    # key: seqid, val: seq

    open my $INFILE, '<', $infile
        or die "could not open infile '$infile' : $! \n";

    # Read line by line: only a '>' at the start of a line begins a record,
    # so a '>' inside a header description cannot split the record.
    # local keeps the caller's input record separator untouched.
    local $/ = "\n";

    my $id;
    while (my $line = <$INFILE>) {

        $line =~ s/[\r\n]+\z//;    # strip LF and CRLF line endings

        if ($line =~ /\A>(.*)\z/s) {
            $id = $1;
            $id =~ s/\s.*//g;      # make ids consistent across files: keep the first word only
            $id =~ s/lcl\|//g;
            next;
        }

        next if !defined $id;      # text before the first header
        next if $line eq '';       # blank lines carry no sequence

        $seq_hash{$id} .= $line;
    }
    close $INFILE;

    return (%seq_hash);

}    # end of parse_fasta
