use strict;
use warnings;
use Array::Utils qw(:all);
use File::Basename qw<basename dirname>;



# Compare the genomes named in a list file against the *genomic.gbff.gz files
# present in a directory, print a summary of the comparison, and write the
# list entries that have no matching file in the directory to a new list.
#
# Usage:
#   perl aspera_gono.pl $dn_dir $list $list_new
# Example:
#   perl aspera_gono.pl /media/xiangyang/My_Passport/ncbidb_11 /home/xiangyang/ncbidb/list_split/0 /home/xiangyang/ncbidb/list_split/0_1
#
#   $dn_dir   - directory scanned (non-recursively) for files ending in
#               "genomic.gbff.gz"
#   $list     - input list, one path per line; an entry is identified by the
#               text after its last "/"
#   $list_new - output file; receives every line of $list whose file name is
#               not found in $dn_dir, one per line
die "Usage: perl $0 dn_dir list list_new\n" if @ARGV < 3;
my ($dn_dir, $list, $list_new) = @ARGV;

# File name => full list line, for every entry in the input list.
my %old = obtain_file_hash($list);
my @old = keys %old;

# Deliberately no chdir: all three paths are used exactly as given, so
# relative arguments keep resolving against the caller's working directory.
opendir(my $dn_dh, $dn_dir) or die "Cannot open directory $dn_dir: $!";
# Dots are escaped and the match is anchored at the very end of the name so
# that only real "...genomic.gbff.gz" files are counted.
my @new = grep { /genomic\.gbff\.gz\z/ } readdir $dn_dh;
closedir $dn_dh;

# array_diff is a symmetric difference; because @share is a subset of both
# @old and @new, diffing against it leaves the elements unique to each side.
my @diff       = array_diff(@old, @new);
my @share      = intersect(@old, @new);
my @old_unique = array_diff(@old, @share);
my @new_unique = array_diff(@new, @share);
print "    --->diffent genomes number: ", scalar @diff, "\n",
      "    --->shared genomes number: ", scalar @share, "\n",
      "    --->unique genomes number in old version: ", scalar @old_unique, "\n",
      "    --->unique genomes number in new version: ", scalar @new_unique, "\n";

# Every line is newline-terminated so the result is a well-formed list file
# that can itself be passed back in as $list.
open(my $out_fh, '>', $list_new) or die "Cannot open $list_new for writing: $!";
for my $name (@old_unique) {
    print {$out_fh} $old{$name}, "\n";
}
# Buffered write errors only surface at close, so check it.
close($out_fh) or die "Cannot close $list_new: $!";



# obtain_file_hash($sub_txt)
#
# Reads the text file $sub_txt and returns a hash (as a flat key/value list)
# that maps the text after the last "/" of each line to the complete chomped
# line. If several lines end in the same name, the last one read wins.
#
# Side effects: prints the total row count and a coarse percentage progress
# trace ("    --->10%...20%...done") to the currently selected output handle.
#
# Dies if $sub_txt cannot be opened for reading.
sub obtain_file_hash {
    my $sub_txt = shift;

    # Rows are counted from the lines actually read instead of shelling out
    # to `wc -l`: that avoids interpolating the path into a shell command,
    # does not depend on wc's platform-specific output padding, and counts a
    # final line that lacks a trailing newline (which wc -l misses, leading
    # to a zero total and a division by zero below).
    open(my $fh, '<', $sub_txt) or die "Cannot open file: $!";
    my @rows = <$fh>;
    close $fh;
    chomp @rows;

    my $total_row = scalar @rows;
    print "    --->total rows in $sub_txt:  $total_row\n";

    my %fhash;
    my $c = 0;
    print "    --->";
    for my $row (@rows) {
        # Percentage before and after this row; a marker is printed only when
        # the integer percentage has just advanced onto a multiple of 10.
        my $progress_record = int(($c / $total_row) * 100);
        $c++;

        # Key is the line with everything up to the last "/" removed.
        (my $gn = $row) =~ s{.*/}{};
        $fhash{$gn} = $row;

        my $do_percent = int(($c / $total_row) * 100);
        print "$do_percent%", "..."
            if $c == 1 or ($do_percent % 10 == 0 and $progress_record < $do_percent);
    }
    print "done\n";
    return %fhash;
}

