use strict;
use warnings;
use File::Basename qw<basename dirname>;
use FindBin;
use File::Spec;
use Getopt::Long;

# perl /home/xiangyang/genome_bacth_retrive/genome_bacth/bash_fastANI.pl $query $split_number $workplace threadnumber $tengxunyun_workplace

# perl /home/xiangyang/moxiu/TypeStrain_ANI/bash_fastANI.pl -u /home/xiangyang/moxiu/upload_dir/file -s 20 -m 30 -e 397310815@qq.com -d /home/ubuntu/moxiu/workplace

# Command-line options and their defaults.
#   -u  path of the uploaded file (required)
#   -s  number of chunks the type-strain list is split into
#   -m  number of threads passed to each fastANI process (-t)
#   -e  e-mail address
#   -t  type of analysis
#   -d  destination directory on the remote server for the result archive
#   -h  print the usage line and exit
my %options = (
    'upload_file'                                  => undef,
    'spilt_number'                                 => "4",
    'number_threads'                               => "3",
    'email'                                        => '397310815@qq.com',
    'type_analysis'                                => undef,
    'Tengxunyun_dir'                               => undef,
    'help'                                         => undef
);

my $usage = "Usage: perl $0 -u <upload_file> [-s <spilt_number>] [-m <number_threads>] [-e <email>] [-t <type_analysis>] [-d <Tengxunyun_dir>] [-h]\n";

# number_threads is parsed as an integer: it ends up on the fastANI command
# line, so free-form text must not be accepted here.
GetOptions(
    'u|upload_file=s'                              => \$options{upload_file},
    's|spilt_number=i'                             => \$options{spilt_number},
    'm|number_threads=i'                           => \$options{number_threads},
    'e|email=s'                                    => \$options{email},
    't|type_analysis=s'                            => \$options{type_analysis},
    'd|Tengxunyun_dir=s'                           => \$options{Tengxunyun_dir},
    'h|help'                                       => \$options{help}
) or die $usage;

if ($options{help}) {
    print $usage;
    exit 0;
}

# Fail early with a readable message instead of crashing later in dirname().
die "Missing required option -u <upload_file>\n$usage"
    unless defined $options{upload_file} and length $options{upload_file};
die "Option -s (spilt_number) must be a positive integer\n$usage"
    unless $options{spilt_number} >= 1;
die "Option -m (number_threads) must be a positive integer\n$usage"
    unless $options{number_threads} >= 1;

# Timestamp with blanks and colons replaced by underscores.
my $now_time = localtime;
$now_time =~ s/ |\:/_/g;

my $home_directory = $FindBin::Bin;

my $f = $options{upload_file};  #"/home/xiangyang/moxiu/upload_dir/$file" is sent to Dell from Tengxunyun that is the final file from vue but the name is auto-modified
die "No upload file given (option -u)\n" unless defined $f and length $f;

# The upload path is later interpolated into shell command lines. Reject any
# path containing characters the shell would interpret (whitespace, quotes,
# command separators, redirections, substitutions); such names could never
# have been processed correctly and are a command-injection vector.
die "Unsafe characters in upload file name: $f\n"
    if $f =~ /[\s;&|`\$<>()\\'"]/;

my $upload_dir = dirname $f; #"/home/xiangyang/moxiu/upload_dir"
my $fn = basename $f;

my $getunloadfile__website = "https://www.microbialgenomic.cn:8101/getFile?fileName=$fn";

# List form: no shell is involved, so neither the file name nor the '?' in
# the URL can be expanded or split. A failed download is only reported,
# because the script historically carried on regardless.
system('wget', '-c', '-O', $f, '--no-check-certificate', $getunloadfile__website) == 0
    or warn "wget failed for $getunloadfile__website (exit status $?)\n";

my $spilt_number = $options{spilt_number};

my $number_threads = $options{number_threads};
my $email = $options{email};

# Per-upload working directory, a sibling of the script directory.
my $workplace = dirname($home_directory)."/workplace_$fn";
unless (-d $workplace) {
    mkdir $workplace or die "Cannot create $workplace: $!";
}

my $typestrain_dir =  "$home_directory/Typestrain_SEQ";
chdir $typestrain_dir or die "Cannot chdir to $typestrain_dir: $!";

# Write the names of all (non-hidden) entries of the type-strain directory,
# one per line, as the reference list. Names are relative to $typestrain_dir.
my $typestrain_list = "$workplace/typestrain_list";
{
    opendir(my $ts_dh, $typestrain_dir) or die "Cannot read $typestrain_dir: $!";
    my @typestrain_files = sort grep { !/^\./ } readdir $ts_dh;
    closedir $ts_dh;

    open(my $ts_out, '>', $typestrain_list) or die "Cannot write $typestrain_list: $!";
    print {$ts_out} "$_\n" for @typestrain_files;
    close $ts_out or die "Cannot finish writing $typestrain_list: $!";
}

# LIST stays open: the staging code further down writes the query paths to it.
my $query_list = "$workplace/query_list";
open(LIST, '>', $query_list) or die "Cannot write $query_list: $!";

my $type = $options{type_analysis};
my $tengxunyun_dir = $options{Tengxunyun_dir};


##
# Determine the type of the uploaded query.
# Uploaded input as a sequence: used in 16S rRNA gene analysis by blastn.

# Uploaded input as a file: 1) each genome must be a single file; for multiple genomes, a compressed folder containing multiple files is allowed;
#                           2) genomes given by assembly number, each row represents one genome

# situation 1

## To make deletion easy, the uploaded file or the decompressed folder (from an uploaded compressed file) gets ".TyprStain_ANI.txt" appended at the end.

    # Stage the upload under a name ending in ".TyprStain_ANI.txt" and write
    # the path of every query file, one per line, to the LIST handle.
    chdir $upload_dir or die "Cannot chdir to $upload_dir: $!";
    my $filename = $fn;

    # Run an external command without a shell (list-form system), so file
    # names are passed verbatim; a failure is reported but not fatal.
    my $run_cmd = sub {
        my @cmd = @_;
        system(@cmd) == 0 or warn "Command failed (exit status $?): @cmd\n";
    };

    # Recognise the compression/archive suffix and strip it from $filename.
    # The dots are escaped so that only a real extension matches; the
    # tarball suffixes must be tried before plain .gz/.bz2.
    my $archive_kind;
    if    ($filename =~ s/\.tar\.(?:gz|bz2?)\z//) { $archive_kind = 'tar';   }
    elsif ($filename =~ s/\.gz\z//)               { $archive_kind = 'gzip';  }
    elsif ($filename =~ s/\.bz2?\z//)             { $archive_kind = 'bzip2'; }
    elsif ($filename =~ s/\.zip\z//)              { $archive_kind = 'unzip'; }
    elsif ($filename =~ s/\.rar\z//)              { $archive_kind = 'unrar'; }
    elsif ($filename =~ s/\.7z\z//)               { $archive_kind = '7z';    }

    #situation1 .gz .tar.gz .zip .rar
    if (defined $archive_kind){
        my $staged = "$upload_dir/$filename.TyprStain_ANI.txt";

        if ($archive_kind eq 'gzip' or $archive_kind eq 'bzip2'){
            # can only compress a single file: decompress next to the upload
            # (-k keeps the original), then move the result to the staged name
            $run_cmd->($archive_kind, '-dk', $f);
            rename "$upload_dir/$filename", $staged
                or warn "Cannot rename $upload_dir/$filename to $staged: $!\n";

        }else{
            # multi-file archives are unpacked into a staged directory
            mkdir $staged;
            my %extract_cmd = (
                'tar'   => ['tar',   '-xf', $f, '-C', $staged],
                'unzip' => ['unzip', '-j',  $f, '-d', $staged],
                'unrar' => ['unrar', 'e',   $f, $staged],         #sudo apt-get install rar
                '7z'    => ['7z',    'e',   $f, '-r', "-o$staged"], #sudo apt-get install p7zip-full
            );
            $run_cmd->(@{ $extract_cmd{$archive_kind} });
        }

        if (-d $staged){
            # 1) a folder containing files was compressed; 2) multiple files
            # were compressed directly -> also look one level into subfolders
            foreach my $entry (glob "$staged/*"){
                if (-d $entry){
                    foreach my $nested (glob "$entry/*"){
                        print LIST "$nested\n" if -f $nested;
                    }
                }else{
                    print LIST "$entry\n" if -f $entry;
                }
            }
        }else{
            print LIST "$staged\n";
        }

    #situation2 uploaded file
    }else{
        $run_cmd->('cp', $f, "$f.TyprStain_ANI.txt");
        print LIST "$f.TyprStain_ANI.txt\n";
    }

    close LIST or warn "Problem closing the query list: $!\n";


# Store the FTP site information


# Load the tab-separated assembly table into %assem_hash. Going by the column
# names used below, each row holds: short assembly id, full assembly id,
# genome name, FTP site. Both ids are mapped to the FTP site.
my $assem_ftpsite = "$home_directory/assembly_FTPsite.txt";
my %assem_hash;
if (open(my $assem_fh, '<', $assem_ftpsite)) {
    while (my $row = <$assem_fh>) {
        $row =~ s/\r?\n\z//;    # also copes with CRLF line ends
        my ($assem_simple, $assem_full, $genme_name, $ftpsite) = split /\t/, $row;

        # Skip blank or short rows instead of storing undef keys/values.
        next unless defined $ftpsite and length $ftpsite;

        $assem_hash{$assem_simple} = $ftpsite if length $assem_simple;
        $assem_hash{$assem_full}   = $ftpsite if length $assem_full;
    }
    close $assem_fh;
}
else {
    # Not fatal: the table is only consulted when the upload is a list of
    # assembly numbers; every number will then be reported as unknown.
    warn "Cannot read $assem_ftpsite: $!\n";
}



#### Download the genome sequences in FASTA format into genomedownload_dir if the uploaded file contains assembly number information
#
# Read the query list back. When it names exactly one file, look at that
# file's first line: a FASTA header or a run of nucleotides means it is a
# sequence file and the list is left alone; anything else is treated as a
# list of assembly numbers whose genomes are downloaded and then become the
# new query list.
open(my $list_in, '<', $query_list) or die "Cannot read $query_list: $!";
my @arr = <$list_in>;
close $list_in;

# Strip the line end ("\n" or "\r\n") so the entry is a usable file name.
my $fe = @arr ? $arr[0] : '';
$fe =~ s/\r?\n\z//;

if (scalar @arr == 1){
    my $is_sequence = 0;
    my @f;    # lines of the upload; only filled when it is not a sequence file

    if (open(my $query_fh, '<', $fe)) {
        # Only the first line is needed to classify the file, so a (possibly
        # large) genome file is never read into memory as a whole.
        my $first_line = <$query_fh>;
        $first_line = '' unless defined $first_line;

        if ($first_line =~ /^>/ or $first_line =~ /^[AaTtCcGg]{15}/) {
            $is_sequence = 1;    #input file is fasta sequence
        }
        else {
            @f = ($first_line, <$query_fh>);
        }
        close $query_fh;
    }
    else {
        warn "Cannot read $fe: $!\n";
    }

    unless ($is_sequence) {
        #We now know the uploaded file contains assembly number information
        my $genomedownload = "$workplace/genomedownload.sh";
        open(my $sh_fh, '>', $genomedownload) or die "Cannot write $genomedownload: $!";

        my $genome_error = "$workplace/genome_error.txt";
        open(my $ge_fh, '>', $genome_error) or die "Cannot write $genome_error: $!";

        my $genomedownload_dir = "$workplace/genomedownload_dir";
        mkdir $genomedownload_dir unless -d $genomedownload_dir;

        # The wildcard delete below acts on the current directory, so it must
        # never run unless the chdir really succeeded.
        chdir $genomedownload_dir
            or die "Cannot chdir to $genomedownload_dir, refusing to clean up: $!";
        system ("rm -rf *");

        foreach my $assembly_no (@f){
            # trim the line end (including "\r" from Windows files) and blanks
            $assembly_no =~ s/^\s+|\s+$//g;
            next if $assembly_no eq "";

            my $ftp_url = $assem_hash{$assembly_no};
            unless (defined $ftp_url) {
                # unknown assembly number: record it for the user
                print {$ge_fh} "$assembly_no\n";
                next;
            }

            # the downloaded file is named after the last URL path component
            (my $tf = $ftp_url) =~ s/.*\///g;
            print {$sh_fh} "wget -nc $ftp_url;\n";
            print {$sh_fh} "gzip -d $tf;\n";
        }
        close $sh_fh or die "Cannot finish writing $genomedownload: $!";
        close $ge_fh or die "Cannot finish writing $genome_error: $!";

        # the download script runs inside $genomedownload_dir (current directory)
        system('sh', $genomedownload) == 0
            or warn "Genome download script reported a failure (exit status $?)\n";

        #rewrite the $query_list when the file uploaded containing assembly number information
        open(my $list_out, '>', $query_list) or die "Cannot rewrite $query_list: $!";
        foreach my $gs (glob "$genomedownload_dir/*"){
            print {$list_out} "$gs\n";
        }
        close $list_out or die "Cannot finish writing $query_list: $!";
    }

}
#



# Split the type-strain list into chunks and run one fastANI process per
# chunk. The working directory must be the type-strain directory for the
# whole step, so a failed chdir is fatal.
chdir $typestrain_dir or die "Cannot chdir to $typestrain_dir: $!";

# Start from an empty split directory (remove leftovers of an earlier run).
my $split_listdir = "$workplace/split_listdir";
system('rm', '-rf', $split_listdir);
mkdir $split_listdir or die "Cannot create $split_listdir: $!";

system('bash', "$home_directory/FastANI-1.33/scripts/splitDatabase.sh", $typestrain_list, $spilt_number, $split_listdir) == 0
    or warn "splitDatabase.sh failed (exit status $?)\n";

# Likewise start from an empty output directory.
my $ani_outdir = "$workplace/ani_outdir";
system('rm', '-rf', $ani_outdir);
mkdir $ani_outdir or die "Cannot create $ani_outdir: $!";

# Collect the chunk files that were actually produced.
opendir(my $split_dh, $split_listdir) or die "Cannot read $split_listdir: $!";
my @sp = sort grep { !/^\./ } readdir $split_dh;
closedir $split_dh;

use Parallel::Runner;
my $runner = Parallel::Runner->new(6);

# Iterate over the chunk files themselves rather than counting up to
# $spilt_number: if fewer chunks exist, no job is started with an undefined
# list name, and no chunk is skipped if there are more.
foreach my $list_name (@sp) {
    my $sub_list = "$split_listdir/$list_name";
    $runner->run(
        sub{
            # list form: arguments reach fastANI verbatim, no shell involved
            system ("$home_directory/FastANI-1.33/fastANI", '--ql', $query_list, '--rl', $sub_list, '-o', "$ani_outdir/ani_out.$list_name", '-t', $number_threads, '--fragLen', '3000');
        }
    )#run end

}
$runner->finish;


# Merge the per-chunk fastANI outputs into one file, build the result tables,
# zip them, clean up this run's staged upload and copy the archive to the
# remote server.
my $ani = "$workplace/ANI_results0.txt";
{
    opendir(my $ani_dh, $ani_outdir) or die "Cannot read $ani_outdir: $!";
    my @parts = sort grep { !/^\./ } readdir $ani_dh;
    closedir $ani_dh;

    open(my $merged_fh, '>', $ani) or die "Cannot write $ani: $!";
    foreach my $part (@parts) {
        my $part_fh;
        unless (open($part_fh, '<', "$ani_outdir/$part")) {
            warn "Cannot read $ani_outdir/$part: $!\n";
            next;
        }
        while (my $line = <$part_fh>) {
            print {$merged_fh} $line;
        }
        close $part_fh;
    }
    close $merged_fh or die "Cannot finish writing $ani: $!";
}

my $out_result = "$workplace/out_result";
mkdir $out_result unless -d $out_result;
# zip below uses paths relative to $workplace, so the chdir must succeed
chdir $workplace or die "Cannot chdir to $workplace: $!";
my $aniout1 = "$out_result/ANI_result_details.txt";
my $aniout2 = "$out_result/ANI_result_above95.txt";
my $aniout3 = "$out_result/ANI_result_top.txt";

# Map the first column of the type-strain table to its third column (used by
# ani_copy as the name shown for a reference).
my $Typestrain_fn = "$home_directory/Typestrain_assembly_FTPsite.txt";
my %fn;
if (open(my $table_fh, '<', $Typestrain_fn)) {
    while (my $row = <$table_fh>) {
        $row =~ s/\r?\n\z//;
        my ($short_assemble, $full_assemble, $fname, $ftp_site) = split /\t/, $row;
        next unless defined $short_assemble and length $short_assemble and defined $fname;
        $fn{$short_assemble} = $fname;
    }
    close $table_fh;
}
else {
    warn "Cannot read $Typestrain_fn: $!\n";
}

ani_copy($ani, $aniout1, $aniout2, $aniout3, \%fn);

system('cp', $Typestrain_fn, '-t', $out_result);
system('zip', '-r', 'out_result.zip', 'out_result');
#############################################################
#system ("perl $home_directory/sendEmail.pl -f $out_result.zip -t $email");

# Delete only what THIS run staged. A wildcard over the whole upload
# directory would also remove the staged files of other uploads that are
# still being processed.
system('rm', '-rf', "$upload_dir/$filename.TyprStain_ANI.txt", "$f.TyprStain_ANI.txt"); #delete generated files during runing bash_fastANI.pl
system('rm', '-rf', $f);  #delete the source file

{
    # FIXME(security): the password is hard-coded in the source; move it to a
    # protected configuration file or switch to key-based authentication.
    # It is handed to sshpass through the SSHPASS environment variable (-e)
    # so that it no longer shows up in the process list.
    local $ENV{SSHPASS} = 'Lxy_13296570632';
    my $remote_dir = defined $tengxunyun_dir ? $tengxunyun_dir : '';
    system('sshpass', '-e', 'scp', '-P', '13187', "$out_result.zip", "ubuntu\@115.236.153.172:$remote_dir") == 0
        or warn "scp of $out_result.zip failed (exit status $?)\n";
}
#system("sshpass -p 'Lxy_13296570632' scp $out_result.zip 'ubuntu\@124.223.154.39':$tengxunyun_dir");

# ani_copy($i, $o1, $o2, $o3, $ref_h)
#
# Turn the raw tab-separated ANI result file $i into three report files.
#   $i     - input file; each line is: query path, reference, ANI value,
#            identity fragment count, total fragment count
#   $o1    - output: every hit of every query, best ANI first
#   $o2    - output: per query the best hit plus every hit with ANI >= 95
#   $o3    - output: per query only the best hit
#   $ref_h - hash reference mapping a reference id to the name to display
#
# The query path is reduced to its file name without a trailing
# "_genomic.fna"; the reference id is replaced by its name from $ref_h, or
# kept as is when the table has no entry for it. Lines without a "/" in
# front of the first two tab-separated fields are passed through unchanged.
# Queries are numbered in sorted order of their names. Dies if a file cannot
# be opened or written; returns 1 otherwise.
sub ani_copy {
    my ($i, $o1, $o2, $o3, $ref_h) = @_;
    my $header = "Query_genome\tType_strain\tANI value\tIdentity number of fragment\tTotal number of fragment\n";

    open(my $in_fh, '<', $i) or die "ani_copy: cannot read $i: $!";
    my %input;    # query name => list of rewritten result lines
    while (my $line = <$in_fh>) {
        chomp $line;
        next if $line eq '';
        $line =~ s{^.*/(.*?)(_genomic\.fna)?\t(.*?)\t}
                  { $1 . "\t" . (defined $ref_h->{$3} ? $ref_h->{$3} : $3) . "\t" }e;
        push @{ $input{ (split /\t/, $line)[0] } }, $line;
    }
    close $in_fh;

    open(my $details_fh, '>', $o1) or die "ani_copy: cannot write $o1: $!";
    open(my $above95_fh, '>', $o2) or die "ani_copy: cannot write $o2: $!";
    open(my $top_fh,     '>', $o3) or die "ani_copy: cannot write $o3: $!";
    print {$_} "Query_number\t$header" for $details_fh, $above95_fh, $top_fh;

    # ANI value (third column) of a result line; 0 when the column is missing
    my $ani_of = sub {
        my $value = (split /\t/, $_[0])[2];
        return defined $value ? $value : 0;
    };

    my $q = 0;
    foreach my $query (sort keys %input) {
        $q++;
        my @hits = sort { $ani_of->($b) <=> $ani_of->($a) } @{ $input{$query} };

        for my $rank (0 .. $#hits) {
            my $row = "$q\t$hits[$rank]\n";
            print {$details_fh} $row;
            print {$above95_fh} $row if $rank == 0 or $ani_of->($hits[$rank]) >= 95;
            print {$top_fh}     $row if $rank == 0;
        }
    }

    # Close all three outputs (buffered write errors surface here).
    close $details_fh or die "ani_copy: error writing $o1: $!";
    close $above95_fh or die "ani_copy: error writing $o2: $!";
    close $top_fh     or die "ani_copy: error writing $o3: $!";
    return 1;
}
