#perl /home/xiangyang/Desktop/tool_dev/snp计算工具/snp.pl /home/xiangyang/Desktop/tool_dev/snp计算工具/oxa_g6.txt /home/xiangyang/Desktop/tool_dev/snp计算工具/core_gene_alignment_SNP_pre.phy SNP_oxa_g6

#perl /home/xiangyang/Desktop/web_tool/tools/snp计算工具/snp.pl /home/xiangyang/Desktop/web_tool/tools/snp计算工具/snp.txt /home/xiangyang/Desktop/web_tool/tools/snp计算工具/core_gene_alignment_SNP.phy /home/xiangyang/Desktop/web_tool/tools/snp计算工具/SNP_out2

use strict;
use warnings;
use FindBin;

my $home_directory = $FindBin::Bin;


my $strain_group_tab = $ARGV[0];
my $core_snp_file = $ARGV[1];
my $snp_dir = "$ARGV[2]";
mkdir $snp_dir;
#print "$snp_dir\n";
open (ID, $strain_group_tab);
my %h;
my %snp;
my @all;
while(<ID>){
    chomp;
    my ($strain, $st) = split /\t/, $_;
    push @{$h{$st}}, $strain;
    push @all, $strain;
}
my $sample_num = scalar @all;

my @d = sort keys %h;
for(my $m=0; $m < scalar @d; $m++){
    for(my $n=0; $n < scalar @d; $n++){
        if ($n>=$m){
            my $dir_n = $d[$m]."_".$d[$n];
            mkdir "$snp_dir/$dir_n";
            print "$dir_n\n";
        }
    }
}


my %s = parse_fasta($core_snp_file);
#print  join("", @all), "\n";
my ($fn_left, $fn_right);
my $count=0;
for(my $i=0; $i<$sample_num; $i++){
    foreach (keys %h){
        $fn_left = $_ if grep($_ eq $all[$i], @{$h{$_}});
    }
    #$fn_left = "Novel" if grep($_ eq $all[$i], @{$h{"Novel"}});
    #$fn_left = "1968" if grep($_ eq $all[$i], @{$h{"1968"}});
    #$fn_left = "1806" if grep($_ eq $all[$i], @{$h{"1806"}});
    #$fn_left = "3243" if grep($_ eq $all[$i], @{$h{"3243"}});
    #$fn_left = "1" if grep($_ eq $all[$i], @{$h{"1"}});
    #$fn_left = "2" if grep($_ eq $all[$i], @{$h{"2"}});
    #$fn_left = "3" if grep($_ eq $all[$i], @{$h{"3"}});
    next if $i>$sample_num - 1;
    for(my $k=0; $k<$sample_num; $k++){
        next if $k>$sample_num - 1;
        $count++;
        foreach (keys %h){
            $fn_right = $_ if grep($_ eq $all[$k], @{$h{$_}});
        }
        #$fn_right = "Novel" if grep($_ eq $all[$k], @{$h{"Novel"}});
        #$fn_right = "1968" if grep($_ eq $all[$k], @{$h{"1968"}});
        #$fn_right = "1806" if grep($_ eq $all[$k], @{$h{"1806"}});
        #$fn_right = "3243" if grep($_ eq $all[$k], @{$h{"3243"}});
        #$fn_right = "1" if grep($_ eq $all[$k], @{$h{"1"}});
        #$fn_right = "2" if grep($_ eq $all[$k], @{$h{"2"}});
        #$fn_right = "3" if grep($_ eq $all[$k], @{$h{"3"}});
        my $sub_fn = $fn_left."_".$fn_right;
        my $fhs;
        if (-d "$snp_dir/$sub_fn" ){
            open my $fh, ">$snp_dir/$sub_fn/$all[$i].$all[$k].$count";
            $fhs->{$count} = $fh; 
            next unless $fh;
            print { $fhs->{$count} } ">$all[$i]\n$s{$all[$i]}\n";
            print { $fhs->{$count} } ">$all[$k]\n$s{$all[$k]}\n";
            system("snp-sites -p -o $snp_dir/$sub_fn/snp_$count $snp_dir/$sub_fn/$all[$i].$all[$k].$count 2>/dev/null");
            system("rm -rf $snp_dir/$sub_fn/$all[$i].$all[$k].$count");
            
            if ( (-e "$snp_dir/$sub_fn/snp_$count") && ($i ne $k) ){
                open (SNP, "$snp_dir/$sub_fn/snp_$count");  
                my @snp = <SNP>;
                my $num = $snp[0];
                chomp $num;
                $num =~ s/.* //;
                push @{$snp{$sub_fn}}, $num;
                #print "$sub_fn\t$num\n";                
            }else{
                push @{$snp{$sub_fn}}, 0 if $i ne $k;
            }
            
        }
        
    }

}

open (O, ">$snp_dir/snp_out_ST.txt");
print O "ST.vs.ST\tSNP median number\tSNP(Min-Max)\tSNP(all)\n";
foreach (sort keys %snp){
    my @s = @{$snp{$_}};
    @s = sort{$a<=>$b}@s;
    my $md = median(@s);
    my $min = $s[0];
    my $max = $s[-1];
    print O "$_\t$md\t$min-$max\t", join(" ", @s), "\n";

}
close O;


sub median {
    my @array = @_;
    @array = sort { $a <=> $b } @array;  # 排序数组
    my $length = @array;                # 数组长度
    if ($length % 2) {                  # 如果长度是奇数，直接返回中间的元素
        return $array[int($length / 2)];
    } else {                             # 如果长度是偶数，返回中间元素的平均值
        return ($array[int($length / 2 - 1)] + $array[int($length / 2)]) / 2;
    }
}


sub parse_fasta {

    my ($infile) = @_;
    my $term     = $/; # input record separator;
    my %seq_hash = (); # key:seqid, val:seq
    open my $INFILE, $infile or die "could not open infile '$infile' : $! \n"; 
    $/ = ">"; # input record separator;
    while(<$INFILE>) {
        chomp;
        next if($_ eq '');
        my ($id, @sequencelines) = split /\n/;
        foreach my $line (@sequencelines) {
            $seq_hash{$id} .= $line;
        }
    }
    $/ = $term;
    return(%seq_hash);

} # end of parse_fasta

