#!/usr/bin/perl -w 
#This will take as input user parameter file and convert it to Simon style parameter file
#use strict;
use Getopt::Long;
use File::Basename;

my %opts = ();

        # Default Options

Getopt::Long::GetOptions(\%opts,qw(
        parfile=s
)) || die "Failed to parse options\n\n";

if ( (!defined $opts{parfile}))
{
        &usage();
        die "Failed to parse parameters\n\n";
}

&printPar();

sub usage
{
        print "\n";
        print "take as input user parameter file and remove temporary files\n";
        print "Usage:\n\t";
        print "-parfile\t parameter file \n\t";
        print "\n";
}

sub printPar
{
        print "\n";
        print "Parameters in Effect:\n";
        print "\t parameter file \n\t\t-parfile '$opts{parfile}'\n";
        print "\n";
}

my $paramfile = $opts{parfile};
$outparamfile = "$paramfile.Hapmix.out";

open(PARAMFILE,$paramfile) ||die("File $paramfile not found");

@rowp = <PARAMFILE>;
for($i = 0; $i < scalar @rowp; $i++)
  {
    chomp($rowp[$i]);
    if($rowp[$i] =~ /#/)
      {
	next;
      }
    @data = split(':',$rowp[$i]);
    if(scalar @data ==0)
      {
	next;
      }
    if(scalar @data == 2)
      {
	@val = split(' ',$data[1]);
	if(scalar @val == 1)
	  {
	    $data[1] =~ s/\s//g;
	  }
	$paramdata{$data[0]} = $data[1];
      }
  }

my(%param_key) = ( 'GENOTYPE'=>2, 
		   'OUTPUT_SITES'=>39, 
 		   'SITE_POSITIONS'=>6, 
		   'RECOMBINATION_VALS'=>106, 
		   'MUTATION_VALS'=>107, 
		   'MISCOPYING_VALS'=>108);


my(%param_file) = ('REFPOP1GENOFILE'=>'REFPOP1GENOFILE',  
                   'REFPOP2GENOFILE'=>'REFPOP2GENOFILE',  
                   'REFPOP1SNPFILE'=>'REFPOP1SNPFILE',  
                   'REFPOP2SNPFILE'=>'REFPOP2SNPFILE',  
                   'ADMIXSNPFILE'=>'ADMIXSNPFILE',  
                   'ADMIXGENOFILE'=>'ADMIXGENOFILE',  
                   'ADMIXINDFILE'=>'ADMIXINDFILE',  
                   'REF1LABEL'=>'REF1LABEL',  
                   'REF2LABEL'=>'REF2LABEL',
		   'RATESFILE'=>'RATESFILE',
		   'CHR'=>'CHR',
		   'OUTDIR'=>'OUTDIR',
		   'THETA'=>'THETA',
		   'LAMBDA'=>'LAMBDA',
		   'HAPMIX_MODE'=>'HAPMIX_MODE',
		   'OUTPUT_DETAILS'=>'OUTPUT_DETAILS',
		   'THRESHOLD'=>'THRESHOLD',
		  'ADMIXPOP'=>'ADMIXPOP',
		  'KEEPINTFILES'=>'KEEPINTFILES'); 
 

open(OUTPARAMFILE,">$outparamfile") || die("Unable to make file $outparamfile");

#Check the values and set defaults here:
if(!(exists $paramdata{'GENOTYPE'}))
  {
    print "WARNING Setting GENOTYPE parameter to 1, since not specified\n";
    $paramdata{'GENOTYPE'} = 1;
  }
if(!(exists $paramdata{'OUTPUT_SITES'}))
  {
    print "WARNING Setting OUTPUT_SITES parameter to 0, since not specified\n";
    $paramdata{'OUTPUT_SITES'} = 0;
  }
if(!(exists $paramdata{'SITE_POSITIONS'}))
  {
    print "WARNING Setting SITE_POSITIONS parameter, since not specified\n";
    $paramdata{'SITE_POSITIONS'} = "1 1000000000";
  }
if(!(exists $paramdata{'RECOMBINATION_VALS'}))
  {
    print "ERROR Please set the parameter RECOMBINATION_VALS\n";
    exit;
  }
if(!(exists $paramdata{'MUTATION_VALS'}))
  {
    print "ERROR Please set the parameter MUTATION_VALS\n";
    exit;
  }
if(!(exists $paramdata{'MISCOPYING_VALS'}))
  {
    print "ERROR Please set the parameter MISCOPYING_VALS\n";
    exit;
  }
if(!(exists $paramdata{'REFPOP1GENOFILE'}))
  {
    print "ERROR Please specify the input file REFPOP1GENOFILE\n";
    exit;
  }
if(!(exists $paramdata{'REFPOP2GENOFILE'}))
  {
    print "ERROR Please specify the input file REFPOP2GENOFILE\n";
    exit;
  }
if(!(exists $paramdata{'REFPOP1SNPFILE'}))
  {
    print "ERROR Please specify the input file REFPOP1SNPFILE\n";
    exit;
  }
if(!(exists $paramdata{'REFPOP2SNPFILE'}))
  {
    print "ERROR Please specify the input file REFPOP2SNPFILE\n";
    exit;
  }
if(!(exists $paramdata{'ADMIXSNPFILE'}))
  {
    print "ERROR Please specify the input file ADMIXSNPFILE\n";
    exit;
  }
if(!(exists $paramdata{'ADMIXGENOFILE'}))
  {
    print "ERROR Please specify the input file ADMIXGENOFILE\n";
    exit;
  }
if(!(exists $paramdata{'ADMIXINDFILE'}))
  {
    print "ERROR Please specify the input file ADMIXINDFILE\n";
    exit;
  }
if(!(exists $paramdata{'RATESFILE'}))
  {
    print "ERROR Please specify the input file RATESFILE\n";
    exit;
  }
if(!(exists $paramdata{'CHR'}))
  {
    print "ERROR Please specify the chromosome num\n";
    exit;
  }
if(!(exists $paramdata{'OUTDIR'}))
  {
    print "ERROR Please specify the directory for output files\n";
    exit;
  }
if(!(exists $paramdata{'THETA'}))
  {
    print "ERROR Please specify the parameter THETA\n";
    exit;
  }
if(!(exists $paramdata{'LAMBDA'}))
  {
    print "ERROR Please specify the parameter LAMBDA\n";
    exit;
  }
if(!(exists $paramdata{'HAPMIX_MODE'}))
  {
    $paramdata{'HAPMIX_MODE'} = 'LOCAL_ANC';
  }
if(!(exists $paramdata{'OUTPUT_DETAILS'}))
  {
    if(($paramdata{'HAPMIX_MODE'} eq 'LOCAL_ANC') || ($paramdata{'HAPMIX_MODE'} eq 'DIPLOID') || 
       ($paramdata{'HAPMIX_MODE'} eq 'HAPLOID'))
      {
	$paramdata{'OUTPUT_DETAILS'} = 'PROB';
      }
    elsif($paramdata{'HAPMIX_MODE'} eq 'SAMPLE_RANDOM_PATHS')
      {
	$paramdata{'OUTPUT_DETAILS'} = 'ANC_INT_RANDOMPATH';
      }
}
if(!(exists $paramdata{'THRESHOLD'}))
  {
    $paramdata{'THRESHOLD'} = 0.0;
  }
if(!(exists $paramdata{'ADMIXPOP'}))
  {
    print "ERROR Please specify the parameter ADMIXPOP\n";
    exit;
  }
if(!(exists $paramdata{'REF1LABEL'}))
  {
    $paramdata{'REF1LABEL'} = $paramdata{'ADMIXPOP'}.".REF1";
  }
if(!(exists $paramdata{'REF2LABEL'}))
  {
    $paramdata{'REF2LABEL'} = $paramdata{'ADMIXPOP'}.".REF2";
  }
if(!(exists $paramdata{'KEEPINTFILES'}))
  {
    print "WARNING Setting KEEPINTFILES parameter to 0, since not specified\n";
    $paramdata{'KEEPINTFILES'} = 0;
  }

foreach $key(keys %paramdata) 
  { 
    if($key eq 'THETA')
      {
          $theta = $paramdata{$key};
	  }

    if($key eq 'LAMBDA')
      {
	
	if($paramdata{$key} <= 1)
	  {
	    print "ERROR: Please choose a valid value of lambda greater than 1\n";
	    exit;
	  }

	$lambda = ($paramdata{$key})/100;
      }
#Extract the Output format:
    if($key eq 'HAPMIX_MODE')
      {
	if($paramdata{'GENOTYPE'} ==1)
	  {
	    if($paramdata{$key} eq 'LOCAL_ANC')
	      {
		$paramout{38} = 5;
	      }
	    elsif($paramdata{$key} eq 'SAMPLE_RANDOM_PATHS')
	      {
		$paramout{38} =2 ;
	      }
	    elsif($paramdata{$key} eq 'DIPLOID')
	      {
		$paramout{38} = 1;
	      }
	    elsif($paramdata{$key} eq 'HAPLOID')
	      {
		print "ERROR: Cannot have Haploid output mode for genotype run, please change one of them\n";
		exit;
	      }
	  }
	elsif($paramdata{'GENOTYPE'} ==0) 
          { 
            if(($paramdata{$key} eq 'LOCAL_ANC') || ($paramdata{$key} eq 'SAMPLE_RANDOM_PATHS') || ($paramdata{$key} eq 'DIPLOID')) 
              { 
		print "ERROR: Cannot have the mode specified for haplotype run, please change one of them\n"; 
                exit; 

              } 
	  }
      }
	
#Chromsome 
    if($key eq 'CHR')
      {
	if($paramdata{$key} eq 'X')
	  {
	    $paramdata{$key} = 23;
	  }
      }

    if(exists $param_key{$key})
      {
	$paramout{$param_key{$key}} = $paramdata{$key};
      }
    elsif(exists $param_file{$key})
      {
	next;
      }
    else
      {
	print "Unknown parameter please check ",$key,"\n";
      }
    
  }

foreach $key(sort keys %paramout)
  {
    print OUTPARAMFILE ":",$key,":",$paramout{$key},"\n";
  }

print OUTPARAMFILE ":104:",$lambda*(1.0-$theta)," ",$lambda*(1.0-$theta),"\n";
print OUTPARAMFILE ":105:",$lambda*$theta," ",$lambda*$theta,"\n"; 

#Hard coded values depending on genotype/haploid mode
if($paramdata{'GENOTYPE'} == 1)
  {
    print OUTPARAMFILE ":33:1\n";
    print OUTPARAMFILE ":37:1 0.01\n";
    print OUTPARAMFILE ":40:1\n";
    
  }
elsif($paramdata{'GENOTYPE'} == 0)
  {
    print OUTPARAMFILE ":33:0\n";
    print OUTPARAMFILE ":37:0\n";
    print OUTPARAMFILE ":40:0\n";

  }

#Setting the following parameters:
#SIMULATE 
print OUTPARAMFILE ":1:0\n";
#SEPARATE_FILES
print OUTPARAMFILE ":41:1\n";
#TRIM_LEND , TRIM_REND
print OUTPARAMFILE ":3: 1\n";          
print OUTPARAMFILE ":4: 1000000000\n"; 
#PHASED
print OUTPARAMFILE ":110:1\n";    
#NUM_SIMULATIONS
print OUTPARAMFILE ":101:1\n";         
#THETA1S, THETA2S
print OUTPARAMFILE ":12: 10 0.00005 0.0001 0.0005 0.001 0.005 0.001 0.05 0.1 0.5 1\n";
print OUTPARAMFILE ":13: 10 0.00005 0.0001 0.0005 0.001 0.005 0.01 0.05 0.1 0.5 1\n";
#ORIG_SEED
print OUTPARAMFILE ":100:5466565\n";    
#MISSING_PROB
print OUTPARAMFILE ":109:0\n";
   
close OUTPARAMFILE;


$inadmixgenofile = $paramdata{'ADMIXGENOFILE'};
$inadmixsnpfile = $paramdata{'ADMIXSNPFILE'};
#$inadmixindfile = $paramdata{'ADMIXINDFILE'};
#$inparent1genofile = $paramdata{'REFPOP1GENOFILE'};
$inparent1snpfile = $paramdata{'REFPOP1SNPFILE'};
#$inparent2genofile = $paramdata{'REFPOP2GENOFILE'};
$inparent2snpfile = $paramdata{'REFPOP2SNPFILE'};
$parent1label = $paramdata{'REF1LABEL'};
$parent2label = $paramdata{'REF2LABEL'};
$admixpop = $paramdata{'ADMIXPOP'};


#This will check the parental marker files to make sure they are exactly the same else exit with error message

open(REFPOP1MFILE,$inparent1snpfile) || die("Unable to open file $inparent1snpfile");
open(REFPOP2MFILE,$inparent2snpfile) || die("Unable to open file $inparent2snpfile");

@row= <REFPOP1MFILE>;                              
for($i = 0; $i < scalar @row; $i++)                 
  {                                               
    chomp($row[$i]);                                
    @data = split(' ',$row[$i]);                  
    $phys1{$data[3]} = $data[3];         
                                         
  }                                 
 
$filelen1 = scalar @row; 
                                                                                                               
@row= <REFPOP2MFILE>; 
$filelen2 = scalar @row;                                                              
if($filelen1 != $filelen2)                                                            
  { 
    print "ERROR: The parental marker files $inparent1snpfile and $inparent2snpfile files are not of the same length\n";    
    exit;    
  } 
 
for($i = 0; $i < scalar @row; $i++)           
  { 
    chomp($row[$i]); 
    @data = split(' ',$row[$i]); 
    if(!exists($phys1{$data[3]}))                                                                                                 
      {          
        print "ERROR: The parental files $inparent1snpfile and $inparent2snpfile files do not have the same set of markers\n";    
        exit;                                                
      }   
  } 
                                                      
close REFPOP1MFILE; 
close REFPOP2MFILE; 

#Check the admixed marker file is a subset of the parental marker file
open(ADMIXMFILE,$inadmixsnpfile) || die("Unable to open file $inadmixsnpfile");
@admixrow = <ADMIXMFILE>;
for($i = 0; $i < scalar @admixrow; $i++)
  {
    chomp($admixrow[$i]);
    @data = split(' ',$admixrow[$i]);
    if(!exists($phys1{$data[3]}))
      {
	print "ERROR: The admixed marker file $inadmixsnpfile is not a subset of the parental marker file\n";     
	exit;     
      }
  }  


#Filter the markers which look flipped
#print "perl filterSNPs.pl $inparent1snpfile $inparent2genofile $inparent1genofile $inadmixsnpfile $inadmixgenofile $inadmixindfile $paramdata{'CHR'}\n";
#`perl bin/filterSNPs.pl $inparent1snpfile $inparent2genofile $inparent1genofile $inadmixsnpfile $inadmixgenofile $inadmixindfile $paramdata{'CHR'}`;
#if(($?>>8) != 0) 
#  { 
#    print "Error in filtering the markers, check the frequency files\n"; 
#    exit; 
#  } 

#Convert the admixed files into Hapmix format
#print "perl convert_phasedeghapmix_chrsamp.pl $inadmixgenofile $inadmixsnpfile $paramdata{'CHR'} $admixpop $paramdata{'GENOTYPE'}\n";
#`perl bin/convert_phasedeghapmix_chrsamp.pl $inadmixgenofile $inadmixsnpfile $paramdata{'CHR'} $admixpop $paramdata{'GENOTYPE'}`;
if(($?>>8) != 0)
  {
    print "Error in creating Hapmix format admixed files\n";
    exit;
  }

$admixsnpfile = "$admixpop.SNPOUT.$paramdata{'CHR'}";


open(GENOFILE,$inadmixgenofile) ||die("File $inadmixgenofile not found");
$row = <GENOFILE>;
chomp($row);
@data = split('',$row);
$nadmix = scalar @data;

#print "perl convert_phasedeghapmix_chr.pl $inparent1genofile $inparent1snpfile $parent1label $paramdata{'CHR'} $admixpop\n";
#`perl bin/convert_phasedeghapmix_chr.pl $inparent1genofile $inparent1snpfile $parent1label $paramdata{'CHR'} $admixpop`;
if(($?>>8) != 0)
  {
    print "Error in creating Hapmix format reference P1 files\n";
    exit;
  }

$ref1file = $admixpop.".".$parent1label."GENOOUT.$paramdata{'CHR'}";
#print "Parent1 File length $out\n";

#print "perl convert_phasedeghapmix_chr.pl $inparent2genofile $inparent2snpfile $parent2label $paramdata{'CHR'} $admixpop\n";
#`perl bin/convert_phasedeghapmix_chr.pl $inparent2genofile $inparent2snpfile $parent2label $paramdata{'CHR'} $admixpop`;
if(($?>>8) != 0)
  {
    print "Error in creating Hapmix format reference P2 files\n";
    exit;
  }
$ref2file = $admixpop.".".$parent2label."GENOOUT.$paramdata{'CHR'}"; 

#print "Finished creating genotype files to run Hapmix\n";

#print "Starting Hapmix Runs\n";

$runstyle = $paramdata{'HAPMIX_MODE'};
#print $runstyle,"\n";
if($runstyle eq 'LOCAL_ANC')
  {
    $runstyle = 'LOCALANC';
  }
elsif($runstyle eq 'SAMPLE_RANDOM_PATHS')
  {
    $runstyle = 'SAMPRANPATH';
  }


$chr = $paramdata{'CHR'};
$outdir = $paramdata{'OUTDIR'};
#$ratesfile =$paramdata{'RATESFILE'};
#print $ref1file,"\t",$ref2file,"\t",$admixsnpfile,"\n";

################
################
################
################
print "Finished running Hapmix for all sample, processing output files\n";
#Handle the output

$likefile = "$outdir/loglhood.$admixpop.$runstyle.$chr";

for($n=0; $n<$nadmix; $n++) 
  { 
    `grep Likelihood $outdir/run.$admixpop.$runstyle.$n.$chr >> $likefile`;
    if($paramdata{'KEEPINTFILES'} == 0)
      {
	`rm $outdir/run.$admixpop.$runstyle.$n.$chr`;
      }
} 

if($paramdata{'HAPMIX_MODE'} eq 'LOCAL_ANC')
  {
    $details = $paramdata{'OUTPUT_DETAILS'};
    if($details eq 'PROB')
      {
	print "Look at the output files in $paramdata{'OUTDIR'}\n";
      }
    elsif(($details eq 'ANC_INT_THRESH') || ($details eq 'ANC_EXPECTED'))
      {
	`perl bin/extract_output.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $details $paramdata{'THRESHOLD'} $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n < $nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    elsif($details eq 'ANC_INT_SAMPLE')
      {
	`perl bin/extract_output5.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n < $nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    else
      {
	print "Please check your OUTPUT_DETAILS parameter value\n";
	exit;
      }
  }
elsif($paramdata{'HAPMIX_MODE'} eq 'SAMPLE_RANDOM_PATHS')
  {
    $details = $paramdata{'OUTPUT_DETAILS'};
    print $details,"\n";
    if(($details eq 'ANC_INT_RANDOMPATH') || ($details eq 'HAPLOID_FILES'))
      {
	`perl bin/extract_output2.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $details $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n<$nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    else
      {
	print "Please check your OUTPUT_DETAILS parameter value\n";
	exit;
      }
  }
elsif($paramdata{'HAPMIX_MODE'} eq 'DIPLOID')                   
  {                                                                                         
    $details = $paramdata{'OUTPUT_DETAILS'};                            
    if($details eq 'PROB')
      {
	print "Look at the output files in $paramdata{'OUTDIR'}\n";
      }
    elsif($details eq 'HAPLOID_FILES')
      {
	`perl bin/extract_output3.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $paramdata{'THRESHOLD'} $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n<$nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    else
      {
	print "Please check your OUTPUT_DETAILS parameter value\n";
	exit;
      }
  }
elsif($paramdata{'HAPMIX_MODE'} eq 'HAPLOID')                   
  {
    $details = $paramdata{'OUTPUT_DETAILS'};
    if($details eq 'PROB')
      {
	print "Look at the output files in $paramdata{'OUTDIR'}\n";
      }
    elsif(($details eq 'ANC_PROB') || ($details eq 'ANC_INT_THRESH') || ($details eq 'HAPLOID_FILES') || ($details eq 'ANC_INT_SAMPLE'))
      {
	print $paramdata{'HAPMIX_MODE'},"\t",$details,"\n";
	`perl bin/extract_outputhap.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $details $paramdata{'THRESHOLD'} $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n<$nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    else
      {
	print "Please check your OUTPUT_DETAILS parameter value\n";
	exit;
      }
  }

#Clean Up all intermediate files
`rm $outparamfile`;
`rm $admixsnpfile`;
`rm $ref1file`;
`rm $ref2file`;
for($i=0;$i<$nadmix;$i++)
  {
    
    $admixfile ="$admixpop.$i.$paramdata{'CHR'}";
    `rm $admixfile`;
    
  }
