#!/usr/bin/perl -w 

use Tie::File;

use strict;

# -------------------------------------------------------------------
#  Usage: interpolateSNPs.pl SNPINFILE SNPOUTFILE ANCINFILE ANCOUTFILE
#
#  SNPINFILE and ANCINFILE are parallel files: line i of ANCINFILE
#  belongs to line i of SNPINFILE.  For every row of SNPOUTFILE the
#  script looks at the physical position (fourth whitespace-separated
#  column), finds the SNPINFILE row with the same position or, failing
#  that, the row chosen by binsearch(), and copies the matching
#  ANCINFILE line to ANCOUTFILE.
#
#  binsearch() is a binary search, so the positions in SNPINFILE are
#  expected to be in ascending numeric order.
# -------------------------------------------------------------------

if (@ARGV != 4) {
    # Diagnostics go to STDERR with a non-zero status so that calling
    # scripts can detect the failure.
    print STDERR "WRONG USAGE: Usage interpolateSNPs.pl SNPINFILE SNPOUTFILE ANCINFILE ANCOUTFILE\n";
    exit 1;
}

my ($snpinfile, $snpoutfile, $ancinfile, $ancoutfile) = @ARGV;

# Three-argument open: the file names are never interpreted as a mode
# or a pipe, and every failure reports which file and why.
open(my $snp_in_fh, '<', $snpinfile)
    or die "Cannot open SNP input file '$snpinfile': $!\n";
open(my $snp_out_fh, '<', $snpoutfile)
    or die "Cannot open SNP output file '$snpoutfile': $!\n";
open(my $anc_in_fh, '<', $ancinfile)
    or die "Cannot open ancestry input file '$ancinfile': $!\n";
open(my $anc_out_fh, '>', $ancoutfile)
    or die "Cannot open ancestry output file '$ancoutfile' for writing: $!\n";

# Read the SNP input file to get the physical positions.
#   @input_positions  : position of each row, in file order
#   %row_of_position  : position -> row index (the last row wins when a
#                       position occurs more than once)
# A row with fewer than four fields has an undefined position; it is
# kept (Perl warns under -w) so that row numbering stays aligned with
# the ancestry file.
my @input_positions;
my %row_of_position;
while (my $line = <$snp_in_fh>) {
    chomp $line;
    my @fields   = split ' ', $line;
    my $position = $fields[3];
    $row_of_position{$position} = scalar @input_positions;
    push @input_positions, $position;
}

# Expose the ancestry input as an array of lines; autochomp is off so
# each record keeps its line terminator and can be printed verbatim.
tie my @anc_lines, 'Tie::File', $anc_in_fh, autochomp => 0
    or die "Cannot tie ancestry input file '$ancinfile'\n";

if (@anc_lines != @input_positions) {
    print STDERR "ERROR: The length of the SNP and Ancestry Input Files does not match please check\n";
    exit 1;
}

# Stream the SNP output file: one ancestry line is written per row.
while (my $line = <$snp_out_fh>) {
    chomp $line;
    my @fields   = split ' ', $line;
    my $position = $fields[3];

    my $source_row;
    if (exists $row_of_position{$position}) {
        # Exact position match in the SNP input file.
        $source_row = $row_of_position{$position};
    }
    else {
        # With no input rows there is nothing to pick from; binsearch()
        # would hand back index 0 of an empty array.
        die "ERROR: SNP input file '$snpinfile' contains no rows\n"
            unless @input_positions;
        $source_row = binsearch($position, \@input_positions);
    }

    print {$anc_out_fh} $anc_lines[$source_row]
        or die "Error writing to '$ancoutfile': $!\n";
}

# Release the tie before closing the handle it reads from.
untie @anc_lines;

close $snp_in_fh;
close $snp_out_fh;
close $anc_in_fh;

# Buffered write errors only surface when the handle is closed.
close($anc_out_fh)
    or die "Error closing '$ancoutfile': $!\n";

# -------------------------------------------------------------------
#  Binary search for nearest SNP by physical position
# -------------------------------------------------------------------
sub binsearch {
    # Arguments:
    #   1. the physical position to look for
    #   2. reference to the array of physical positions to search
    #      (the halving below only makes sense for ascending order)
    # Returns the index of an element equal to the target if the search
    # lands on one; otherwise the index of whichever of the two
    # remaining candidates is closer, the lower index winning a tie.
    my ($target, $positions) = @_;

    my $lo = 0;
    my $hi = $#{$positions};

    # Narrow [lo, hi] until the two ends are adjacent (or identical).
    until ($hi - $lo <= 1) {
        my $probe = int(($hi + $lo) / 2);
        my $value = $positions->[$probe];

        if ($value > $target) {
            $hi = $probe;
            next;
        }
        if ($value < $target) {
            $lo = $probe;
            next;
        }

        # Neither greater nor smaller: exact hit.
        return $probe;
    }

    # Pick the closer end; the upper one only when strictly closer.
    my $gap_hi = abs($positions->[$hi] - $target);
    my $gap_lo = abs($positions->[$lo] - $target);

    return $gap_hi < $gap_lo ? $hi : $lo;
}
