# reformat.hapmix.output 
#
# Extract, for every individual and marker, the number of minor alleles, the number of pop1 alleles and the number of pop1 minor alleles from the HapMix probability files, for downstream association analysis.

## Positional command-line arguments (all six are required):
##   1. hapmix.outdir  directory holding the HapMix files read by this script
##   2. chr            chromosome label; used as the suffix of every file name
##   3. N              number of individuals (one probability file per individual)
##   4. outdir         directory the reformatted tables are written to
##   5. admixpop       admixed-population label; prefix of the probability files
##                     and of the ".geno." output
##   6. pop1           population-1 label; prefix of the ".minor." and ".allele."
##                     outputs
args <- commandArgs(trailingOnly = TRUE)

## Fail early: a missing argument would otherwise become NA and be pasted
## into file names as the literal string "NA".
if (length(args) < 6) {
  stop("expected 6 arguments: hapmix.outdir chr N outdir admixpop pop1 (got ",
       length(args), ")", call. = FALSE)
}

hapmix.outdir <- args[1]
chr <- args[2]
N <- as.numeric(args[3])
## A non-numeric N is coerced to NA by as.numeric(); reject it here rather
## than letting it surface later as an obscure matrix() error.
if (is.na(N) || N < 1) {
  stop("N (argument 3) must be a positive number of individuals, got '",
       args[3], "'", call. = FALSE)
}
outdir <- args[4]
admixpop <- args[5]
pop1 <- args[6]
## HapMix run mode; it is part of every per-individual probability file name.
hapmix_mode <- "DIPLOID"


## Infix string concatenation: a %&% b joins the operands with no separator.
`%&%` <- function(a, b) {
  paste0(a, b)
}

## Marker count: the number of rows in the probability file of individual 0.
prob <- read.table(
  paste0(hapmix.outdir, "/RUN/", admixpop, ".", hapmix_mode, ".0.", chr),
  header = FALSE
)
m <- nrow(prob)

## Individual IDs: first column of the ind file.
ind <- read.table(paste0(hapmix.outdir, "/ind.", chr), header = FALSE)
iid <- as.character(ind[["V1"]])

## Marker IDs: first column of the snpfile.
snps <- read.table(paste0(hapmix.outdir, "/snpfile.", chr), header = FALSE)
rsID <- as.character(snps[["V1"]])

## Result matrices: one row per individual, one column per marker, all NA
## until the per-individual loop fills them in. The first one is built
## explicitly and the other three start out as copies of it.
mat.minor <- matrix(NA, nrow = N, ncol = m, dimnames = list(iid, rsID))
mat.pop1.minor <- mat.pop1.major <- mat.pop1.allele <- mat.minor

## Fill the result matrices one individual at a time. The per-individual
## probability files are numbered from 0, so file j belongs to row j + 1.
## The indices are kept as integers so that large file numbers are never
## rendered in scientific notation when pasted into the file name.
indNum <- 0
for (j in seq_len(N) - 1L) {

  indNum <- j + 1L
  prob_file <- paste0(hapmix.outdir, "/RUN/", admixpop, ".", hapmix_mode,
                      ".", j, ".", chr)
  prob <- as.matrix(read.table(prob_file, header = FALSE))

  ## Guard against silent recycling: assigning a vector whose length merely
  ## divides m into a matrix row would be accepted without any error.
  if (nrow(prob) != m) {
    stop("expected ", m, " markers but found ", nrow(prob), " in ", prob_file,
         call. = FALSE)
  }

  ## Normalize prob for each marker (row) to sum up to 1. Dividing a matrix
  ## by a vector of its row sums scales every row by its own total.
  prob <- prob / rowSums(prob)

  ## calculate # minor, # pop1 minor, # pop1 alleles
  ## NOTE(review): column 2 is counted in both the pop1-minor and the
  ## pop1-major sums below -- confirm against the HapMix column layout.
  mat.minor[indNum, ] <- round(
    2 * (prob[, 1] + prob[, 2] + 2 * prob[, 3] + prob[, 4] +
           prob[, 7] + prob[, 11] + prob[, 12]),
    2
  )
  mat.pop1.minor[indNum, ] <- round(
    2 * (prob[, 1] + prob[, 2] + prob[, 3] + prob[, 4]), 2
  )
  mat.pop1.major[indNum, ] <- round(
    2 * (prob[, 2] + prob[, 6] + prob[, 7] + prob[, 8]), 2
  )
  ## pop1 allele count is the sum of the two already-rounded pop1 counts
  mat.pop1.allele[indNum, ] <- mat.pop1.minor[indNum, ] + mat.pop1.major[indNum, ]

  print(paste0("individual ", indNum))

} ## end of j loop

# output
# Every table is written space-delimited and unquoted, with the matrix row
# names and column names included.
write_space_delimited <- function(mat, path) {
  write.table(mat, file = path, quote = FALSE, sep = " ",
              row.names = TRUE, col.names = TRUE)
}

write_space_delimited(mat.minor,
                      paste0(outdir, "/", admixpop, ".geno.", chr))
write_space_delimited(mat.pop1.minor,
                      paste0(outdir, "/", pop1, ".minor.", chr))
write_space_delimited(mat.pop1.allele,
                      paste0(outdir, "/", pop1, ".allele.", chr))

print(paste0("Please find the output in ", outdir))



