####################################################################################################################
##                                                                                                       	  ##
##                   R Code for Haplotype-Based HWE Tests                                                 	  ##
##                                                                                                       	  ##
####################################################################################################################
##                                                                                                       	  ##
##                                                                                                       	  ##
####################################################################################################################
## > source("HAP-HWE.txt")                                                                                	  ## 
## > HAPHWE("ped.txt",header=T,missing=0,blockfile="block.txt",filename="results.txt")                            ##
####################################################################################################################
## Input variables:												  ##
##														  ##
## ped: The name of a standard linkage pedigree file or a matrix/dataframe containing pedigree relationship,      ##
##      genotype, and phenotype information, one row for each individual. The first 5 columns give the            ##
##      pedigree id, individual id, father id (0 if founder), mother id (0 if founder), and sex (1=male,          ## 
##      2=female), in that order. Note that these fields need to be numeric. The sixth column gives the           ##
##      affection status (1=unaffected; 2=affected; 0=unknown). The remaining columns code for marker genotypes   ##
##     (2 column per SNP), with the two alleles for each marker represented by two consecutive numbers            ##
##     (0=missing). The HAP-HWE software will automatically exclude the nonfounders from the analysis.            ## 
##														  ##
## header: If the data file has header, then it should be taken as T (or TRUE), otherwise F (or FALSE).           ##
##         The default value is T.		                                                                  ##
##														  ##
## missing: The missing value for alleles.The default value is 0. It could take any numeric value, except for     ##
##          1, 2, NA and any other string value. If there is any missing values at some loci for an individual,   ##
## 	    then the individual will be excluded from the analysis.		    		 	          ##
##														  ##
## blockfile: haplotype block file with each row representing a haplotype block, the first column representing    ##
##            the ID of haplotype block and the columns after the first column representing the marker sequential ##
##            numbers included in the haplotype block. If the "blockfile" is assigned, then the HAP-HWE software  ##
##            analyzes the haplotype blocks one by one. Otherwise, the HAP-HWE software will simutaneously        ##
##            analyze all the markers in the input file ("ped.txt") as a whole haplotype block. The default value ##
##            of the "blockfile" is NULL.                                                                         ##                     
##                                                                                                                ##
## filename: If the "filename" is assigned (should be text file, and the file name should be embraced by double   ##
##	     quotation marks, then the output results will be saved in the text file in the current directory.	  ##
##           Otherwise, the ouput results will be automatically saved in the "results.txt" file.                  ## 
##														  ##
## start.ratio.ab: the initial value of the parameter K=a/b under the Niu's Model for iterations.	          ##
## 														  ##
## start.rho: the initial value of rho under the Inbreeding Model for iterations, which should be taken to be     ##
##            larger than 0.                                                                            	  ##
## 														  ##
## iter: the maximum number of iterations. The default value is 1000.						  ##
##														  ##
## dv: convergence criterion. The default value is 1e-8.							  ##
##														  ##
##														  ##
####################################################################################################################

HAPHWE <- function(ped, header=T, missing=0, blockfile=NULL, filename="results.txt", start.ratio.ab = 1, start.rho = 0.01, iter = 1000, dv = 1e-8){

        if (is.null(blockfile)) {

            blockname <- "block1"
    
            results <- HAPHWE0(ped, header, missing, blockfile, filename, start.ratio.ab, start.rho, iter, dv, blockname)

	        if (missing(filename)!=T){
		
		    sink(filename)
		
		    print(results)
		
		    sink()

	         }

             print(results)

        }

        else if (!is.null(blockfile)) {

               results <- NULL

               ped1 <- read.table(ped, header=header) 

               n <- count.fields(blockfile)

               b <- read.table(blockfile,fill=T,col.name=1:max(n)) 

               b.dim <- dim(b)[1]

               for (i in 1:b.dim){ 

                   snp <- b[i,2:max(n)][!is.na(b[i,2:max(n)])] 

                   snppos <- 2*(rep(snp,each=2)-1)+7:8

                   ped2 <- ped1[,c(1:6,snppos)]

                   blockname <- paste("block",i,sep = "")

                   results0 <- HAPHWE0(ped2, header, missing, blockfile, filename, start.ratio.ab, start.rho, iter, dv, blockname)

                   print(results0)

                   results <- c(results, results0)

                }

	        if (missing(filename)!=T){
		
		    sink(filename)
		
		    print(results)
		
		    sink()

	         }

         }


    }

HAPHWE0 <- function(ped, header, missing, blockfile, filename, start.ratio.ab, start.rho, iter, dv, blockname){

        if (is.character(ped)) {

            pedfile <- ped

            ped <- read.table(pedfile,header=header,as.is=T)
			
			ped_org <- ped
			
			ped <- ped[which(ped[,3]==0),c(1,7:ncol(ped))]

            n.loci <- (dim(ped)[2] - 1) / 2
			
			marker_name <- names(ped)[5+2*(1:n.loci)]

        }

        else if(is.data.frame(ped)) {

			ped_org <- ped
			
			ped <- ped[which(ped[,3]==0),c(1,7:ncol(ped))]
			
            n.loci <- (dim(ped)[2] - 1) / 2

			marker_name <- names(ped)[5+2*(1:n.loci)]
			
        }

        else if(is.matrix(ped)) {
			
			ped_org <- ped
			
			ped <- ped[which(ped[,3]==0),c(1,7:ncol(ped))]
			
            n.loci <- (dim(ped)[2] - 1) / 2
			
			marker_name <- NULL
			
			if(!is.null(dimnames(ped)))

			marker_name <- dimnames(ped)[[2]][5+2*(1:n.loci)]
			
        }

		if(is.null(marker_name)){
		
			marker_name <- paste("Marker",rep(1:n.loci,each=2),sep="")
			
			marker_name <- paste(marker_name,c("_1","_2"),sep="")
			
			colnames(ped) <- c("ID",marker_name)
		}
	
	ped <- as.matrix(ped)
		
	ped.bak <- ped
	
	peddata <- ped

	for(chk in 2:ncol(peddata)){

		if(missing == 0){
			
			peddata[which(peddata[,chk]==missing),chk] <- 9
			
			peddata[which(peddata[,chk]==2),chk] <- 0
			
		}
		
		else peddata[which(peddata[,chk]==2),chk] <- 0

	}

	for(index_m in 1:n.loci){
	
		if(missing == 0){
		
			ped.anl <- peddata[which(peddata[,2+2*(index_m-1)]!=9),]
	
			peddata <- ped.anl
		
		}
		
		else{
		
			ped.anl <- peddata[which(peddata[,2+2*(index_m-1)]!=missing),]
	
			peddata <- ped.anl
			
		}
		
	}

	ped <- peddata
	
	n.ind <- dim(ped)[1]
	
	stat.ped <- count.geno( ped,bloci=2 )

	ped.c <- stat.ped[[1]]

	count <- ped.c[,1:2]

	corr.id <- stat.ped[[2]]

	if( n.ind != sum(count[,2]) ) stop("Sample size is wrong!") 
	
	n.ind1 <- nrow(ped.c)
	
	n.col <- ncol(ped.c)
	
	ped1 <- ped.c[,c(1,3:n.col)]

	getHAP <- get.HAP(ped1,outcomb=T)

	HAP1 <- getHAP[[1]]
	
	all.comb1 <- getHAP[[2]]

	n.HAP <- length(HAP1)

	start.freq <- 1 / n.HAP

	HAP.freq.start <- cbind(HAP1, start.freq)
	
	h1.re <- HAP.freq(ped, ped1, count, corr.id, HAP1, all.comb1, start.ratio.ab=start.ratio.ab ,start.freq=NULL, bloci=2, iter=iter, dv=dv,if.HWE=F,HAPcomb.out=T )
	
	h0.re <- HAP.freq(ped, ped1, count, corr.id, HAP1, all.comb1, start.ratio.ab=1 ,start.freq=NULL, bloci=2, iter=iter, dv=dv,if.HWE=T,HAPcomb.out=T )

	LRT.NM <- LH.test(h0.re[[4]],h1.re[[4]])

	EM.h1.re <- HAP.freq.EM(HAP.freq.start, all.comb1, count, start.rho=start.rho ,start.freq=NULL, iter=iter, dv=dv,if.HWE=F )
	
	EM.h0.re <- HAP.freq.EM(HAP.freq.start, all.comb1, count, start.rho=0 ,start.freq=NULL, iter=iter, dv=dv,if.HWE=T )

	LRT.IM <- LH.test(EM.h0.re[[4]],EM.h1.re[[4]])

	
	BH.h1 <- DH2BH( h1.re[[3]][,1],n.loci )
	
	BH.h1[which(BH.h1==0)] <- 2
	
	CBH.h1 <- apply(BH.h1,1,paste,collapse="")
	
	R.h1 <- data.frame(Haplotype=CBH.h1,Frequency=h1.re[[3]][,2])

	BH.EM.h1 <- DH2BH( EM.h1.re[[3]][,1],n.loci )
	
	BH.EM.h1[which(BH.EM.h1==0)] <- 2

	CBH.EM.h1 <- apply(BH.EM.h1,1,paste,collapse="")
	
	R.EM.h1 <- data.frame(Haplotype=CBH.EM.h1,Frequency=EM.h1.re[[3]][,2])

	BH.EM.h0 <- DH2BH( EM.h0.re[[3]][,1],n.loci )
	
	BH.EM.h0[which(BH.EM.h0==0)] <- 2

	CBH.EM.h0 <- apply(BH.EM.h0,1,paste,collapse="")
	
	R.EM.h0 <- data.frame(Haplotype=CBH.EM.h0,Frequency=EM.h0.re[[3]][,2])
	
	results <- list("block.name"=blockname,"Convergence process of likelihood function under EM algorithm" = EM.h0.re[[1]],
					"Haplotype frequency estimates under EM algorithm" = R.EM.h0,
					"Convergence process of likelihood function under ECM algorithm" = h1.re[[1]],
					"Haplotype frequency estimates under ECM algorithm"=R.h1,
					K=h1.re[[2]],
					"LRT statistic based on Niu's Model"=LRT.NM[[1]],
					"P value of LRT based on Niu's Model"=LRT.NM[[2]],
					"Convergence process of likelihood function under IEM algorithm" = EM.h1.re[[1]],
					"Haplotype frequency estimates under IEM algorithm" = R.EM.h1,
					rho=EM.h1.re[[2]],
					"LRT statistic based on Inbreeding Model"=LRT.IM[[1]],
					"P value of LRT based on Inbreeding Model"=LRT.IM[[2]])
	
	return(results)
    
    }


DH2BH <- function( D.Haplotype, nloci){

	if(!is.matrix(D.Haplotype)) D.Haplotype <- as.matrix(D.Haplotype)

	B.HAP <- apply(D.Haplotype,1,decBin,n=nloci)
	
        B.Haplotype <- t(B.HAP)
	
	return(B.Haplotype)
      
        }

    binDec<-function (vect) 
   
    {
    
    k = 0
    
    for (i in 1:length(vect)) {
        
        k = k + vect[i] * 2^(i - 1)
   
    }
    
    return(k)

}


decBin<-function (nb, n) 
    {
    
    v = nb
    
    if (nb >= 2^n) {
    
    print("arguments are not valid")
    
    }

    else {

        bin = vector("numeric", n)

        i = 1

        while (v != 0) {

            p = v%/%2

            if (2 * p == v) {

                bin[i] = 0

            }

            else {

                bin[i] = 1

            }

            v = p

            i = i + 1

        }

        return(bin)

    }

}


cal.pair<-function( haplotype,prob,ratio.ab=1,if2D=F ){

	if( if2D == 'F' | if2D == 'FALSE') D.HAP <- apply(haplotype,1,binDec)

	else D.HAP <- haplotype

	HAP.prob <- cbind( D.HAP,prob )

	HAP.prob <- HAP.prob[order(HAP.prob[,1]),]

	place <- 1:nrow(HAP.prob)

	cpl <- cbind(place,HAP.prob)

	result <- apply(cpl,1,cal.comb,HAP.freq=HAP.prob,ratio.ab=ratio.ab)

	p.comb <- do.call("rbind",result)

	return(p.comb)

}


cal.comb <- function(data,HAP.freq,ratio.ab=1){

	PL <- data[1]

	x <- data[2]

	n.row <- nrow(HAP.freq)

	ch <- PL:n.row

	result <- cbind(x,HAP.freq[ch,1],0,HAP.freq[PL,2],HAP.freq[ch,2])
	
	if(PL!=n.row){

		ch2 <- 2:nrow(result)

		result[1,3] <- ratio.ab * result[1,4]^2

		result[ch2,3] <- 2 * result[ch2,4] * result[ch2,5]

	}

	else result[1,3] <- ratio.ab * result[1,4]^2 

	colnames(result) <- NULL

	rownames(result) <- NULL

	return(result)
	
}


EB <- function( HAP.freq, all.HAP, ratio.ab ){
	
	if(is.vector(all.HAP)) all.HAP <- matrix(all.HAP,nrow=1)
	
	HAPmatrix <- all.HAP[,1:3]

	if(is.matrix(HAPmatrix)) HAPmatrix <- HAPmatrix[order(HAPmatrix[,1],HAPmatrix[,2],HAPmatrix[,3]),]

	else HAPmatrix <- matrix(HAPmatrix,nrow=1)
	
	all.PL <- 1:nrow(HAPmatrix)

	same.PL <- which(HAPmatrix[,2]==HAPmatrix[,3])

	diff.PL <- all.PL[-same.PL]
	
	Sec.PL <- match(HAPmatrix[,2],HAP.freq[,1])

	Thr.PL <- match(HAPmatrix[,3],HAP.freq[,1])

	Prob.comb <- HAP.freq[Sec.PL,2] * HAP.freq[Thr.PL,2]
	
	HAPmatrix <- cbind( HAPmatrix, Prob.comb )

	HAPmatrix[same.PL,4] <- ratio.ab * HAPmatrix[same.PL,4]

	HAPmatrix[diff.PL,4] <- 2 * HAPmatrix[diff.PL,4]
	
	return(HAPmatrix)
	
}


Bysum <- function(HAP.freq, geno.comb, same.pl, diff.pl, rho=0){
		
	geno.comb2 <- cbind(geno.comb[,1],0) 

	geno.comb2[same.pl,2] <- rho * HAP.freq[same.pl,1] + (1-rho) * HAP.freq[same.pl,1]^2

	geno.comb2[diff.pl,2] <- 2*(1-rho) * HAP.freq[diff.pl,1] * HAP.freq[diff.pl,2]

	Pr <- aggregate(geno.comb2,by=list(geno.comb2[,1]),sum) 
	
	Pr <- as.matrix(Pr)

	colnames(Pr) <- NULL

	Pr2 <- Pr[,c(1,3)]

	return(Pr2)

}


get.HAP <- function(ped,bloci=2,outcomb=F){
	
	nind <- nrow(ped)

	n.col <- ncol(ped)

	ped <- ped[,c(1,bloci:n.col)]
	
	all.HAP <- apply(ped,1,Gen.HAP)

	if(is.list(all.HAP)) comb.HAP<- do.call("rbind",all.HAP )

	else comb.HAP <- matrix(all.HAP,nrow=nind,byrow=T)
	
	if(outcomb == 'F' | outcomb == 'FALSE' ) {

		HAP <- sort( unique( c(comb.HAP[,2],comb.HAP[,3]) ) )

		return(HAP)

	}
	else{

		HAP <- sort( unique( c(comb.HAP[,2],comb.HAP[,3]) ) )

		result <- list( "Haplotype"=HAP, "individual combined Haplotype"=comb.HAP)

		return( result )

	}

}


Gen.HAP <- function(ind.geno){
	
	len <- length(ind.geno)

	genematrix <- matrix(ind.geno[2:len],ncol=2,byrow=T) 
	
	diff.PL <- which( genematrix[,1] != genematrix[,2] )

	same.PL <- which( genematrix[,1] == genematrix[,2] )

	ndiff <- length(diff.PL)

	same <- 2 + 2*( same.PL-1 )
	
		
		diff.p1 <- 2+2*( diff.PL-1 )

		diff.p2 <- 3+2*( diff.PL-1 )

		k <- 1

		HAP <- numeric(0)
				
		if( ndiff > 2 ){

				judge <- numeric(0)

				loop <- ndiff %/% 2
				
				for(i in 1:loop){

					ncom <- combn(1:ndiff,i)

					num.comb <- ncol(ncom)
					
					for(j in 1:num.comb){

						PL1 <- diff.p1

						PL2 <- diff.p2

						PL1[ ncom[,j] ] <- diff.p2[ ncom[,j] ]

						PL2[ ncom[,j] ] <- diff.p1[ ncom[,j] ]

						HAP1 <- binDec(ind.geno[sort(c(PL1,same))])

						HAP2 <- binDec(ind.geno[sort(c(PL2,same))])

						if( !any(judge == HAP1) ) {

							cHAP <- c(sort(c(HAP1,HAP2)),k)

							HAP <- rbind(HAP,cHAP)

							judge <- c(judge,HAP1,HAP2)

							k <- k + 1

						}

					}			

				}

			
			HAP1 <- binDec(ind.geno[sort(c(diff.p1,same))])

			HAP2 <- binDec(ind.geno[sort(c(diff.p2,same))])

			cHAP <- c(sort(c(HAP1,HAP2)),k)

			HAP <- rbind(HAP,cHAP)		

		}

		else{

			all.loci1 <- sort(c(diff.p1,same))

			all.loci2 <- sort(c(diff.p2,same))

			HAP1 <- binDec(ind.geno[all.loci1])

			HAP2 <- binDec(ind.geno[all.loci2])

			HAP <- matrix(c(sort(c(HAP1,HAP2)),1),nrow=1,byrow=T)

			if( ndiff ==2 ) {

				PL <- diff.p1[1]

				diff.p1[1] <- diff.p2[1]

				diff.p2[1] <- PL

				HAP1 <- binDec(ind.geno[sort(c(diff.p1,same))])

				HAP2 <- binDec(ind.geno[sort(c(diff.p2,same))])

				cHAP <- c(sort(c(HAP1,HAP2)),2)

				HAP <- rbind(HAP,cHAP)

			}		

		}

	HAP <- cbind(ind.geno[1],HAP)

	rownames(HAP) <- NULL	

	return(HAP)
	
}


root3 <- function(a,b,c,d){

	if(a != 0){
	
		fst <- -b/(3*a)
		
		sec <- b*c/(6*a^2) - b^3/(27*a^3) - d/(2*a)
		
		thr <- c/(3*a) - b^2/(9*a^2)
		
		delta <- sec^2 + thr^3
		
		if(delta > 0){
		
			cubic1 <- sec + sqrt(delta)
			
			cubic2 <- sec - sqrt(delta)
			
			if(cubic1 >= 0) rcubic1 <- cubic1^(1/3)
			
			else rcubic1 <- -abs(cubic1)^(1/3)
			
			if(cubic2 >= 0) rcubic2 <- cubic2^(1/3)
			
			else rcubic2 <- -abs(cubic2)^(1/3)
			
			X1 <- fst + rcubic1 + rcubic2
			
			ARe <- fst - 0.5 * rcubic1 - 0.5 * rcubic2
			
			AIm <- (sqrt(3)/2) * rcubic1 - (sqrt(3)/2) * rcubic2 

			X2 <- complex(re=ARe,im=AIm)
			
			X3 <- complex(re=ARe,im=-AIm)
		}
		else if(delta == 0){
		
			cubic1 <- sec + sqrt(delta)
			
			cubic2 <- sec - sqrt(delta)
			
			if(cubic1 >= 0) rcubic1 <- cubic1^(1/3)
			
			else rcubic1 <- -abs(cubic1)^(1/3)
			
			if(cubic2 >= 0) rcubic2 <- cubic2^(1/3)
			
			else rcubic2 <- -abs(cubic2)^(1/3)

			X1 <- fst + rcubic1 + rcubic2
			
			X2 <- fst - 0.5 * rcubic1 - 0.5 * rcubic2
			
			X3 <- fst - 0.5 * rcubic1 - 0.5 * rcubic2
		
		}
		else{
		
			fcos <- acos(sec/sqrt((-thr)^3))
			
			X1 <- fst + 2 * sqrt(-thr) * cos(fcos/3)
			
			X2 <- fst + 2 * sqrt(-thr) * cos((fcos + 2*pi)/3)
			
			X3 <- fst + 2 * sqrt(-thr) * cos((fcos - 2*pi)/3)
			
		}
		
		result<-data.frame(x1=X1,x2=X2,x3=X3)
		
		return(result)
	
	}
	else{

		stop("It is not a univariate cubic equation")

	}
	
}


sum.pair<-function( haplotype,prob,if2D=F ){

	if( if2D == 'F' | if2D == 'FALSE') D.HAP <- apply(haplotype,1,binDec)

	else D.HAP <- haplotype

	HAP.prob <- cbind( D.HAP,prob )

	HAP.prob <- HAP.prob[order(HAP.prob[,1]),]

	place <- 1:nrow(HAP.prob)

	cpl <- cbind(place,HAP.prob)

	result <- apply(cpl,1,cal.sum,HAP.freq=HAP.prob)

	p.comb <- t(result)

	rownames(p.comb) <- HAP.prob[,1]
	
	return(p.comb)

}


cal.sum <- function(data,HAP.freq){

	PL <- data[1]

	x <- data[2]

	n.row <- nrow(HAP.freq)

	ch <- PL:n.row

	result <- cbind(x,HAP.freq[ch,1],HAP.freq[PL,2],HAP.freq[ch,2])
	
	diff.sum <- 0	

	same.sum <- 0	
		
	if(PL!=n.row){

		ch2 <- 2:nrow(result)

		same.sum <- result[1,3]^2

		diff.sum <- result[ch2,3] * result[ch2,4]

	}

	else same.sum <- result[1,3]^2 

	output <- c(same.sum,sum(diff.sum))

	return(output)
	
}


Ln <- function(HAP.freq, geno.comb, count.geno, ratio.ab=1){

	geno.comb2 <- geno.comb[order(geno.comb[,1],geno.comb[,4]),]
	
	pair.P <- sum.pair(HAP.freq[,1],HAP.freq[,2],if2D=T)

	sum.T <- sum(ratio.ab * pair.P[,1],2 * pair.P[,2])
	
	first.PL <- match(geno.comb2[,2],HAP.freq[,1])

	second.PL <- match(geno.comb2[,3],HAP.freq[,1])

	HAP.prob <- HAP.freq[first.PL,2] * HAP.freq[second.PL,2]

	same.PL <- which(geno.comb2[,2] == geno.comb2[,3])

	diff.PL <- which(geno.comb2[,2] != geno.comb2[,3])
	
	geno.comb2 <- cbind(geno.comb2,HAP.prob)

	geno.comb2[same.PL,5] <- ratio.ab * geno.comb2[same.PL,5] / sum.T

	geno.comb2[diff.PL,5] <- 2 * geno.comb2[diff.PL,5] / sum.T

	colnames(geno.comb2) <- paste("V",1:5,sep="")
	
	forchoose <- as.data.frame(geno.comb2)
	
	Pr <- aggregate(geno.comb2[,c(1,5)],by=list(geno.comb2[,1]),sum)

	choosephase <- aggregate(geno.comb2[,c(1,5)],by=list(geno.comb2[,1]),max) 

	choosephase2 <- as.data.frame(choosephase[,2:3])

	maxphase <- merge(choosephase2,forchoose,by.x=c("V1","V5"),by.y=c("V1","V5")) 

	maxphase <- maxphase[order(maxphase[,1]),c(1,3,4,2)]

	maxphase <- as.matrix(maxphase)

	rownames(maxphase) <- NULL

	colnames(maxphase) <- c("id","snp1","snp2","prob") 

	LH <- sum(log(Pr[order(Pr[,1]),3])*count.geno[order(count.geno[,1]),2])
	
	result <- list(Likelihood=LH,"combined HAPlotype"=maxphase)

	return(result)

}


HAP.freq <- function(ped, ped1, count, corr.id, HAP, all.comb, start.ratio.ab=1 ,start.freq=NULL, bloci=2, iter=1000, dv=1e-8,if.HWE=F,HAPcomb.out=F ){

	pro_iter <- numeric(0)

	n.ind <- nrow(ped)

	n.HAP <- length(HAP)
	
	if( is.null(start.freq) ) start.freq <- 1 / n.HAP

	else start.freq <- start.freq
	
	n.iter <- 1
	
	if(toupper(if.HWE) == 'F' | toupper(if.HWE) == 'FALSE' ) old.ratio <- start.ratio.ab

	else old.ratio <- 1
	
	old.freq <- start.freq
	
	HAP.freq <- cbind(HAP, old.freq) 

	colnames(HAP.freq) <- c("Haplo","Freq")

	B1 <- 0

	B2 <- 0

	PL.B1 <- which(all.comb[,2]==all.comb[,3])

	len.B1 <- length(PL.B1)

	if(len.B1 != 0) B1 <- sum( count[match(all.comb[PL.B1,1],count[,1]),2] )

############# calculate the probability of all the possible haplotype pairs #############

	pair.P <- sum.pair( HAP.freq[,1],HAP.freq[,2],if2D=T )
 
	pair.rowname <- row.names(pair.P)

############# ECM algorithm #############################################################

	while(n.iter <= iter){

		Cy <<- array( 0,dim=c( n.HAP,2 ) )	

		Cy[,1] <<- HAP.freq[,1]
		
		sum.same <- sum( pair.P[,1] )
	
		sum.diff <- sum( pair.P[,2] )

		sum.all <- sum(old.ratio * pair.P[,1],2 * pair.P[,2])

		all.freq <- EB( HAP.freq,all.comb,ratio.ab=old.ratio )	

		old.geno.freq <- t( apply(ped1,1,Previous.HAPfreq,all.count=count,all.prob=all.freq) )

		old.geno.freq[,2] <- old.geno.freq[,2]/sum.all

		if( if.HWE == 'F' | if.HWE == 'FALSE' ) new.ratio <- ( 2 * B1 * sum.diff ) / ( ( n.ind - B1 ) * sum.same )

		else new.ratio <- 1

		HAP.freq.old <- HAP.freq	

		maxpL <- which(HAP.freq[,2]==max(HAP.freq[,2]))	

		maxpL.HAP <- HAP.freq[maxpL[1],1]

		pair.maxHAP <- which(pair.rowname == maxpL.HAP)
		
		for(r in 1:n.HAP){

			if(r == maxpL[1]) next
			
			cal.HAP <- HAP.freq[r,1]
		
			pair.P2 <- sum.pair(HAP.freq[-c(r,maxpL[1]),1],HAP.freq[-c(r,maxpL[1]),2],if2D=T)

			sum.same3 <- sum(pair.P2[,1])

			sum.diff3 <- sum(pair.P2[,2])

			A1 <- sum(HAP.freq[-c(r,maxpL[1]),2])

			d <- 2 * ( new.ratio - 1 )

			A2 <- d	* ( sum.same3 + sum.diff3 - A1) + new.ratio
	
			cube <- d * ( 2 * n.ind - Cy[r,2] - Cy[maxpL[1],2] )	
		
			quad <- d * ( 1 - A1 ) * ( 2 * Cy[r,2] + Cy[maxpL[1],2] - 3 * n.ind )
		
			first <- d * ( 1 - A1 )^2 * ( n.ind - Cy[r,2] ) - A2 * ( Cy[r,2] + Cy[maxpL[1],2] )
		
			const <- ( 1 - A1 ) * A2 * Cy[r,2]	

			if( cube != 0) {		
				result <- root3( cube,quad,first,const )
			
				if(is.complex( result[1,2] )) result <- as.data.frame(result[1,1])

			}
			else {	
		
				result <- -const / first

				result <- as.data.frame(result)

			}

			result[1,which(abs(result[1,]) <= 1e-8)] <- 0
			
			rp <- which( result[1,] >= 0 & result[1,] < 1 & (result[1,] + A1) <= 1 )

			if( length(rp) == 1 ) HAP.freq[r,2] <- result[1,rp]
		
			else if(length(rp) > 1){

				result2 <- result[1,rp]

				find.value <- abs(result2 - HAP.freq[r,2])

				value.PL <- which(find.value == min(find.value))

				HAP.freq[r,2] <- result2[1,value.PL[1]]

			}

		
		}
		
		sita.max <- 1 - sum( HAP.freq[-maxpL[1],2])

		HAP.freq[maxpL[1],2] <- sita.max
	
		
		Lnb <- sum( log( old.geno.freq[order(old.geno.freq[,1]),2] ) * count[order(count[,1]),2] )	

		Lna <- Ln(HAP.freq, all.comb, count,new.ratio)

		if(n.iter == 1) pro_iter <- c(0, Lnb)

		iteraction <- c(n.iter,Lna[[1]])
		
		pro_iter <- rbind(pro_iter,iteraction)

		if(n.iter == iter | abs( Lna[[1]] - Lnb ) <= dv) break

		else{	
			n.iter <- n.iter + 1

			old.ratio <- new.ratio

			pair.P <- sum.pair(HAP.freq[,1],HAP.freq[,2],if2D=T)
	
		}

	}

############# End iteration ######################################################


	if( toupper(HAPcomb.out) != 'F' | toupper(HAPcomb.out) != 'FALSE' ){
		
		maxcomb <- Lna[[2]]

		all.max <- cbind(corr.id[,1],maxcomb[match(corr.id[,2],maxcomb[,1]),2:3])

		colnames(all.max) <- c("id","HAP1","HAP2")

	}

############# Output the results #################################################

	p.out <- which(HAP.freq[,2]<1e-5)

	if ( length(p.out) != 0) Out.freq <- HAP.freq[-p.out,]

	else Out.freq <- HAP.freq

	colnames(Out.freq) <- c("Haplotype","Frequency")
	rownames(pro_iter) <- NULL
	
	if( toupper(HAPcomb.out) == 'F' | toupper(HAPcomb.out) == 'FALSE' ) 

		HAP.result <- list(iter=pro_iter,ratio.ab=new.ratio,"Haplotype.frequency.estimate" = Out.freq, Likelihood = Lna[[1]])

	else HAP.result <- list(iter=pro_iter,ratio.ab=new.ratio,"Haplotype.frequency.estimate" = Out.freq, Likelihood = Lna[[1]],maxphase.comb=all.max)

	return(HAP.result)
	
}


HAP.freq.EM <- function(HAP.freq, all.comb, count, start.rho=0 ,start.freq=NULL, iter=1000, dv=1e-8,if.HWE=F ){

	pro_iter <- numeric(0)

	nHAP <- nrow(HAP.freq)
	
	all.comb2 <- all.comb[,1:3]

	if(is.matrix(all.comb2)) all.comb2 <- all.comb2[order(all.comb2[,1],all.comb2[,2],all.comb2[,3]),]

	else all.comb2 <- matrix(all.comb2,nrow=1)
	
	all.PL <- 1:nrow(all.comb2)

	same.PL <- which(all.comb2[,2]==all.comb2[,3])

	diff.PL <- all.PL[-same.PL]

	Sec.PL <- match(all.comb2[,2],HAP.freq[,1])

	Thr.PL <- match(all.comb2[,3],HAP.freq[,1])

	Hapfreq <- array(0,dim=c(nrow(all.comb2),2))

	Hapfreq[,1] <- HAP.freq[Sec.PL,2]

	Hapfreq[,2] <- HAP.freq[Thr.PL,2]
	
	same.id <- unique(all.comb2[same.PL,1])

	diff.id <- unique(all.comb2[diff.PL,1])
	
	same.pl2 <- match(same.id,count[,1])

	diff.pl2 <- match(diff.id,count[,1])
		
	n.iter <- 1

	if(if.HWE==F) old.rho <- start.rho

	else old.rho <- 0
	
	count2 <- count[order(count[,1]),]	

	HAP.freq2 <- HAP.freq

	deno.n <- sum(count2[,2])	

	Pr.byid <- Bysum(Hapfreq, all.comb2, same.PL, diff.PL, rho=old.rho)

	Lnb <- sum(log(Pr.byid[,2])*count2[,2])	

	pro_iter <- c(0, Lnb)
	
	while(n.iter <= iter){

		if(if.HWE==F){
	
		new.rho <- sum(old.rho * Hapfreq[same.PL,1] /(old.rho * Hapfreq[same.PL,1] + (1 - old.rho) * Hapfreq[same.PL,1]^2)*count[same.pl2,2])/deno.n

		}

		else new.rho <- 0

		
		for(i in 1:nHAP){
		
			hap.cal <- HAP.freq2[i,1]

			zero.hap <- which(HAP.freq[i,2]<1e-100)

			cal.pl1 <- which(all.comb2[,2]==hap.cal & all.comb2[,3]==hap.cal)

			cal.pl2 <- which((all.comb2[,2]==hap.cal & all.comb2[,3]!=hap.cal) | (all.comb2[,2]!=hap.cal & all.comb2[,3]==hap.cal))			
			
			if(length(cal.pl1)!=0){
				
				cal.id1 <- all.comb2[cal.pl1,1]

				c.pl1 <- match(cal.id1, count[,1])

				c.sum.pl1 <- match(cal.id1,Pr.byid[,1])
				
				E1 <- sum((((old.rho * Hapfreq[cal.pl1,1]+2*(1 - old.rho) * Hapfreq[cal.pl1,1]^2) / Pr.byid[c.sum.pl1,2])*count[c.pl1,2]))
			}
			else E1 <- 0
			
			if(length(cal.pl2)!=0){
			
				cal.id2 <- all.comb2[cal.pl2,1]

				c.pl2 <- match(cal.id2, count[,1])

				c.sum.p2 <- match(cal.id2,Pr.byid[,1])

				E2 <- sum(2*((1-old.rho) * Hapfreq[cal.pl2,1] * Hapfreq[cal.pl2,2] / Pr.byid[c.sum.p2,2]) * count[c.pl2,2])
			}
			else E2 <- 0

			HAP.freq2[i,2] <- E1 + E2
		}

		HAP.freq2[,2] <- HAP.freq2[,2]/sum(HAP.freq2[,2])

		Hapfreq[,1] <- HAP.freq2[Sec.PL,2]

		Hapfreq[,2] <- HAP.freq2[Thr.PL,2]

		Pr.byid2 <- Bysum(Hapfreq, all.comb2, same.PL, diff.PL, rho=new.rho)

		Lna <- sum(log(Pr.byid2[,2])*count2[,2])

		iteraction <- c(n.iter, Lna)
		
		pro_iter <- rbind(pro_iter, iteraction)
		
		if(n.iter == iter | abs( Lna - Lnb ) <= dv) break

		else{	
			n.iter <- n.iter + 1

			old.rho <- new.rho

			Lnb <- Lna

			Pr.byid <- Pr.byid2

		}
		
	}
	
	p.out <- which(HAP.freq2[,2]<1e-5)

	if(length(p.out)!=0) Out.HAP <- HAP.freq2[-p.out,]

	else Out.HAP <- HAP.freq2

	colnames(Out.HAP) <- c("Haplotype","Frequency")

	rownames(pro_iter) <- NULL
	
	results <- list(iter=pro_iter,rho=new.rho,"Haplotype.frequency.estimate" = Out.HAP,"Value of Likelihood Function" = Lna)

	return(results)

}


# Likelihood ratio test

LH.test <- function( h0.LH, h1.LH ){

	stat <- ( h1.LH - h0.LH ) * 2

	P.value <- 1 - pchisq( stat,1 )
	
	result <- list( stat=stat,P.value=P.value)

	return(result)
}


sort.snps <- function( data,bloci=2 ){

	len <- length( data )
	
	bs.snps <- matrix(data[ bloci:len ],ncol=2,byrow=T)
	
	af.snps <- apply(bs.snps,1,sort)
	
	af.data <- matrix(c(data[1:(bloci-1)],as.vector(af.snps)))

	return(af.data)
	
}


count.geno <- function(ped,bloci=2){

	ped1 <- t( apply(ped,1,sort.snps,bloci=bloci) )	
	
	n.col <- ncol(ped1)

	n.loci <- (n.col - bloci +1 )/2
	
	HAP1 <- apply(ped1[,seq(bloci,n.col-1,by=2)],1,binDec)

	HAP2 <- apply(ped1[,seq((bloci+1),n.col,by=2)],1,binDec)

	geno <- paste(HAP1,HAP2,sep="/")

	ped.D <- as.data.frame(cbind(ped1[,1],HAP1,HAP2,geno))
	
	count <- as.vector(table(geno))	

	geno2 <- sort(unique(geno))

	str.geno <- strsplit(geno2,"/")

	str.geno <- do.call("rbind",str.geno)

	geno3 <- matrix(as.numeric(str.geno),ncol=2)
	
	n.count <- length(count)	

	id <- 1:n.count

	find.id <- data.frame(geno2=geno2,id=id)

	if(is.vector(find.id)) find.id <- matrix(find.id,nrow=1)

	ped2 <- cbind(id,count,geno3)

	if(is.vector(ped2)) ped2 <- matrix(ped2,nrow=1)
	
	new.col <- 2+2*n.loci

	ped3 <- array(0,dim=c(n.count,new.col)) 

	ped3[,1:2] <- ped2[,1:2]
	
	HAP.D1 <- as.matrix(ped2[,3])

	HAP.B1 <- t( apply(HAP.D1,1,decBin,n.loci) )

	ped3[,seq(3,new.col-1,by=2)] <- HAP.B1
	
	HAP.D2 <- as.matrix(ped2[,4])

	HAP.B2 <- t( apply(HAP.D2,1,decBin,n.loci) )

	ped3[,seq(4,new.col,by=2)] <- HAP.B2

	ped.find <- cbind( ped.D,find.id[match(ped.D[,4],find.id[,1]),2] )

	corr <- ped.find[,c(1,5)]

	corr <- as.matrix(corr)

	corr <- matrix(as.numeric(corr),ncol=2)

	colnames(corr) <- c("old.id","new.id") 
	
	result <- list(ped=ped3,corr=corr)

	return(result)

}


Previous.HAPfreq <- function(ind.geno,all.count,all.prob){

	ind.id <- ind.geno[1]
	
	c.freq <- all.count[ which( all.count[,1]==ind.id ),2 ]
	
	ind.prob <- all.prob[which(all.prob[,1]==ind.id),]

	if(is.vector(ind.prob)) ind.prob <- matrix(ind.prob,nrow=1)
	
	Pr.g <- sum( ind.prob[,4] )

	ind.geno.freq <- c( ind.id,Pr.g )
	
	sp2 <- which( ind.prob[,2] == ind.prob[,3] )
	
	ind.all.HAP <- sort(unique(c(ind.prob[,2],ind.prob[,3])))

	n.ind.HAP <- length( ind.all.HAP )
	
	for(j in 1:n.ind.HAP){

		ind.H <- ind.all.HAP[j]

		Cy.PL <- which(Cy[,1]==ind.H) 
			
		ind.p <- which( ind.prob[,2] == ind.H | ind.prob[,3] == ind.H )

		if(length(ind.p) > 0){

		if(ifelse(length(sp2) >0, ind.p == sp2, 0)) {Cy[Cy.PL,2] <<- Cy[Cy.PL,2] + c.freq * 2 * sum( ind.prob[ind.p,4] / Pr.g )}

			else Cy[Cy.PL,2] <<- Cy[Cy.PL,2] + c.freq * sum( ind.prob[ind.p,4] / Pr.g )
		}

	}

	return(ind.geno.freq)			
}