############################################################################################## ## source('C-PATu.R') ## ## CPATu("ped.txt",loci=NULL,c=NULL,status_missing=0,allele_missing=0,allele=1,header=T) ## ## ## ## $c ## ## [1] "c1=0.55" "c2=0.685" ## ## ## ## $pvalue ## ## PDT C-PAT C-PATu1 C-PATu2 ## ## SNP1_1 0.8886379 0.3093287 0.5618444 0.8570546 ## ## SNP2_1 0.3020282 0.3399154 0.5275499 0.7428609 ## ## ## ## $Tstat ## ## PDT C-PAT C-PATu1 C-PATu2 ## ## SNP1_1 0.140028 1.0166317 0.5801041 0.1801248 ## ## SNP2_1 -1.032094 -0.9543324 -0.6317506 -0.3280672 ## ## ## ############################################################################################## # # This code contains the methods of PDT for association study, and C-PAT and C-PATu for detecting imprinting effects # at qualitative trait loci by simultaneously using the case (affected) and control (unaffected) children in # nuclear families selected from the extended pedigrees # # The results may be different for different runs, because of the sampling randomness # for two-generation nuclear families selected from the extended pedigrees # # Input has multiple pedigrees # # ped: the name of a standard linkage pedigree file or # a matrix/dataframe containing the pedigrees # the 6th column is the affection status (1 = unaffected, 2 = affected) # ped is required # # loci: the name of a standard linkage loci file # Note that only SNP markers are suitable for the code # # c: the disease prevalence in the population under study, which is used in the C-PATu statistic # If c is given, then this code outputs one value of the C-PATu statistic for each marker # If c = NULL, it will be estimated by # (1) the affection statuses of all the typed nonfounders in the dataset, denoted by c1; # (2) the affection statuses of all the offspring in the two-generation nuclear # family sample randomly selected, denoted by c2, where only one family is randomly selected from an extended pedigree # Then, this code outputs two values of the C-PATu statistic based on these two c values for each marker # # status_missing : the input variable "status_missing" is the missing value for the affection status in the data files, # and the default value is 0. # It can take NA, but cannot take any other string values. # # For each SNP locus, there are two alleles 1 and 2 # # allele_missing : the input variable "allele_missing" represents that the allele at the locus is missing, e.g., # it may be 9 in some data files; or other numeric value # it cannot be NA or string values # # allele: allele code of interest # # header: if ped contains variable names, then set header = True (or T) # The default value is False (or F) # # In case of multiple markers, each marker will be analyzed seperately # # References: # PDT: # Martin ER, Monks SA, Warren LL and Kaplan NL. # (2000) A test for linkage and association in general pedigrees: # the pedigree disequilibrium test. Am J Hum Genet 67:146-154 # # C-PAT: # Zhou JY, Hu YQ, Lin S and Fung WK. # (2009) Detection of parent-of-origin effects based on complete and # incomplete nuclear families with multiple affected children. Hum Hered 67:1-12 # # C-PATu: # Zhou JY, Mao WG, Li DL, Hu YQ, Xia F and Fung WK. # (2011) A powerful parent-of-origin effects test for qualitative traits incorporating unaffected children in nuclear families. (submitted) # # Results: # # C-PATu1: the C-PATu value based on c1 # C-PATu2: the C-PATu value based on c2 # # The positive PDT value with the significant result for association shows that # the disease is positively associated with allele 1, the positive C-PAT/CPATu value with # significant result for imprinting shows the maternal imprinting, and the negative C-PAT/CPATu value with # significant result for imprinting shows the paternal imprinting # # The negative PDT value with the significant result for association shows that # the positive C-PAT/CPATu value with significant result for # imprinting shows the paternal imprinting, and the negative C-PAT/CPATu value with # significant result for imprinting shows the maternal imprinting `CPATu` <- function(ped,loci=NULL,c=NULL,status_missing=0,allele_missing=NULL,allele=1,header=F) { marker.name <- NULL # read in pedigree file if it is provided if (is.character(ped)) { pedfile <- ped ped <- read.table(pedfile,header=header,as.is=T) n.loci <- (dim(ped)[2] - 6) / 2 n.ind <- dim(ped)[1] marker.name <- names(ped)[5+2*(1:n.loci)] } else if(is.data.frame(ped)) { n.loci <- (dim(ped)[2] - 6) / 2 n.ind <- dim(ped)[1] marker.name <- names(ped)[5+2*(1:n.loci)] # read in marker names from ped } else if(is.matrix(ped)) { n.loci <- (dim(ped)[2] - 6) / 2 n.ind <- dim(ped)[1] if(!is.null(dimnames(ped))) marker.name <- dimnames(ped)[[2]][5+2*(1:n.loci)] } ped <- as.matrix(ped) if(is.na(status_missing)){ ped[which(ped[,6] == 1),6] <-0 ped[which(ped[,6] == 2),6] <-1 } else if (status_missing!=0){ ped[which(ped[,6] == 1),6] <-0 ped[which(ped[,6] == 2),6] <-1 } else if(status_missing==0){ ped[which(ped[,6] == 0),6] <-9 ped[which(ped[,6] == 1),6] <-0 ped[which(ped[,6] == 2),6] <-1 status_missing <- 9 } # read in loci file if it is provided if (is.character(loci)) { locifile <- loci loci.raw <- readLines(locifile) # raw input loci.raw2 <- strsplit(loci.raw, " ") n.loci.2 <- as.integer(loci.raw2[[1]][1]) if (n.loci != n.loci.2) stop("Numbers of loci in pedigree data and marker data don't match") if ("#" %in% strsplit(loci.raw[4],"") && is.null(marker.name)) { marker.name <- character(n.loci) for(i in 1:n.loci) marker.name[i] <- strsplit(loci.raw[2+i*2],"#")[[1]][2] } } if (is.null(marker.name)) { marker.name <- paste("Marker",1:n.loci) } # Output PDT, C-PAT, C-PATu1 and C-PATu2 if(!is.null(c)){ pvalue <- matrix(0,ncol=3,nrow=n.loci) dimnames(pvalue) <- list(marker.name, c("PDT","C-PAT","C-PATu")) Tstat <- matrix(0,ncol=3,nrow=n.loci) dimnames(Tstat) <- list(marker.name, c("PDT","C-PAT","C-PATu")) n.family <- length(unique(ped[,1])) n.ind <- dim(ped)[1] # PDT: ped1, C-PAT: ped2, C-PATu: ped3 ped1 <- ped #PDT # randomly select nuclear families ped<-ped1 ped<-famselected(ped,allele_missing) ped2<-ped ped3 <- ped # C-PATu # C-PAT && C-PATu for(i in 1:n.loci){ j <- 2*i+5 result1 <- PDT(ped1[,c(1:6,j:(j+1))],allele_missing,allele) pvalue[i,1] <- result1$pvalue Tstat[i,1] <- result1$Tstat result2 <- CPAT.ind(ped2[,c(1:6,j:(j+1))],status_missing,allele_missing) pvalue[i,2] <- result2$pvalue Tstat[i,2] <- result2$Tstat result3 <- CPATu.ind(ped3[,c(1:6,j:(j+1))],status_missing,allele_missing,c_val=c) pvalue[i,3] <- result3$pvalue Tstat[i,3] <- result3$Tstat } } else if(is.null(c)){ pvalue <- matrix(0,ncol=4,nrow=n.loci) dimnames(pvalue) <- list(marker.name, c("PDT","C-PAT","C-PATu1","C-PATu2")) Tstat <- matrix(0,ncol=4,nrow=n.loci) dimnames(Tstat) <- list(marker.name, c("PDT","C-PAT","C-PATu1","C-PATu2")) family.id <- unique(ped[,1]) n.family <- length(family.id) n.ind <- dim(ped)[1] if (is.na(status_missing)) { nonfounder.id <- which(ped[,3] != 0 & !is.na(ped[,6])) } else if (!is.na(status_missing)) { nonfounder.id <- which(ped[,3] != 0 & ped[,6] != status_missing) } c1 <- mean(ped[nonfounder.id,6]) # ped1: PDT; ped2: C-PAT; ped3: C-PATu1; ped4: C-PATu2 ped1<-ped #PDT # randomly select nuclear families ped <- famselected(ped,allele_missing) ped0 <- ped family.id0 <- unique(ped0[,1]) n.family0 <- length(family.id0) n.ind0 <- dim(ped0)[1] offpos0 <- numeric(0) for (i in 1:n.family0) { fam <- family.id0[i] family1 <- which(ped0[,1]==fam) fid <- family1[which(match(ped0[family1,2],ped0[family1,3])!=0)] mid <- family1[which(match(ped0[family1,2],ped0[family1,4])!=0)] offpos.i <- family1[which(family1 != fid & family1!=mid)] offpos0 <- c(offpos0, offpos.i) } ped.off <- ped0[offpos0,6] if (is.na(status_missing)) { ped.off.unmis <- which(!is.na(ped.off)) } else if (!is.na(status_missing)) { ped.off.unmis <- which(ped.off != status_missing) } c2 <- mean(ped.off[ped.off.unmis]) ped2 <- ped #C-PAT ped3 <- ped #C-PATu1 ped4 <- ped #C-PATu2 for(i in 1:n.loci){ j <- 2*i+5 result1 <- PDT(ped1[,c(1:6,j:(j+1))],allele_missing,allele) pvalue[i,1] <- result1$pvalue Tstat[i,1] <- result1$Tstat result2 <- CPAT.ind(ped2[,c(1:6,j:(j+1))],status_missing,allele_missing) pvalue[i,2] <- result2$pvalue Tstat[i,2] <- result2$Tstat result3 <- CPATu.ind(ped3[,c(1:6,j:(j+1))],status_missing,allele_missing,c_val=c1) pvalue[i,3] <- result3$pvalue Tstat[i,3] <- result3$Tstat result4 <- CPATu.ind(ped4[,c(1:6,j:(j+1))],status_missing,allele_missing,c_val=c2) pvalue[i,4] <- result4$pvalue Tstat[i,4] <- result4$Tstat } } if(is.null(c)){ list(c=paste(c("c1=","c2="),c(c1,c2),sep=''),pvalue = pvalue ,Tstat = Tstat) } else list(c=c,pvalue = pvalue ,Tstat = Tstat) } # CPATu `CPATu.ind` <- function(ped,status_missing,allele_missing,c_val) { family.id <- unique(ped[,1]) n.fam <- length(family.id) PATu <- 0 PATu.2 <- 0 nCM1 <- 0 nCF1 <- 0 nCM1.2 <- 0 nCF1.2 <- 0 nCM0 <- 0 nCF0 <- 0 nCM0.2 <- 0 nCF0.2 <- 0 n10M <- 0 n10F <- 0 cross.m <- 0 cross.f <- 0 onePAT.m <- 0 onePAT.f <- 0 onePAT.m.2 <- 0 onePAT.f.2 <- 0 oneUPAT.m <- 0 oneUPAT.f <- 0 oneUPAT.m.2 <- 0 oneUPAT.f.2 <- 0 for (i in 1:n.fam) { fam <- family.id[i] family1 <- which(ped[,1]==fam) fid <- family1[which(match(ped[family1,2],ped[family1,3])!=0)] mid <- family1[which(match(ped[family1,2],ped[family1,4])!=0)] # father and mother are not missing. Combine the PAT and PATu if (ped[fid, 7] != allele_missing & ped[mid, 7] != allele_missing) { ped1<-ped[family1,] if(is.na(status_missing)){ status.avail <- which(!is.na(ped[family1,6])) } else if(!is.na(status_missing)){ status.avail <- which(ped[family1,6]!=status_missing) } ped1[status.avail,6] <- ped1[status.avail,6]-c_val PATu.output <- PATu.num(ped1,ped[fid,2],ped[mid,2],status_missing,allele_missing) PATu <- PATu + PATu.output PATu.2 <- PATu.2 + PATu.output^2 } # father or mother is missing else if (ped[fid, 7] == allele_missing & ped[mid, 7] != allele_missing) { ped1 <- ped[family1,] fid1 <- which(ped1[,2]==ped[fid,2]) mid1 <- which(ped1[,2]==ped[mid,2]) id.afoff <- which(ped1[,2]!=ped[fid,2] & ped1[,2]!=ped[mid,2] & ped1[,6]== 1) id.uafoff <- which(ped1[,2]!=ped[fid,2] & ped1[,2]!=ped[mid,2] & ped1[,6]== 0) n10M <- n10M + length(id.afoff) * length(id.uafoff) if(length(id.afoff)>0){ ped2 <- ped1[c(mid1,id.afoff),] onePAT.output <- onePAT.num(ped2,status_missing,allele_missing) nCM1 <- nCM1 + onePAT.output$Li nCM1.2 <- nCM1.2 + onePAT.output$Li^2 onePAT.m <- onePAT.m + onePAT.output$stat onePAT.m.2 <- onePAT.m.2 + onePAT.output$stat^2 } if(length(id.uafoff)>0){ ped2 <- ped1[c(mid1,id.uafoff),] oneUPAT.output <- onePAT.num(ped2,status_missing,allele_missing) nCM0 <- nCM0 + oneUPAT.output$Li nCM0.2 <- nCM0.2 + oneUPAT.output$Li^2 oneUPAT.m <- oneUPAT.m - oneUPAT.output$stat oneUPAT.m.2 <- oneUPAT.m.2 + oneUPAT.output$stat^2 } if(length(id.afoff)>0 & length(id.uafoff)>0){ ped2 <- ped1[c(mid1,id.afoff,id.uafoff),] cross.output <- cross.num(ped2,ped1[mid1,2],status_missing,allele_missing) cross.m <- cross.m + cross.output } } else if (ped[fid, 7] != allele_missing & ped[mid, 7] == allele_missing) { ped1 <- ped[family1,] fid1 <- which(ped1[,2]==ped[fid,2]) mid1 <- which(ped1[,2]==ped[mid,2]) id.afoff <- which(ped1[,2]!=ped[fid,2] & ped1[,2]!=ped[mid,2] & ped1[,6]== 1) id.uafoff <- which(ped1[,2]!=ped[fid,2] & ped1[,2]!=ped[mid,2] & ped1[,6]== 0) n10F <- n10F + length(id.afoff) * length(id.uafoff) if(length(id.afoff)>0){ ped2 <- ped1[c(fid1,id.afoff),] onePAT.output <- onePAT.num(ped2,status_missing,allele_missing) nCF1 <- nCF1 + onePAT.output$Li nCF1.2 <- nCF1.2 + onePAT.output$Li^2 onePAT.f <- onePAT.f - onePAT.output$stat onePAT.f.2 <- onePAT.f.2 + onePAT.output$stat^2 } if(length(id.uafoff)>0){ ped2 <- ped1[c(fid1,id.uafoff),] oneUPAT.output <- onePAT.num(ped2,status_missing,allele_missing) nCF0 <- nCF0 + oneUPAT.output$Li nCF0.2 <- nCF0.2 + oneUPAT.output$Li^2 oneUPAT.f <- oneUPAT.f + oneUPAT.output$stat oneUPAT.f.2 <- oneUPAT.f.2 + oneUPAT.output$stat^2 } if(length(id.afoff)>0 & length(id.uafoff)>0){ ped2 <- ped1[c(fid1,id.afoff,id.uafoff),] cross.output <- cross.num(ped2,ped1[fid1,2],status_missing,allele_missing) cross.f <- cross.f + cross.output } } } if (nCF1 == 0 || nCM1 == 0) { w <- 0 w1 <- 0 Const <- 0 } else if (nCF1 != 0 & nCM1 != 0) { w <- nCF1/(nCF1+nCM1) w1 <- 1 - w Const <- (nCF1^2*nCM1.2+nCM1^2*nCF1.2)/(nCM1*nCF1*(nCF1+nCM1)^2) } if (nCF0 == 0 || nCM0 == 0) { w0 <- 0 w0_1 <- 0 Const0 <- 0 } else if (nCF0 != 0 & nCM0 != 0) { w0 <- nCF0/(nCF0+nCM0) w0_1 <- 1 - w0 Const0 <- (nCF0^2*nCM0.2+nCM0^2*nCF0.2)/(nCM0*nCF0*(nCF0+nCM0)^2) } onePAT.stat.num <- w * onePAT.m + w1 * onePAT.f onePAT.stat.den <- w^2 * onePAT.m.2 + w1^2 * onePAT.f.2 + Const * onePAT.m * onePAT.f oneUPAT.stat.num <- w0 * oneUPAT.m + w0_1 * oneUPAT.f oneUPAT.stat.den <- w0^2 * oneUPAT.m.2 + w0_1^2 * oneUPAT.f.2 + Const0 * oneUPAT.m * oneUPAT.f if(nCM1 > 0 & nCF0+nCM0 > 0) for.cross1 <- -n10M/((nCF1+nCM1) * (nCF0+nCM0)) * onePAT.m * oneUPAT.f*nCF1/nCM1 else for.cross1 <- 0 if(nCF1 > 0 & nCF0+nCM0 > 0) for.cross2 <- -n10F/((nCF1+nCM1) * (nCF0+nCM0)) * onePAT.f * oneUPAT.m * nCM1/nCF1 else for.cross2 <- 0 cross.den <- w * w0 *cross.m + w1 * w0_1 * cross.f + for.cross1 + for.cross2 CPATu.stat.num <- PATu + (1-c_val) * onePAT.stat.num + c_val * oneUPAT.stat.num CPATu.stat.den <- PATu.2 + (1-c_val)^2 * onePAT.stat.den + c_val^2 * oneUPAT.stat.den -2 * (1-c_val) * c_val * cross.den CPATu.stat <- CPATu.stat.num/sqrt(CPATu.stat.den) p.CPATu <- 1 - pchisq(CPATu.stat^2,1) list(pvalue=p.CPATu,Tstat=CPATu.stat) } `CPAT.ind` <- function(ped,status_missing,allele_missing) { family.id <- unique(ped[,1]) n.fam <- length(family.id) PAT <- 0 PAT.2 <- 0 nCM <- 0 nCF <- 0 nCM.2 <- 0 nCF.2 <- 0 onePAT.m <- 0 onePAT.f <- 0 onePAT.m.2 <- 0 onePAT.f.2 <- 0 for (i in 1:n.fam) { fam <- family.id[i] family1 <- which(ped[,1]==fam) fid <- family1[which(match(ped[family1,2],ped[family1,3])!=0)] mid <- family1[which(match(ped[family1,2],ped[family1,4])!=0)] # both father and mother are not missing if (ped[fid, 7] != allele_missing & ped[mid, 7] != allele_missing) { PAT.output <- PAT.num(ped[family1,],ped[fid,2],ped[mid,2],status_missing,allele_missing) PAT <- PAT + PAT.output PAT.2 <- PAT.2 + PAT.output^2 } # father or mother is missing else if (ped[fid, 7] == allele_missing & ped[mid, 7] != allele_missing) { ped1 <- ped[family1,] fid1 <- which(ped1[,2]==ped[fid,2]) mid1 <- which(ped1[,2]==ped[mid,2]) id.afoff <- which(ped1[,2]!=ped[fid,2] & ped1[,2]!=ped[mid,2]&ped1[,6]==1) if(length(id.afoff) > 0){ ped2 <- ped1[c(mid1,id.afoff),] onePAT.output <- onePAT.num(ped2,status_missing,allele_missing) nCM <- nCM + onePAT.output$Li nCM.2 <- nCM.2 + onePAT.output$Li^2 onePAT.m <- onePAT.m + onePAT.output$stat onePAT.m.2 <- onePAT.m.2 + onePAT.output$stat^2 } } else if (ped[fid, 7] != allele_missing & ped[mid, 7] == allele_missing) { ped1 <- ped[family1,] fid1 <- which(ped1[,2]==ped[fid,2]) mid1 <- which(ped1[,2]==ped[mid,2]) id.afoff <- which(ped1[,2]!=ped[fid,2] & ped1[,2]!=ped[mid,2]&ped1[,6]==1) if(length(id.afoff) > 0){ ped2 <- ped1[c(fid1,id.afoff),] onePAT.output <- onePAT.num(ped2,status_missing,allele_missing) nCF <- nCF + onePAT.output$Li nCF.2 <- nCF.2 + onePAT.output$Li^2 onePAT.f <- onePAT.f - onePAT.output$stat onePAT.f.2 <- onePAT.f.2 + onePAT.output$stat^2 } } } if (nCF == 0 || nCM == 0) { w <- 0 w1 <- 0 Const <- 0 } else if (nCF != 0 & nCM != 0) { w <- nCF/(nCF+nCM) w1 <- 1 - w Const <- (nCF^2*nCM.2+nCM^2*nCF.2)/(nCM*nCF*(nCF+nCM)^2) } CPAT.stat.num <- PAT + w * onePAT.m + w1 * onePAT.f CPAT.stat.den <- PAT.2 + w^2 * onePAT.m.2 + w1^2 * onePAT.f.2 + Const * onePAT.m * onePAT.f CPAT.stat <- CPAT.stat.num/sqrt(CPAT.stat.den) p.CPAT <- 1 - pchisq(CPAT.stat^2,1) list(pvalue=p.CPAT,Tstat=CPAT.stat) } `PATu.num` <- function(ped,fid2,mid2,status_missing,allele_missing) { fid <- which(ped[,2]==fid2) mid <- which(ped[,2]==mid2) id.off <- which(ped[,2]!=fid2 & ped[,2]!=mid2) #the id of all the children(include the affected and unaffected) no.off <- length(id.off) stat <- 0 if (ped[fid,7] + ped[fid,8] != ped[mid,7] + ped[mid,8]) { for (ind in 1:no.off) { if ( is.na(ped[id.off[ind],6]) || (!is.na(status_missing) & ped[id.off[ind],6] == status_missing) || ped[id.off[ind],7] == allele_missing || ped[id.off[ind],7] == ped[id.off[ind],8]) next if (ped[fid,7] + ped[fid,8] - ped[mid,7] - ped[mid,8] < 0) stat <- stat + ped[id.off[ind],6] else if (ped[fid,7] + ped[fid,8] - ped[mid,7] - ped[mid,8] > 0) stat <- stat - ped[id.off[ind],6] } } stat } `onePATu.num` <- function(ped,status_missing,allele_missing) { ped1 <- ped no.off <- dim(ped1)[1] - 1 stat <- 0 for (ind in 1:no.off) { offpos <- ind + 1 if (is.na(ped[offpos, 6]) || (!is.na(status_missing) & ped[offpos, 6] == status_missing) || ped[offpos,7] == allele_missing || ped[offpos,7] == ped[offpos,8]) next if (ped[1,7] + ped[1,8] - ped[offpos,7] - ped[offpos,8] < 0) stat <- stat - ped[offpos,6] else if (ped[1,7] + ped[1,8] - ped[offpos,7] - ped[offpos,8] > 0) stat <- stat + ped[offpos,6] } list(Li = no.off, stat = stat) } `cross.num` <- function(ped,pa_id,status_missing,allele_missing) { ped1 <- ped no.off <- dim(ped1)[1] - 1 stat <- 0 affect.ind <- which(ped1[,2] != pa_id & ped1[,6]==1) uaffect.ind <- which(ped1[,2] != pa_id & ped1[,6]==0) n1 <- length(affect.ind) n0 <- length(uaffect.ind) cross <- 0 for(aff in 1:n1){ for(uaff in 1:n0){ if(is.na(ped1[affect.ind[aff],6]) || is.na(ped1[uaffect.ind[uaff],6]) || (!is.na(status_missing) & ped1[affect.ind[aff], 6] == status_missing ) || (!is.na(status_missing) & ped1[uaffect.ind[uaff], 6] == status_missing )) next if((ped1[affect.ind[aff],7] != ped1[affect.ind[aff],8]) & (ped1[uaffect.ind[uaff],7] != ped1[uaffect.ind[uaff],8]) & ((ped1[1,7] + ped1[1,8]) != (ped1[affect.ind[aff],7] + ped1[affect.ind[aff],8]))) cross<-cross+1 } } cross } `PAT.num` <- function(ped,fid2,mid2,status_missing,allele_missing) { fid <- which(ped[,2]==fid2) mid <- which(ped[,2]==mid2) id.afoff <- which(ped[,2]!=fid2 & ped[,2]!=mid2 & ped[,6]==1) #the id of all the children(include the affected and unaffected) #the id of all the affected children stat <- 0 # for calculating the PAT if(length(id.afoff)>0){ no.afoff <-length(id.afoff) if (ped[fid,7] + ped[fid,8] != ped[mid,7] + ped[mid,8]) { for (ind in 1:no.afoff) { if ( is.na(ped[id.afoff[ind],6]) || (!is.na(status_missing) & ped[id.afoff[ind],6]==status_missing) || ped[id.afoff[ind],7] == allele_missing || ped[id.afoff[ind],7] == ped[id.afoff[ind],8]) next if (ped[fid,7] + ped[fid,8] - ped[mid,7] - ped[mid,8] < 0) stat <- stat + 1 else if (ped[fid,7] + ped[fid,8] - ped[mid,7] - ped[mid,8] > 0) stat <- stat - 1 } } } stat } `onePAT.num` <- function(ped,status_missing,allele_missing) { no.off <- dim(ped)[1] - 1 offpos<-0 stat <- 0 for (ind in 1:no.off) { offpos <- ind + 1 if (is.na(ped[offpos,6]) || (!is.na(status_missing) & ped[offpos,6]==status_missing) || ped[offpos,7] == allele_missing || ped[offpos,7] == ped[offpos,8]) next if (ped[1,7] + ped[1,8] - ped[offpos,7] - ped[offpos,8] < 0) stat <- stat - 1 else if (ped[1,7] + ped[1,8] - ped[offpos,7] - ped[offpos,8] > 0) stat <- stat + 1 } list(Li = no.off, stat = stat) } `PDT` <- function (peds,allele_missing,allele) # Calculate the PDT statistic for general pedigrees # No parental genotype reconstructions # peds: pedigree data, which can have multiple pedigrees and loci # locus: position of the marker of interest # allele: allele of interest { peds<-peds # extract family ids familyid <- unique(peds[,1]) nfamily <- length(familyid) # statistic for each family Dstat <- rep(0,nfamily) for (i in 1:nfamily) { #data of one family ped <- peds[peds[,1]==familyid[i],] #offspring position (easier to code than using offspring id) offpos <- which(ped[,3]!=0) noff <- length(offpos) offused <- rep(0,noff) repeat { #first unused offspring off <- offpos[offused==0][1] if (is.na(off)) break #father id and mother id fid <- ped[off,3] mid <- ped[off,4] fpos <- which(ped[,2]==fid) mpos <- which(ped[,2]==mid) # nuclear family alloff <- which(ped[,3]==fid & ped[,4]==mid) offused[match(alloff,offpos)] <- 1 allpos <- c(fpos,mpos,alloff) Dstat[i] <- Dstat[i] + nufam(ped[allpos,],allele_missing,allele) } } # PDT statisitc for all families Tstat <- sum(Dstat)/sqrt(sum(Dstat*Dstat)) Pvalue <- pnorm(Tstat,0,1,lower.tail=F) Pvalue <- 2*min(Pvalue,1-Pvalue) # 2-sided p-value list(D=data.frame(family=familyid,Dstat=Dstat),Tstat=Tstat,pvalue=Pvalue) } `nufam` <- function (ped,allele_missing,allele) # Calculate D statisic for a single nuclear family # No parental genotype reconstrutions # ped: data for one nuclear family with one marker locus, # father at row 1, mother at row 2 # allele: allele code of interest { # parents and offspring postions fpos <- 1 mpos <- 2 offpos <- 3:dim(ped)[1] npos <- length(offpos) Dstat <- 0 n_triad <- 0 n_DSP <- 0 if (ped[mpos,7] !=allele_missing && ped[mpos,8] !=allele_missing && ped[fpos,7] !=allele_missing && ped[fpos,8] !=allele_missing) { for (i in 1:npos) { if (ped[offpos[i],6]!=1) next # unaffected or unknown if (ped[offpos[i],7]==allele_missing) next # untyped D_triad <- triad(ped[c(fpos,mpos,offpos[i]),],allele) Dstat <- Dstat + D_triad } } # pick up DSPs aff_o <- 2 + which(ped[offpos,6]==1 & ped[offpos,7] != allele_missing) unaff_o <- 2 + which(ped[offpos,6]==0 & ped[offpos,7] !=allele_missing) n_aff_o <- length(aff_o) n_unaff_o <- length(unaff_o) if (n_aff_o > 0 && n_unaff_o > 0) { for (i in 1:n_aff_o) { for (j in 1:n_unaff_o) { D_DSP <- DSP(ped[c(aff_o[i],unaff_o[j]),],allele) Dstat <- Dstat + D_DSP } } } Dstat } `triad` <- function (ped,allele) # calculate D statistic for one family triad and one marker locus # assume everyone is typed # ped: father at row 1, mother at row 2, affected offspring at row 3 # allele: allele of interest { Dstat <- 2*sum(ped[3,c(7,8)]==allele) - sum(ped[1:2,7:8]==allele) Dstat } `DSP` <- function (ped,allele) # Calculate D statistic for DSP # ped: affected at row 1, unaffected at row 2 # allele: allele of interest { Dstat <- sum(ped[1,7:8]==allele) - sum(ped[2,7:8]==allele) Dstat } `famselected` <- function(ped,allele_missing) { family.id <- unique(ped[,1]) n.family <- length(family.id) n.ind <- dim(ped)[1] offpos <- which(ped[,3] != 0) no.off <- length(offpos) offused <- rep(0, no.off) # no.off * 1 matrix nufampos1 <- numeric(0) nufam.id1 <- numeric(0) ped.id1 <- numeric(0) nufampos2 <- numeric(0) nufam.id2 <- numeric(0) ped.id2 <- numeric(0) i <- 0 repeat { off <- offpos[offused==0][1] if (is.na(off)) break i <- i+1 fid <- ped[off,3] # individual id of father mid <- ped[off,4] # individual id of mother fpos <- which(ped[,1] == ped[off,1] & ped[,2] == fid) mpos <- which(ped[,1] == ped[off,1] & ped[,2] == mid) alloff <- which(ped[,1] == ped[off,1] & ped[,3] == fid & ped[,4] == mid) # all the positions of offspring in the whole pedigree file offused[match(alloff,offpos)] <- 1 allpos1 <- c(fpos,mpos,alloff) nufampos1 <- c(nufampos1,allpos1) ped.id1 <-c(ped.id1, rep(ped[off,1],length(allpos1))) nufam.id1 <- c(nufam.id1, rep(i,length(allpos1))) if(sum(ped[alloff,6]==1)>0){ allpos2 <- c(fpos,mpos,alloff) nufampos2 <- c(nufampos2,allpos2) ped.id2 <-c(ped.id2, rep(ped[off,1],length(allpos2))) nufam.id2 <- c(nufam.id2, rep(i,length(allpos2))) } } nufam.all1 <- cbind(ped.id1, nufampos1, nufam.id1) nufam.all2 <- cbind(ped.id2, nufampos2, nufam.id2) nufam.selected.all <- numeric(0) for (i in 1:n.family) { flag<-2 pedigree1.id <- which(nufam.all2[,1]==family.id[i]) if(length(pedigree1.id)==0){ pedigree1.id <- which(nufam.all1[,1]==family.id[i]) nufamno.pedigree1 <- unique(nufam.all1[pedigree1.id,3]) flag<-1 } else nufamno.pedigree1 <- unique(nufam.all2[pedigree1.id,3]) if (length(nufamno.pedigree1) == 1) { nufam.selected <- nufamno.pedigree1 if(flag==2) nufam.selected.all <- c(nufam.selected.all, nufam.all2[which(nufam.all2[,3]==nufam.selected),2]) else nufam.selected.all <- c(nufam.selected.all, nufam.all1[which(nufam.all1[,3]==nufam.selected),2]) } else if (length(nufamno.pedigree1) > 1) { nufam.selected <- sample(nufamno.pedigree1,1) if(flag==2) nufam.selected.all <- c(nufam.selected.all, nufam.all2[which(nufam.all2[,3]==nufam.selected),2]) else nufam.selected.all <- c(nufam.selected.all, nufam.all1[which(nufam.all1[,3]==nufam.selected),2]) } } ped <- ped[nufam.selected.all,] ped }