######################################################### ## cvtest ## ######################################################### # # This code is used to test for the equality of two or more # coefficients of variation (CV) based on the Bartlett-corrected # likelihood ratio test, together with the results of the multiple testing # based on Bonferroni correction # 本程序用于计算多样本变异系数的(Bartlett校正后)似然比检验统计量的值与相应的P值 # 并给出基于Bonferroni校正方法的多重比较结果 # # This code is suitable for the case of all the CVs taking values from the interval [0, 1] # 本程序只能用于变异系数介于0与1之间时的情形 # ############################################## ## ## > source("cvtest.txt") ## ## > cvtest("Example1.txt", header = T) ## ## > cvtest("Example2.txt", header = F) ## ############################################## # data:data file with columns being different groups # the first row: mean # the second row: standard deviation (SD) # third row: sample size for each group # 输入变量data表示数据文件,其中的各列表示不同的样本 # 数据文件的第一行为均数 # 数据文件的第二行为标准差 # 数据文件的第三行为样本量 # "header = T" is default, that is, there is a header line in the data file, e.g., Example1.txt # otherwise "header = F", e.g., Example2.txt # 输入变量 header:若数据文件中第一行为变量名时,则其取值为 T,如:数据文件 Example1.txt # 若数据文件中没有给出变量名,则其取值为 F, 如:数据文件 Example2.txt cvtest <- function(data, header){ # read in the data file if it is provided data <- as.matrix(read.table(data,header = header,as.is = T)) datafile <- data n.group <- dim(data)[2] data0<-matrix(data,nrow=3,ncol=n.group) group.name <- paste("Group", 1:n.group, sep = "") dimnames(data) <- list(c("mean", "SD", "sample size"), group.name) if (header == T) { group.name <- dimnames(datafile)[[2]] dimnames(data) <- list(c("mean", "SD", "sample size"), group.name) } n <- data[3, ] # sample size cv <- data[2, ]/data[1, ] # coefficients of variation (CV) for all the groups x <- sqrt(n)/cv results <- cv.test0(data0) n.pair <- choose(n.group,2) LRT.pair <- matrix(NA, nrow = n.group, ncol = n.group) dimnames(LRT.pair) <- list(group.name, group.name) p.pair <- matrix(NA, nrow = n.group, ncol = n.group) dimnames(p.pair) <- list(group.name, group.name) for (i in 1:(n.group - 1)) { for (j in (i + 1):n.group) { data1 <- cbind(data[, i], data[, j]) results1 <- cv.test0(data1) LRT.pair[i, j] <- results1$LRT LRT.pair[j, i] <- results1$LRT med <- results1$P_value * n.pair if (med <= 1) { p.pair[i, j] <- med p.pair[j, i] <- med } else { p.pair[i, j] <- 1; p.pair[j, i] <- 1 } } } list(data = data, CV = cv, LRT = results$LRT, P_value = results$P_value, Multiple_Testing_LRT = LRT.pair, Multiple_Testing_P_value = p.pair) } cv.test0 <- function(data0){ n <- data0[3, ] # sample size cv <- data0[2, ]/data0[1, ] # coefficients of variation (CV) for all the groups x <- sqrt(n)/cv ######### f函数说明 ############### ## ## 用途:计算迭代公式 ## 参数说明: ## ## k:要估计的最大CV值 ## x:服从非中心t分布的统计量 ## m:模拟的次数 ################################### f <- function(k, x, m){ a <- as.vector(m - 1) u <- sqrt(m) * x/(k * sqrt(x^2 + m - 1)) a[1] <- u + exp(-u^2/2)/(sqrt(2 * pi) * pnorm(u)) if(m == 2) c <- log(a[1] + log(sqrt(2 * pi) * pnorm(u))) else{ b <- log(a[1]) for (i in 1:(m - 2)) { a[i + 1] <- u + i/a[i] b <- b + log(a[i + 1]) } c <- b + log(sqrt(2 * pi) * pnorm(u)) } return(c) } ############# 计算似然比统计量分母部分的值 ################# ## 参数说明: ## ## freedom:分母部分似然函数 ## result:最大似然估计 ## a:最大似然函数值 ## ########################################################## s <- 0 for (i in 1:length(x)){ freedom <- function(k){ # 分母部分的似然函数 log <- n[i] * (n[i] - 1)/(2 * k^2 * (x[i]^2 + n[i] - 1)) - f(k, x[i], n[i]) return(log) } result <- optimize(freedom,c(0,2)) # 最大似然估计 a <- -result$objective # 求最大似然函数值 s <- s + a # 每次求得的函数值相加 } ############## 计算似然比统计量分子部分的值 ################ ## 参数说明: ## ## freedom:分子部分似然函数 ## result:最大似然估计 ## b:最大似然函数值 ## ########################################################## t <- 0 freedom <- function(k){ for (i in 1:length(x)){ log <- n[i] * (n[i] - 1)/(2 * k^2 * (x[i]^2 + n[i] - 1)) - f(k, x[i], n[i]) t <- t + log } return(t) } result <- optimize(freedom,c(0,2)) b <- -result$objective ###################### 计算似然比统计量####################### ## 参数解释: ## ## ka:似然比统计量 ## C:校正系数 ## p:P值 ## ############################################################# ka <- 2 * (s - b) C <- 1 + (sum(1/(n - 1)) - 1/(sum(n) - length(n)))/(3 * (length(n) - 1)) p <- pchisq(ka/C, df = length(n) - 1, lower.tail = F) list(LRT = ka/C, P_value = p) }