2.1 符号检验
检验中位数是否为10
> build.y<-c(22,9,4,5,1,16,15,26,47,8,31,7)
> binom.test(sum(build.y>10),length(build.y),0.5) #符号检验
number of successes 是k值,取的是大于10和小于10 的个数的较小的那个
number of trials是n的值,数据的个数。
2.2 Wilcoxon 符号秩检验
> x = c(22,9,4,5,1,16,15,26,47,8,31,7)
> wilcox.test(x-10)
v = 53: w+ = 53
2.3 顺序统计量S1
> UNE.rate<-c(17.3,17.9,18.4,18.1,18.3,19.6,18.6,19.2,17.7,20,19,18.8,19.3,20.2,19.9)
> N = length(UNE.rate)
> N
[1] 15
> k = N/2
> w = seq(N-2*1+1,N-2*k+1,-2) #权重
> S= sum(w*(UNE.rate[1:((N-1)/2)]-rev(UNE.rate[((N+1)/2+1):N])>0))
> S #计算S1统计量p49
[1] 2
> ES = N^2/8 #期望
> DS = 1/24*N*(N^2-1) #方差
> S.start=(S-ES)/sqrt(DS) #结果为-2.21,小于-1.96,落在左侧,有增加趋势
> pnorm(S.start)
[1] 0.01362334
#P值,pnorm函数是计算标准正态分布的累积分布函数的函数
2.3 S2
> rain<-c(17.3,17.9,18.4,18.1,18.3,19.6,18.6,19.2,17.7,20.0,19.0,18.8,19.3,20.2,19.9)
> N=length(rain)
> if(N%%2==0){#如果N是偶数
+ c=N/2
+ S_pos<-sum(rain[1:c]-rain[(c+1):N]>0)
+ #为正的个数
+ S_neg=sum(rain[1:c]-rain[(c+1):N]<0)
+ #为负的个数
+ }else{#如果是奇数
+ c=(N+1)/2
+ S_pos<-sum(rain[1:(c-1)]-rain[(c+1):N]>0) #为正的个数
+ S_neg=sum(rain[1:(c-1)]-rain[(c+1):N]<0) #为负的个数
+ }
> k=min(S_pos,S_neg) #取较小值
> N_=S_pos+S_neg
> binom.test(k,N_,0.5)
2.4 符号检验(两总体)
> x<-c(91,46,108,99,110,105,191,57,34,81,81,51,63,51,46,45,66,64,90,28)
> N = length(x)
> k = N/2
> s1<-sum((x[1:k]-x[(k+1):N])>0) #s1是前-后>0的个数,即s+
> s2<-sum((x[1:k]-x[(k+1):N])<0) #s2是前-后<0的个数,即s-
> s <-min(s1,s2) #k的值,s+ 和s-较小值
> binom.test(s,s1+s2,0.5) #binom.test(x,n,p):x是成功的次数,n是试验总数,p是原假设的概率
2.4 Wilcoxon符号秩检验
> x<-c(91,46,108,99,110,105,191,57,34,81)
> y<-c(81,51,63,51,46,45,66,64,90,28)
> z<-x-y
> wilcox.test(z)
v = 45 : w+ = 45
2.5 随机游程
> x = c(0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0)
> y = factor(x) #runs.test()要求输入为因子(factor)类型,故先转换
> runs.test(y)
2.15 卡方拟合优度检验(分布的一致性检验)
#计算卡方统计量
> x<-c(495,503,491,581)
> n<-sum(x)
> m<-length(x)
> p<-rep(1/m,m)
> k<-sum((x-n*p)^2/(n*p))
> k
[1] 10.53333
> pr<-1-pchisq(k,m-1)
> pr
[1] 0.01453647
方法二:
> chisq.test(x)
3.1brown-mood-test中位数检验
# Brown-Mood median test for two independent samples.
#
# Tests H0: the two populations have a common median, using the
# hypergeometric distribution of A = #{x_i > median(c(x, y))}.
#
# Args:
#   x, y: numeric sample vectors.
#   alt:  alternative hypothesis, "greater" (median of x exceeds
#         median of y) or "less" (median of x is below median of y).
#
# Returns:
#   A list with the 3x3 contingency table and the p-value
#   (similar to a Python dict).
#
# NOTE(review): observations equal to the pooled median are counted
# in the "<MXY" row, so that row is effectively "<= MXY" — this is
# why results can differ slightly from textbook answers (see the
# remark following this definition in the notes).
BM.test <- function(x, y, alt) {
  xy <- c(x, y)          # pooled sample
  md.xy <- median(xy)    # pooled median M_xy
  t <- sum(xy > md.xy)   # total count above M_xy in the pooled data
  lx <- length(x)
  ly <- length(y)
  lxy <- lx + ly
  A <- sum(x > md.xy)    # count of x observations above M_xy
  if (alt == "greater") {
    # H1: median(x) > median(y) -> upper tail P(X >= A)
    w <- 1 - phyper(A - 1, lx, ly, t)
  } else if (alt == "less") {
    # H1: median(x) < median(y) -> lower tail P(X <= A)
    w <- phyper(A, lx, ly, t)  # hypergeometric tail probability
  } else {
    # Originally no branch existed for other values of `alt`, leaving
    # `w` undefined and producing an opaque error; fail fast instead.
    stop("alt must be \"greater\" or \"less\"", call. = FALSE)
  }
  # Column-major fill: col 1 = X, col 2 = Y, col 3 = X+Y totals.
  conting.table <- matrix(
    c(A, lx - A, lx, t - A, ly - (t - A), ly, t, lxy - t, lxy),
    3, 3
  )
  col.name <- c("X", "Y", "X+Y")
  row.name <- c(">MXY", "<MXY", "TOTAL")
  dimnames(conting.table) <- list(row.name, col.name)
  list(contingency.table = conting.table, p.value = w)
}
> X<-c(10,8,12,16,5,9,7,11,6)
> Y<-c(12,15,20,18,13,14,9,16)
> BM.test(X,Y,"less")
和答案的区别在于,答案是小于混合中位数,这里是小于等于
3.1 Wilcoxon-Mann-Whitney(WMW)秩和检验
> x <-c(10,8,12,16,5,9,7,11,6)
> y<-c(12,15,20,18,13,14,9,16)
> wilcox.test(y,x) #不能写成wilcox.test(x,y)
W = 62.5,是Wxy,即y大于x的样本个数
3.3 WMW秩和检验
> x <-c(18,15,9,10,14,16,11,13,19,20,6)
> y <-c(12,13,9,8,1,2,7,5,3,2,4)
> wilcox.test(y,x)
3.5 Mood方差检验,判断两组数据的离散程度
> X<-c(8.8,8.2,5.6,4.9,8.9,4.2,3.6,7.1,5.5,8.6,6.3,3.9)
> Y<-c(13.0,14.5,16.5,22.8,20.7,19.6,18.4,21.3,24.2,19.6,11.7,18.9,14.6,19,14.5)
> mood.test(X,Y)
4.1 K-W单因素方差分析
> x<-c(83,64,67,62,70,85,81,80,78,88,89,79,90,95)
> x
[1] 83 64 67 62 70 85 81 80 78 88 89 79 90 95
> gr.x<-c(1,1,1,1,1,2,2,2,2,3,3,3,3,3) #5个1,4个2,5个3,代表每组的个数
> gr.x
[1] 1 1 1 1 1 2 2 2 2 3 3 3 3 3
> kruskal.test(x,gr.x)
H = 7.8229,H近似服从自由度为k-1的卡方分布
4.1 Dunn进行两两之间比较,无代码
4.2 Jonckeere-Terpstra检验(JK)
一个样本中观测值小于另一个样本中观测值的个数较多或较少,考虑两总体之间的位置之间的大小关系
> install.packages("clinfun")
> library(clinfun)
> g1<-c(83,64,67,62,70)
> g2<-c(85,81,80,78)
> g3<-c(88,89,79,90,95)
#计算w12
> w12<-rep(0,4) #设置重复向量,此处w12 = 0,0,0,0
> for(i in 1:4){w12[i]<-sum(g2[i]>g1)} #每一个2中的数拿去和1中所有数比较,1中数据小于2中数据的个数
> w12
[1] 5 4 4 4
> sum(w12)
[1] 17 #w12 = 17
> #w13
> w13<-rep(0,5)
> for(i in 1:5){w13[i]<-sum(g3[i]>g1)}
> w13
[1] 5 5 4 5 5
> sum(w13)
[1] 24
#w23
> w23<-rep(0,5)
> for(i in 1:5){w23[i]<-sum(g3[i]>g2)}
> w23
[1] 4 4 1 4 4
> sum(w23)
[1] 17
> G123<-list(g1,g2,g3)
> n<-c(length(g1),length(g2),length(g3))
> group_label<-as.ordered(factor(rep(1:length(n),n)))
> jonckheere.test(unlist(G123),group_label,alternative="increasing")
> #unlist(G123)将复杂向量转化为简单向量
JT = w12 + w13 + w23 = 58
P(JT>=58)=0.0009078
4.6 Friedman秩方差分析法
> x<-c(20.3,25.6,24.0,21.2,24.7,23.1,18.2,19.3,20.6,18.6,19.3,19.8,18.5,20.7,21.4)
> treat.x<-c(1,2,3,1,2,3,1,2,3,1,2,3,1,2,3)
> block.x<-c(1,1,1,2,2,2,3,3,3,4,4,4,5,5,5)
> friedman.test(x,treat.x,block.x)
Q = 7.6,和卡方0.95,自由度2的值比较
4.7 Hodges-Lehmann调整秩和检验
当随机区组设计数据的区组数较大或处理数较小时,Friedman检验的效果就不是很好了
因为Friedman检验的编秩是在每一个区组内进行的,这种编秩的方法仅限于区组内效应,不同区组间响应的直接比较是无意义的
为了去除区组效应,可以用区组的平均值或中位数作为区组效应的估计值
用每个观测值与估计值相减来反映处理之间的差异,就可以消除区组之间的差异,问题转为无区组的情况
> data<-c(20.3,21.2,18.2,18.6,18.5,25.6,24.7,19.3,19.3,20.7,24.0,23.1,20.6,19.8,21.4)
> treat<-c(1,1,1,1,1,2,2,2,2,2,3,3,3,3,3) #处理
> block<-c(1,2,3,4,5,1,2,3,4,5,1,2,3,4,5) #区组
> A<-c(20.3,21.2,18.2,18.6,18.5)
> B<-c(25.6,24.7,19.3,19.3,20.7)
> C<-c(24.0,23.1,20.6,19.8,21.4)
> table <- cbind(A,B,C) #合并,矩阵
> colnames(table) <- c("A","B","C")
> row.names(table) <- 1:5
> b<-5 #区组数
> k<-3 #处理数
> Xi.bar <- apply(table,1,mean) #apply函数可代替for循环,对矩阵按行或列进行循环计算,mean平均数
> #apply()函数第一个参数为数组或矩阵等,第二个参数1表示按行,2表示按列,第三个参数表示自定义的调用函数
> table.adjusted <- table-cbind(Xi.bar,Xi.bar,Xi.bar) #观测值减去均值,得到调整后的观测值
> rank <- rank(table.adjusted) #按列,对调整后的观测值混合数据求秩
> R.j <- c(sum(rank[1:5]),sum(rank[6:10]),sum(rank[11:15])) #处理的秩和
> rank2 <- rank(t(table.adjusted)) #按行
> Ri. <- c(sum(rank2[1:3]),sum(rank2[4:6]),sum(rank2[7:9]),sum(rank2[10:12]),sum(rank2[13:15]))#区组的秩和
> #判断是否有结
> if(length(as.vector(table)) > length(unique(as.vector(table)))){#有结
+ Q <- (k-1)*(sum(R.j*R.j) - k*b*b*(k*b+1)*(k*b+1)/4)/(sum(rank*rank) - 1/k*sum(Ri.*Ri.))
+ }else
+ {#无结
+ Q <- (k-1)*(sum(R.j*R.j) - k*b*b*(k*b+1)*(k*b+1)/4)/(1/6*k*b*(k*b+1)*(2*k*b+1) - 1/k*sum(Ri.*Ri.))
+ }
> K <- qchisq(0.95,k-1) #Q近似服从自由度为k-1的卡方分布,这里计算0.95置信度,自由度k-1卡方分布的临界值
> p <- 1 - pchisq(Q,k-1)
> cat("Q =",Q,"\n")
Q = 6.842615
> cat("临界值K =",K,"\n")
临界值K = 5.991465 #Q>k,拒绝原假设
> cat("p =",p,"\n")
p = 0.03266969
4.8 Cochran检验
适用于二元数据
> candid1<-c(1,1,1,1,1,1,0,0,1,1,1,0)
> candid2<-c(0,1,0,1,0,0,0,1,0,0,0,0)
> candid3<-c(1,0,1,0,0,1,0,1,0,0,0,1)
> candid<-matrix(c(candid1,candid2,candid3),nrow=12,ncol=3)
> nidot.candid=apply(candid,1,sum)
> ndotj.candid=apply(candid,2,sum)
> k=ncol(candid)
> Q=(k-1)*((k*sum(ndotj.candid^2)-(sum(ndotj.candid))^2))/(k*sum(nidot.candid)-sum(nidot.candid^2))
> pvalue.candid=pchisq(Q,df=k-1,lower.tail=F)
> Q
[1] 5.090909
> pvalue.candid
[1] 0.07843739
4.9 Durbin 不完全区组分析法
可能存在处理非常多,但是每个区组中允许的样本量有限的时候,每一个区组中不可能包含所有的处理,比如重要的均衡不完全区组BIB设计。Durbin检验便是针对这种问题
> install.packages("PMCMRplus")
> library(PMCMRplus)
> z <- c(356,320,359,338,340,385,372,380,390,308,332,348)
> blocks <- c(1,2,3,1,2,4,1,3,4,2,3,4)
> groups <- c(1,1,1,2,2,2,3,3,3,4,4,4)
> durbinTest(z,blocks=blocks,groups=groups)
Q = 6.75
5.1 卡方独立性检验
> x<-c(83,91,41,70,86,38,45,15,10)
> A<-matrix(x,3,3)
> A
[,1] [,2] [,3]
[1,] 83 70 45
[2,] 91 86 15
[3,] 41 38 10
> chisq.test(A)
卡方值为18.651,自由度4
5.2 卡方独立性检验
> y <-c(341,103,405,11,105,15)
> A<-matrix(y,2,3)
> A
[,1] [,2] [,3]
[1,] 341 405 105
[2,] 103 11 15
> chisq.test(A)
5.3 卡方齐性检验(小样本用Fisher精确检验)
> x <-matrix(c(6,4,1,19),byrow = F,2)
> fisher.test(x)
5.11 ridit检验
> A = c(90,23,53,21,13,47,34,28,18,5,20,13,10,5,2,28,32,33,45,16,34,28,52,40,10)
> A.matrix = matrix(A,nrow=5,ncol=5,byrow=T)
> A.matrix
[,1] [,2] [,3] [,4] [,5]
[1,] 90 23 53 21 13
[2,] 47 34 28 18 5
[3,] 20 13 10 5 2
[4,] 28 32 33 45 16
[5,] 34 28 52 40 10
> rA = rowSums(A.matrix)
> cA = colSums(A.matrix)
> R = sum(rA)
> R
[1] 700
> R1 = rep(0,5)
> for (j in 1:5)(R1[j]<-(sum(cA[1:j-1])+0.5*cA[j])/R)
> R1
[1] 0.1564286 0.4057143 0.6242857 0.8421429 0.9671429
> r = rep(0,5)
> for (i in 1:5)(r[i]<-sum(A.matrix[i,]*R1)/sum(A.matrix[i,]))
> r
[1] 0.4337750 0.4440963 0.4158143 0.5930844 0.5640157
> d = rep(0,5)
> for(i in 1:5)(d[i]<-1/sqrt(3*rA[i]))
> d
[1] 0.04082483 0.05025189 0.08164966 0.04652421 0.04508348
> r-d
[1] 0.3929502 0.3938444 0.3341646 0.5465602 0.5189322
> r+d
[1] 0.4745998 0.4943482 0.4974639 0.6396086 0.6090992
> graph=matrix(c(r-d,r+d),byrow=F,ncol=2)
> graph
[,1] [,2]
[1,] 0.3929502 0.4745998
[2,] 0.3938444 0.4943482
[3,] 0.3341646 0.4974639
[4,] 0.5465602 0.6396086
[5,] 0.5189322 0.6090992
> plot(0,0,ylim=c(0,1),xlim=c(1,5))
> abline(h=0.5)
> for(i in 1:nrow(graph))
+ lines(c(i,i),graph[i,],lwd=1)