# 为了更全面地展示分析步骤,下面给出完整的 R 代码以及具体的分析顺序(共五个例子)。
# 如有疑问,可以把代码输入 ChatGPT 查看详细解析:【资源共享】分享3个免费ChatGPT国内AI软件,请及时收藏!-CSDN博客
#1, data0306-deer 鹿的前后腿长,采用合适的统计方法(说明选用依据),检验前后腿长有无差异?后腿是否比前腿长?
#方法选择:因为n1,n2都小于12,选择wilcox符号秩检验
library(haven) # haven包读取sav格式文件
data1 <- read_sav("D:\\Datum\\生物统计\\data\\data3/data0306 deer.sav")
data1# A tibble: 10 × 3 Deer Hindleg Foreleg <dbl> <dbl> <dbl> 1 1 142 138 2 2 140 136 3 3 144 147 4 4 144 139 5 5 142 143 6 6 146 141 7 7 149 143 8 8 150 145 9 9 142 136 10 10 148 146wilcox.test(data1$Hindleg,data1$Foreleg,paired = T)
##输出结果:
#Wilcoxon signed rank test with continuity correction
#data: data1$Hindleg and data1$Foreleg
#V = 51, p-value = 0.01859
#alternative hypothesis: true location shift is not equal to 0
#
#Warning message: In wilcox.test.default(data1$Hindleg, data1$Foreleg, paired = T) :无法精確計算带连结的p值7
##结论
#p-value = 0.01859,小于0.05,前后腿长有明显差异
wilcox.test(data1$Hindleg,data1$Foreleg,paired = T,alternative = "greater")
##输出结果:V = 51, p-value = 0.009297
##结论:后腿比前腿长
#2, data0307 excersize 是成年人经过一段体育锻炼前后的体重,采用合适的统计方法(说明选用依据),检验锻炼前后体重有无差异?按照常理,经过体育锻炼后,体重会有所下降,采用合适的统计方法(说明选用依据),检验该数据是否支持这一说法?
#方法选择:因为n1,n2都大于30,进行t检验
library(haven) # haven包读取sav格式文件
data2 <- read_sav("D:\\Datum\\生物统计\\data\\data3/data0307 exercise.sav")
data2# A tibble: 33 × 3 ID Pre Post <dbl> <dbl> <dbl> 1 1 165 163 2 2 180 179 3 3 175 180 4 4 160 161 5 5 185 170 6 6 177 170 7 7 190 185 8 8 200 192 9 9 195 190 10 10 198 180 # ℹ 23 more rows # ℹ Use `print(n = ...)` to see more rows
var.test(data2$Pre,data2$Post) #F分布比较方差
##输出:
#data: data2$Pre and data2$Post
#F = 2.0483, num df = 32, denom df = 32, p-value = 0.04643
#alternative hypothesis: true ratio of variances is not equal to 1
#95 percent confidence interval:
# 1.011648 4.147360
#sample estimates:
# ratio of variances
#2.048333
t.test(data2$Pre, data2$Post, var.equal=TRUE)
##输出:
#t = 2.7245, df = 64, p-value = 0.008293
#alternative hypothesis: true difference in means is not equal to 0
#95 percent confidence interval:
# 1.560182 10.136787
#sample estimates:
# mean of x mean of y
#186.3636 180.5152##结论
#p-value = 0.008293,小于0.05,锻炼前后体重有差异t.test(data2$Pre, data2$Post, var.equal=TRUE,alternative = "greater")
##输出结果:
#data: data2$Pre and data2$Post
#t = 2.7245, df = 64, p-value = 0.004146
#alternative hypothesis: true difference in means is greater than 0
#95 percent confidence interval:
# 2.265802 Inf
#sample estimates:
# mean of x mean of y
#186.3636 180.5152##结论
#p-value = 0.004146,该数据是否支持“经过体育锻炼后,体重会有所下降,”这一说法。
#3, data0308 fiber 是某灵长类物种采食和不采食植物的叶片纤维素干重比例,采用合适的统计方法(说明选用依据),检验食物和非食物的干重比例有无差异?采用合适的统计方法(说明选用依据),检验食物的干重比例是否小于非食物?
#方法选择:n1=35,n2=15,验证两组独立数据发现:每组的数据服从正态分布,所以采用t测验
library(haven) # haven包读取sav格式文件
data3 <- read_sav("D:\\Datum\\生物统计\\data\\data3/data0308 fiber.sav")
data3# A tibble: 50 × 3 Item Food Fiber <dbl> <dbl+lbl> <dbl> 1 1 0 [Nonfood] 17.8 2 2 0 [Nonfood] 16.8 3 3 0 [Nonfood] 13.2 4 4 1 [Food] 12.9 5 5 0 [Nonfood] 18.6 6 6 0 [Nonfood] 11.8 7 7 0 [Nonfood] 17.1 8 8 0 [Nonfood] 10.2 9 9 0 [Nonfood] 22.3 10 10 0 [Nonfood] 12.2 # ℹ 40 more rows # ℹ Use `print(n = ...)` to see more rowsdata30<-data3[data3$Food==0,] #该灵长类物种不采食植物的叶片纤维素干重比例
data30# A tibble: 35 × 3 Item Food Fiber <dbl> <dbl+lbl> <dbl> 1 1 0 [Nonfood] 17.8 2 2 0 [Nonfood] 16.8 3 3 0 [Nonfood] 13.2 4 5 0 [Nonfood] 18.6 5 6 0 [Nonfood] 11.8 6 7 0 [Nonfood] 17.1 7 8 0 [Nonfood] 10.2 8 9 0 [Nonfood] 22.3 9 10 0 [Nonfood] 12.2 10 11 0 [Nonfood] 12.5 # ℹ 25 more rows # ℹ Use `print(n = ...)` to see more rowsdata31<-data3[data3$Food==1,] #该灵长类物种采食植物的叶片纤维素干重比例
data31# A tibble: 15 × 3 Item Food Fiber <dbl> <dbl+lbl> <dbl> 1 4 1 [Food] 12.9 2 12 1 [Food] 12.0 3 13 1 [Food] 5.05 4 22 1 [Food] 10.6 5 25 1 [Food] 10 6 26 1 [Food] 17.3 7 27 1 [Food] 14.7 8 29 1 [Food] 11.3 9 35 1 [Food] 7.8 10 36 1 [Food] 10.5 11 37 1 [Food] 10.6 12 39 1 [Food] 7.22 13 41 1 [Food] 15.9 14 47 1 [Food] 19.0 15 50 1 [Food] 13.6#检验两组中每组的数据是否服从正态分布
shapiro.test(data30$Fiber)
##输出结果:W = 0.95411, p-value = 0.1515
##结论:p-value = 0.1515大于0.05,该灵长类物种不采食植物的叶片纤维素干重比例符合正态分布
shapiro.test(data31$Fiber)
##输出结果:W = 0.98371, p-value = 0.9887
##结论:p-value = 0.9887大于0.05,该灵长类物种采食植物的叶片纤维素干重比例符合正态分布#检验这两个总体是否符合方差齐性
var.test(data30$Fiber,data31$Fiber)
##输出结果:F = 1.922, num df = 34, denom df = 14, p-value = 0.1919
##结论: p-value = 0.1919大于0.05,符合方差齐性,于是用t测验t.test(data30$Fiber,data31$Fiber,var.equal = T)
##输出结果:t = 2.5601, df = 48, p-value = 0.01367
##结论:p-value = 0.01367小于0.05,该灵长类物种食物和非食物的纤维素干重比例有差异
t.test(data31$Fiber,data30$Fiber,var.equal = T,alternative = "less")
##输出结果:t = 2.5601, df = 48, p-value = 0.006834
##结论:p-value = 0.006834小于0.05,该灵长类物种食物纤维素干重比例不比非食物小
#4, data0309 protein 是该灵长类物种采食和不采食植物的叶片蛋白质干重比例,采用合适的统计方法(说明选用依据),检验食物和非食物的干重比例有无差异?采用合适的统计方法(说明选用依据),检验食物的干重比例是否大于非食物?
##方法选择:n1=35,n2=15,验证两组数据,但是有一组的数据不服从正态分布,所以采用Wilcox符号秩检验library(haven) # haven包读取sav格式文件
data4 <- read_sav("D:\\Datum\\生物统计\\data\\data3/data0309 protein.sav")
data4# A tibble: 50 × 3 Item Food Protein <dbl> <dbl+lbl> <dbl> 1 1 0 [Nonfood] 15.7 2 2 0 [Nonfood] 13.7 3 3 0 [Nonfood] 18.8 4 4 1 [Food] 25.6 5 5 0 [Nonfood] 13.9 6 6 0 [Nonfood] 16.2 7 7 0 [Nonfood] 11.8 8 8 0 [Nonfood] 16.1 9 9 0 [Nonfood] 12.5 10 10 0 [Nonfood] 11.5 # ℹ 40 more rows # ℹ Use `print(n = ...)` to see more rowsdata40<-data4[data4$Food==0,] #该灵长类物种不采食植物的叶片蛋白质干重比例
data40# A tibble: 35 × 3 Item Food Protein <dbl> <dbl+lbl> <dbl> 1 1 0 [Nonfood] 15.7 2 2 0 [Nonfood] 13.7 3 3 0 [Nonfood] 18.8 4 5 0 [Nonfood] 13.9 5 6 0 [Nonfood] 16.2 6 7 0 [Nonfood] 11.8 7 8 0 [Nonfood] 16.1 8 9 0 [Nonfood] 12.5 9 10 0 [Nonfood] 11.5 10 11 0 [Nonfood] 11.8 # ℹ 25 more rows # ℹ Use `print(n = ...)` to see more rowsdata41<-data4[data4$Food==1,] #该灵长类物种采食植物的叶片蛋白质干重比例
data41# A tibble: 15 × 3 Item Food Protein <dbl> <dbl+lbl> <dbl> 1 4 1 [Food] 25.6 2 12 1 [Food] 11.1 3 13 1 [Food] 10.3 4 22 1 [Food] 18.8 5 25 1 [Food] 12.8 6 26 1 [Food] 13.0 7 27 1 [Food] 15.0 8 29 1 [Food] 16.2 9 35 1 [Food] 6.45 10 36 1 [Food] 12.3 11 37 1 [Food] 23.5 12 39 1 [Food] 11.6 13 41 1 [Food] 11.1 14 47 1 [Food] 16.9 15 50 1 [Food] 21#检验两组中每组的数据是否服从正态分布
shapiro.test(data40$Protein)
##输出结果:W = 0.82641, p-value = 7.027e-05
##结论:p-value = 7.027e-05小于0.05,该灵长类物种不采食植物的叶片纤维素干重比例符合不正态分布
shapiro.test(data41$Protein)
##输出结果:W = 0.94579, p-value = 0.4607
##结论:p-value = 0.4607小于0.05,该灵长类物种采食植物的叶片纤维素干重比例不符合正态分布
wilcox.test(data40$Protein,data41$Protein)
##输出结果:W = 258.5, p-value = 0.9409
##结论:p-value = 0.9409大于0.05,该灵长类物种食物和非食物的纤维素干重比例无差异
wilcox.test(data41$Protein,data40$Protein,alternative = "greater")
##输出结果:W = 266.5, p-value = 0.4705
##结论:p-value = 0.4705大于0.05,该灵长类物种食物蛋白质干重比例不比非食物大
#5, 一般情况下,灵长类喜欢吃蛋白质/纤维素比例(ratio)更高的食物,基于data0308 fiber 和 data0309 protein,采用合适的统计方法(说明选用依据),检验该物种是否符合这种情况?(提示:需合并数据)
#方法选择:n1=35,n2=15,验证两组数据,发现其中一组的数据不服从正态分布,所以采用Wilcox符号秩检验
data51 <- data41$Protein/data31$Fiber #蛋白质/纤维素比例的食物数据
data51[1] 1.9906832 0.9226933 2.0316832 1.7640977 1.2750000 0.7474048 1.0190606 1.4335106 0.8269231 1.1755725 2.2124060 [12] 1.6052632 0.7011349 0.8869611 1.5407190 attr(,"label") [1] "percent dry matter" attr(,"format.spss") [1] "F8.2"data50 <- data40$Protein/data30$Fiber #蛋白质/纤维素比例的非食物数据
data50[1] 0.8830146 0.8159619 1.4248862 0.7467742 1.3703390 0.6883421 1.5743640 0.5596948 0.9378577 0.9440448 0.9754702 [12] 1.4577320 1.0391960 0.6523785 0.3996877 0.4640403 0.5574371 1.1700581 2.8302083 0.7869023 2.3851852 1.5460340 [23] 0.4154443 0.3163403 1.5139296 0.8476421 1.1578378 0.6464411 0.7935294 0.6789906 2.7776946 1.8446995 0.9605688 [34] 3.7619632 0.6978541 attr(,"label") [1] "percent dry matter" attr(,"format.spss") [1] "F8.2"#检验两组中每组的数据是否服从正态分布
shapiro.test(data51)
##输出结果:W = 0.93386, p-value = 0.3113
##结论:p-value = 0.3113大于0.05,蛋白质/纤维素比例的食物数据符合正态分布
shapiro.test(data50)
##输出结果:W = 0.8164, p-value = 4.349e-05
##结论:p-value = 0.4607小于0.05,蛋白质/纤维素比例的非食物数据不符合正态分布
wilcox.test(data51,data50,alternative = "greater")
##输出结果:W = 177, p-value = 0.03584
##结论:p-value = 0.03584小于0.05,所以灵长类喜欢吃蛋白质/纤维素比例(ratio)更高的食物,