# ---- Setup: working directory, packages, and example data ----
# The workspace is deliberately not cleared with rm(list = ls()): it would
# delete the caller's objects without resetting loaded packages or options,
# and every object used in this script is (re)created before it is used.
#
# Only switch to the author's project folder when it exists, so the script
# does not abort on machines without that path. Nothing in this script
# reads or writes files through a relative path.
project_dir <- "D:/R/R语言学习/20220610分类变量的列联表和独立性检验"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
getwd()

library(MASS)
data("birthwt", package = "MASS") # load the data set
str(birthwt)
library(dplyr)
# Turn the coded categorical variables into labelled factors.
# low, smoke, ht and ui all receive the same no/yes labels, so they are
# recoded together; race gets its own three labels.
birthwt <- birthwt %>%
  mutate(
    across(
      c(low, smoke, ht, ui),
      function(x) factor(x, labels = c("no", "yes"))
    ),
    race = factor(race, labels = c("white", "black", "other"))
  )
str(birthwt)
# ---- One-way frequency tables ----
mytable <- table(birthwt$low) # absolute counts
mytable
prop.table(mytable) # proportions
round(100 * prop.table(mytable), digits = 1) # percentages, one decimal place

library(epiDisplay)
tab1(birthwt$low)
tab1(birthwt$low, graph = FALSE) # do not draw the bar chart
tab1(birthwt$age)
# ---- Two-way contingency tables ----
mytable <- table(birthwt$smoke, birthwt$low) # table(rows, columns)
mytable
addmargins(mytable) # add marginal counts (row and column totals)
prop.table(mytable) # each cell as a share of the grand total
prop.table(mytable, margin = 1) # each cell as a share of its row
prop.table(mytable, margin = 2) # each cell as a share of its column

# xtabs() produces the same result as table(), using a formula interface.
smoke_by_low <- xtabs(~ smoke + low, data = birthwt)
smoke_by_low
addmargins(smoke_by_low) # add marginal counts

# CrossTable() from gmodels.
library(gmodels)
CrossTable(birthwt$smoke, birthwt$low)

# tabpct() from epiDisplay reports counts and percentages in one call.
tabpct(birthwt$smoke, birthwt$low)
# ---- Multi-way contingency tables ----
mytable <- table(birthwt$smoke, birthwt$low, birthwt$race)

# xtabs() can build multi-way tables as well.
xtabs(~ smoke + low + race, data = birthwt)

margin.table(mytable, margin = 3) # totals over the third dimension
margin.table(mytable, margin = c(1, 3)) # totals over dimensions 1 and 3
addmargins(mytable)
prop.table(mytable, margin = c(1, 3))

# Print the three-way table in a compact, flattened layout.
ftable(mytable)
# ---- Tests of independence ----
# Chi-squared test of independence.
mytable <- table(birthwt$smoke, birthwt$low)
mytable

# Run the default test once, then show both the test summary and the
# expected (theoretical) count of every cell.
chisq_result <- chisq.test(mytable)
chisq_result
chisq_result$expected

# Same test with the continuity correction switched off.
chisq.test(mytable, correct = FALSE)

# Fisher's exact test.
fisher.test(mytable)
# Paired chi-squared (McNemar) test on a hand-entered 2 x 2 table,
# written row by row so the layout matches the printed matrix.
my.matrix <- matrix(
  c(
    11, 12,
    2, 33
  ),
  nrow = 2,
  byrow = TRUE
)
mcnemar.test(my.matrix)
# Relative risk and odds ratio (epiDisplay).
library(epiDisplay)

# From the raw variables.
cs(birthwt$low, birthwt$smoke)
cc(birthwt$low, birthwt$smoke)

# From a pre-built 2 x 2 table of the same two variables.
mytable <- table(birthwt$low, birthwt$smoke)
cc(cctable = mytable)
# Cochran-Mantel-Haenszel chi-squared test
# (used to explore confounding between variables).
mytable <- table(birthwt$low, birthwt$smoke, birthwt$race)
mytable

mantelhaen.test(mytable)
mantelhaen.test(mytable, correct = FALSE) # no continuity correction

# mhor() from the stratified table, then from the raw variables.
mhor(mhtable = mytable)
mhor(birthwt$low, birthwt$smoke, birthwt$race)