title: “R Notebook”
output:
html_document:
df_print: paged
参考文献
- https://cran.r-project.org/web/packages/table1/vignettes/table1-examples.html
- https://stackoverflow.com/questions/54945344/p-value-column-in-table1-using-table1-function
生成普通的表格
suppressMessages(library(table1))
suppressMessages(library(boot))
data(melanoma)
melanoma2 <- melanoma
# Factor the basic variables that
# we're interested in
# 数据集基本信息
# sex 0,1
# age 连续变量
# ulcer 0,1
# status 1,2,3
# thickness 连续变量
melanoma2$status <-
factor(melanoma2$status,
levels=c(2,1,3),#
labels=c("Alive", # Reference,这里levels和labels是对应的关系
"Melanoma death",
"Non-melanoma death"))
p1=table1(~ factor(sex) + age + factor(ulcer) + thickness | status, data=melanoma2)
p1
# 验证结果
#table(melanoma$sex,melanoma$status)
#mean(melanoma$age[melanoma$status==2])
#sd(melanoma$age[melanoma$status==2])
#min(melanoma$age[melanoma$status==2])
#max(melanoma$age[melanoma$status==2])
#median(melanoma$age[melanoma$status==2])
生成p-value列
library(MatchIt)
data(lalonde)
lalonde$treat <- factor(lalonde$treat, levels=c(0, 1), labels=c("Control", "Treatment"))
lalonde$married <- as.logical(lalonde$married == 1)
lalonde$nodegree <- as.logical(lalonde$nodegree == 1)
lalonde$race <- factor(lalonde$race, levels=c("white", "black", "hispan"),
labels=c("White", "Black", "Hispanic"))
label(lalonde$race) <- "Race"
label(lalonde$married) <- "Married"
label(lalonde$nodegree) <- "No high school diploma"
label(lalonde$age) <- "Age"
label(lalonde$re74) <- "1974 Income"
label(lalonde$re75) <- "1975 Income"
label(lalonde$re78) <- "1978 Income"
units(lalonde$age) <- "years"
# eqcut函数将连续变量划分成x/n分位数,每个组的数目是相同的。
lalonde$age.quartiles <- eqcut(lalonde$age, 4, varlabel="Age")
lalonde$age.quartiles <- factor(lalonde$age.quartiles,
levels=c(levels(lalonde$age.quartiles), "P-value"))
#[1] "1st quartile of Age (years): [16.0,20.0)"
#[2] "2nd quartile of Age (years): [20.0,25.0)"
#[3] "3rd quartile of Age (years): [25.0,32.0)"
#[4] "4th quartile of Age (years): [32.0,55.0]"
#[5] "P-value"
# 这里函数rndr需要注意的是,lalonde$age.quartiles是一个全局变量
# 所以他并不是对所有的测试情况都使用,每次对于新的数据集都要修改这个因变量
rndr <- function(x, name, ...) {
if (length(x) == 0) {
y <- lalonde[[name]]
s <- rep("", length(render.default(x=y, name=name, ...)))
if (is.numeric(y)) {# 这里用的是kruskal.test检验,判断多个分布是否是相同的分布
p <- kruskal.test(y ~ lalonde$age.quartiles)$p.value
} else {
p <- chisq.test(table(y, droplevels(lalonde$age.quartiles)))$p.value
}
s[2] <- sub("<", "<", format.pval(p, digits=3, eps=0.001))
s
} else {
render.default(x=x, name=name, ...)
}
}
rndr.strat <- function(label, n, ...) {
ifelse(n==0, label, render.strat.default(label, n, ...))
}
table1(~married + nodegree + re74 + re75 + re78 | age.quartiles,
data=lalonde, droplevels=F, render=rndr, render.strat=rndr.strat, overall=F)
# 注意这个警告,并没有什么问题
测试3
rm(list=ls())
data(mtcars)
library(table1)
#mpg 连续变量
#cyl 分类变量
#disp 连续变量
#hp 连续变量
#vs am gear 这些是分类变量
test_df=mtcars
test_df$cyl=as.factor(test_df$cyl)
test_df$cyl <- factor(test_df$cyl,levels=c(levels(test_df$cyl), "P-value"))
test_df$vs=factor(test_df$vs)
test_df$am=factor(test_df$am)
test_df$gear=factor(test_df$gear)
label(test_df$cyl) <- "CYL"
rndr <- function(x, name, ...) {
#browser()
if (length(x) == 0) {
y <- test_df[[name]]
s <- rep("", length(render.default(x=y, name=name, ...)))
if (is.numeric(y)) {# 这里连续变量用的是t检验
x0=t.test(y ~ test_df$cyl)
p <- x0$p.value
stat <- x0$statistic
} else {# 如果是分类变量,用的是卡方检验
x0=chisq.test(table(y, droplevels(test_df$cyl)),simulate.p.value=TRUE)
p <- x0$p.value
stat <- x0$statistic
}
pp1=format.pval(p, digits=3, eps=0.001)
pp2=round(stat,3)
s[2] <- sub("<", "<", paste0(pp1," (",pp2,")"))
s
} else {
render.default(x=x, name=name, ...)
}
}
rndr.strat <- function(label, n, ...) {
ifelse(n==0, label, render.strat.default(label, n, ...))
}
table1(~vs + am+gear | cyl,data=test_df, droplevels=F, render=rndr, render.strat=rndr.strat, overall=F)
kruskal.test
# https://max.book118.com/html/2017/1230/146500674.shtm
food=data.frame(x=c(164,190,203,205,206,214,228,257,185,197,201,231,187,212,215,220,248,265,281,202,
204,207,227,230,276),g=factor(rep(1:4,c(8,4,7,6))))
print(kruskal.test(x~g,data=food))