实用技巧——亚组分析表格快速绘制

可能不是最方便的方法,但是自己学习过程总结的。如果有更好的方法欢迎各位大佬们补充!!!

以生存分析的材料为例:

1. 变量处理

按照亚组分析的变量创建一个新的分组变量,将连续型变量按照事先设定的阈值调整为分类变量。

#将连续型变量分类处理
#年龄
mydata$Age.group <- ifelse(mydata$Age<=60,1,
                           ifelse(mydata$Age>60,2,NA))
mydata$Age.group <- factor(mydata$Age.group,levels = c(1,2),labels = c("<=60",">60"))
table(mydata$Age.group)
#Towsend指数
summary(mydata$Townsend_deprivation_index)
mydata$Townsend_deprivation_index.group <- ifelse(mydata$Townsend_deprivation_index<(-2.3),1,
                                                  ifelse(mydata$Townsend_deprivation_index>=(-2.3),2,NA))
mydata$Townsend_deprivation_index.group <- factor(mydata$Townsend_deprivation_index.group,levels = c(1,2),labels = c("low","high"))
table(mydata$Townsend_deprivation_index.group)

#久坐时间
summary(mydata$Sedentary_hours)
mydata$Sedentary_hours.group <- ifelse(mydata$Sedentary_hours<4.5,1,
                                       ifelse(mydata$Sedentary_hours>=4.5,2,NA))
mydata$Sedentary_hours.group <- factor(mydata$Sedentary_hours.group,levels = c(1,2),labels = c("low","high"))
table(mydata$Sedentary_hours.group)

例如,我将上述的变量,年龄,Towsend指数和久坐时间进行的分类处理,并对其进行了因子化

2. 产生新的数据框

赋值到一个新的数据框,便于保留原始数据,挑选出自己需要的列,简化数据表格

library(dplyr)
names(mydata)
df <- mydata%>%
  select(patientID,Age,Townsend_deprivation_index,activity,Sedentary_hours,Total_sugar,Energy,Fat,Fish,red_meat,
         vegefruit_sum,Insulin_user,Lipid_lowering_drugs_user,Aspirin_user,futime,fustat,SSBs_category,ASBs_category,PJs_category,
         Sex,Age.group,Ethnic,smoking_status,Alcohol_intake_frequency,BMI.group,Townsend_deprivation_index.group,
         Sedentary_hours.group,sleep_duration_group,physical_activity_group,diabetes,Antihypertensive_drugs_user)


str(df)

3. 亚组分析的表格绘制(重点)

a.收集HR,95%CI 和p value值

创建一个表头,用于收纳需要收集的数据,因为自变量有四个水平,以1作为对照,那么就有3个比较的水平,所以设置3个HR

a <- c("HR1","5%CI1","95%CI1","P1","HR2","5%CI2","95%CI2","P2","HR3","5%CI3","95%CI3","P3")

建立cox回归结果

a1<-summary(coxph(Surv(futime,fustat==1)~SSBs_category+Age+Ethnic+Townsend_deprivation_index+
                  smoking_status+Alcohol_intake_frequency+activity+Sedentary_hours+
                  Total_sugar+Energy+Fat+Fish+red_meat+vegefruit_sum+
                  Insulin_user+Antihypertensive_drugs_user+Lipid_lowering_drugs_user+Aspirin_user,
                subset =Sex =="Male",data=df))

利用森林图格式整理的方法提取cox回归中的HR,然后用rbind连接

a <- rbind(a,c(round(a1$coefficients[1,2],2),round(a1$conf.int[1,3],2),round(a1$conf.int[1,4],2),round(a1$coefficients[1,5],3),
               round(a1$coefficients[2,2],2),round(a1$conf.int[2,3],2),round(a1$conf.int[2,4],2),round(a1$coefficients[2,5],3),
               round(a1$coefficients[3,2],2),round(a1$conf.int[3,3],2),round(a1$conf.int[3,4],2),round(a1$coefficients[3,5],3)))

a1$con.int提取HR,95%CI, a1$coefficients 提取P value

再次重复上述过程!!!!!只需要复制粘贴,修改subset内容就行

a1 <-summary(coxph(Surv(futime,fustat==1)~SSBs_category+Age+Ethnic+Townsend_deprivation_index+
                     smoking_status+Alcohol_intake_frequency+activity+Sedentary_hours+
                     Total_sugar+Energy+Fat+Fish+red_meat+vegefruit_sum+
                     Insulin_user+Antihypertensive_drugs_user+Lipid_lowering_drugs_user+Aspirin_user,
                   subset =Sex =="Female",data=df))
a <- rbind(a,c(round(a1$coefficients[1,2],2),round(a1$conf.int[1,3],2),round(a1$conf.int[1,4],2),round(a1$coefficients[1,5],3),
               round(a1$coefficients[2,2],2),round(a1$conf.int[2,3],2),round(a1$conf.int[2,4],2),round(a1$coefficients[2,5],3),
               round(a1$coefficients[3,2],2),round(a1$conf.int[3,3],2),round(a1$conf.int[3,4],2),round(a1$coefficients[3,5],3)))

b.给数据设置行名

rownames(a) <- c("characteristic",
                 "male","female","<60 years",">=65 years",
                 "white","non-white","smoking_yes","smoking_no","<3 times/week",">=3times/week","<25kg/m2",">=25kg/m2","low","high",
                 "sedentary_low","sedentary_high","sleep_low","sleep_high","activity_low","activity_high",
                 "diabetes_yes","diabetes_no","hypertension_yes","hypertension_no")

a <- data.frame(a)#设置为数据框

c.统计各亚组的样本数n

详情可以参考实用技巧——绘制森林图-CSDN博客中的“总结每个变量的病例数”部分。

library(tableone)
names(df)
myVars <- c("Sex","Age.group","Ethnic","smoking_status","Alcohol_intake_frequency","BMI.group","Townsend_deprivation_index.group",
            "Sedentary_hours.group","sleep_duration_group","physical_activity_group","diabetes","Antihypertensive_drugs_user")
catVars <-c("Sex","Age.group","Ethnic","smoking_status","Alcohol_intake_frequency","BMI.group","Townsend_deprivation_index.group",
            "Sedentary_hours.group","sleep_duration_group","physical_activity_group","diabetes","Antihypertensive_drugs_user")
table1 <- print(CreateTableOne(vars=myVars,
                              data=df,
                              factorVars=catVars),
                catDigits=2,contDigits=2,showAllLevels=TRUE)
table1 <- data.frame(table1)
a$n <- table1$Overall

table1的样式,然后将值赋值给a

d.统计亚组中发生的事件数events

#亚组中发生的事件数
table2 <- print(CreateTableOne(vars=myVars,
                              data=df,
                              strata="fustat",
                              factorVars=catVars),
               catDigits=2,contDigits=2,showAllLevels=TRUE)
table2 <- data.frame(table2)
a$event <- table2[,3]

e.创建ref组和HR(95%CI)组合

#新建一列作为对照组
a$reference <- round(c(rep(1.00,25)),2)
a$HR.95CI1 <- paste0(a$X1,"(",a$X2,"-",a$X3,")");a
a$HR.95CI2 <- paste0(a$X5,"(",a$X6,"-",a$X7,")");a
a$HR.95CI3 <- paste0(a$X9,"(",a$X10,"-",a$X11,")");a

f.将为行名设置为第一列,设置行名

a <- tibble::rownames_to_column(a,var = "Characteristics")
ncol(a)
colnames(a) <- c("characteristic","HR1","5%CI1","95%CI1","P1","HR2","5%CI2","95%CI2","P2","HR3","5%CI3","95%CI3","P3",
                 "n","event","0/day","0-1/day","1-2/day",">2/day")
a[,1] <- c("characteristic","male","female","<60 years",">=65 years","white","non-white","yes","no","<3 times/week",">=3times/week",
                 "<25kg/m2",">=25kg/m2","low","high","low","high","<7 h/day","≥7 h/day","low","high","yes","no","yes","no")
a <- a[-1,]

g.每个亚组变量上设置一个空行,插入变量名称

a <- rbind(c("Sex",rep(NA,18)),
           a[c(1:2),],
           c("Age",rep(NA,18)),
           a[c(3:4),],
           c("Ethnicity",rep(NA,18)),
           a[c(5:6),],
           c("smoking status",rep(NA,18)),
           a[c(7:8),],
           c("alcohol intake",rep(NA,18)),
           a[c(9:10),],
           c("BMI",rep(NA,18)),
           a[c(11:12),],
           c("deprivation index",rep(NA,18)),
           a[c(13:14),],
           c("sedentary hours",rep(NA,18)),
           a[c(15:16),],
           c("Sleep duration",rep(NA,18)),
           a[c(17:18),],
           c("physical activity",rep(NA,18)),
           a[c(19:20),],
           c("Diabetes",rep(NA,18)),
           a[c(21:22),],
           c("hypertension",rep(NA,18)),
           a[c(23:24),])
subgroup1 <-a

  • 11
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值