实用技巧——亚组分析表格快速绘制

Dr_long1996

已于 2024-01-07 23:04:37 修改

阅读量730

点赞数 11

分类专栏：实用R语言技能 R语言文章标签： r语言

于 2023-12-12 16:10:54 首次发布

本文链接：https://blog.csdn.net/dr_long1996/article/details/134950033

版权

R语言同时被 2 个专栏收录

16 篇文章 20 订阅

订阅专栏

实用R语言技能

2 篇文章 0 订阅

订阅专栏

本文详细介绍了在生存分析中如何处理连续变量（如年龄、Towesnd指数和久坐时间）为分类变量，使用因子化方法，以及如何通过Cox回归进行亚组分析，包括HR、95%CI和P值的计算，以及数据的整理和展示。

摘要由CSDN通过智能技术生成

可能不是最方便的方法，但是自己学习过程总结的。如果有更好的方法欢迎各位大佬们补充！！！

以生存分析的材料为例：

1. 变量处理

按照亚组分析的变量创建一个新的分组变量，将连续型变量按照事先设定的阈值调整为分类变量。

#将连续型变量分类处理
#年龄
mydata$Age.group <- ifelse(mydata$Age<=60,1,
                           ifelse(mydata$Age>60,2,NA))
mydata$Age.group <- factor(mydata$Age.group,levels = c(1,2),labels = c("<=60",">60"))
table(mydata$Age.group)
#Towsend指数
summary(mydata$Townsend_deprivation_index)
mydata$Townsend_deprivation_index.group <- ifelse(mydata$Townsend_deprivation_index<(-2.3),1,
                                                  ifelse(mydata$Townsend_deprivation_index>=(-2.3),2,NA))
mydata$Townsend_deprivation_index.group <- factor(mydata$Townsend_deprivation_index.group,levels = c(1,2),labels = c("low","high"))
table(mydata$Townsend_deprivation_index.group)

#久坐时间
summary(mydata$Sedentary_hours)
mydata$Sedentary_hours.group <- ifelse(mydata$Sedentary_hours<4.5,1,
                                       ifelse(mydata$Sedentary_hours>=4.5,2,NA))
mydata$Sedentary_hours.group <- factor(mydata$Sedentary_hours.group,levels = c(1,2),labels = c("low","high"))
table(mydata$Sedentary_hours.group)

例如，我将上述的变量，年龄，Towsend指数和久坐时间进行的分类处理，并对其进行了因子化

2. 产生新的数据框

赋值到一个新的数据框，便于保留原始数据，挑选出自己需要的列，简化数据表格

library(dplyr)
names(mydata)
df <- mydata%>%
  select(patientID,Age,Townsend_deprivation_index,activity,Sedentary_hours,Total_sugar,Energy,Fat,Fish,red_meat,
         vegefruit_sum,Insulin_user,Lipid_lowering_drugs_user,Aspirin_user,futime,fustat,SSBs_category,ASBs_category,PJs_category,
         Sex,Age.group,Ethnic,smoking_status,Alcohol_intake_frequency,BMI.group,Townsend_deprivation_index.group,
         Sedentary_hours.group,sleep_duration_group,physical_activity_group,diabetes,Antihypertensive_drugs_user)


str(df)

3. 亚组分析的表格绘制（重点）

a.收集HR，95%CI 和p value值

创建一个表头，用于收纳需要收集的数据，因为自变量有四个水平，以1作为对照，那么就有3个比较的水平，所以设置3个HR

a <- c("HR1","5%CI1","95%CI1","P1","HR2","5%CI2","95%CI2","P2","HR3","5%CI3","95%CI3","P3")

建立cox回归结果

a1<-summary(coxph(Surv(futime,fustat==1)~SSBs_category+Age+Ethnic+Townsend_deprivation_index+
                  smoking_status+Alcohol_intake_frequency+activity+Sedentary_hours+
                  Total_sugar+Energy+Fat+Fish+red_meat+vegefruit_sum+
                  Insulin_user+Antihypertensive_drugs_user+Lipid_lowering_drugs_user+Aspirin_user,
                subset =Sex =="Male",data=df))

利用森林图格式整理的方法提取cox回归中的HR，然后用rbind连接

a <- rbind(a,c(round(a1$coefficients[1,2],2),round(a1$conf.int[1,3],2),round(a1$conf.int[1,4],2),round(a1$coefficients[1,5],3),
               round(a1$coefficients[2,2],2),round(a1$conf.int[2,3],2),round(a1$conf.int[2,4],2),round(a1$coefficients[2,5],3),
               round(a1$coefficients[3,2],2),round(a1$conf.int[3,3],2),round(a1$conf.int[3,4],2),round(a1$coefficients[3,5],3)))

a1$con.int提取HR，95%CI， a1$coefficients 提取P value

再次重复上述过程！！！！！只需要复制粘贴，修改subset内容就行

a1 <-summary(coxph(Surv(futime,fustat==1)~SSBs_category+Age+Ethnic+Townsend_deprivation_index+
                     smoking_status+Alcohol_intake_frequency+activity+Sedentary_hours+
                     Total_sugar+Energy+Fat+Fish+red_meat+vegefruit_sum+
                     Insulin_user+Antihypertensive_drugs_user+Lipid_lowering_drugs_user+Aspirin_user,
                   subset =Sex =="Female",data=df))
a <- rbind(a,c(round(a1$coefficients[1,2],2),round(a1$conf.int[1,3],2),round(a1$conf.int[1,4],2),round(a1$coefficients[1,5],3),
               round(a1$coefficients[2,2],2),round(a1$conf.int[2,3],2),round(a1$conf.int[2,4],2),round(a1$coefficients[2,5],3),
               round(a1$coefficients[3,2],2),round(a1$conf.int[3,3],2),round(a1$conf.int[3,4],2),round(a1$coefficients[3,5],3)))

b.给数据设置行名

rownames(a) <- c("characteristic",
                 "male","female","<60 years",">=65 years",
                 "white","non-white","smoking_yes","smoking_no","<3 times/week",">=3times/week","<25kg/m2",">=25kg/m2","low","high",
                 "sedentary_low","sedentary_high","sleep_low","sleep_high","activity_low","activity_high",
                 "diabetes_yes","diabetes_no","hypertension_yes","hypertension_no")

a <- data.frame(a)#设置为数据框

c.统计各亚组的样本数n

详情可以参考实用技巧——绘制森林图-CSDN博客中的“总结每个变量的病例数”部分。

library(tableone)
names(df)
myVars <- c("Sex","Age.group","Ethnic","smoking_status","Alcohol_intake_frequency","BMI.group","Townsend_deprivation_index.group",
            "Sedentary_hours.group","sleep_duration_group","physical_activity_group","diabetes","Antihypertensive_drugs_user")
catVars <-c("Sex","Age.group","Ethnic","smoking_status","Alcohol_intake_frequency","BMI.group","Townsend_deprivation_index.group",
            "Sedentary_hours.group","sleep_duration_group","physical_activity_group","diabetes","Antihypertensive_drugs_user")
table1 <- print(CreateTableOne(vars=myVars,
                              data=df,
                              factorVars=catVars),
                catDigits=2,contDigits=2,showAllLevels=TRUE)
table1 <- data.frame(table1)
a$n <- table1$Overall

table1的样式，然后将值赋值给a

d.统计亚组中发生的事件数events

#亚组中发生的事件数
table2 <- print(CreateTableOne(vars=myVars,
                              data=df,
                              strata="fustat",
                              factorVars=catVars),
               catDigits=2,contDigits=2,showAllLevels=TRUE)
table2 <- data.frame(table2)
a$event <- table2[,3]

e.创建ref组和HR（95%CI）组合

#新建一列作为对照组
a$reference <- round(c(rep(1.00,25)),2)
a$HR.95CI1 <- paste0(a$X1,"(",a$X2,"-",a$X3,")");a
a$HR.95CI2 <- paste0(a$X5,"(",a$X6,"-",a$X7,")");a
a$HR.95CI3 <- paste0(a$X9,"(",a$X10,"-",a$X11,")");a

f.将为行名设置为第一列，设置行名

a <- tibble::rownames_to_column(a,var = "Characteristics")
ncol(a)
colnames(a) <- c("characteristic","HR1","5%CI1","95%CI1","P1","HR2","5%CI2","95%CI2","P2","HR3","5%CI3","95%CI3","P3",
                 "n","event","0/day","0-1/day","1-2/day",">2/day")
a[,1] <- c("characteristic","male","female","<60 years",">=65 years","white","non-white","yes","no","<3 times/week",">=3times/week",
                 "<25kg/m2",">=25kg/m2","low","high","low","high","<7 h/day","≥7 h/day","low","high","yes","no","yes","no")
a <- a[-1,]

g.每个亚组变量上设置一个空行，插入变量名称

a <- rbind(c("Sex",rep(NA,18)),
           a[c(1:2),],
           c("Age",rep(NA,18)),
           a[c(3:4),],
           c("Ethnicity",rep(NA,18)),
           a[c(5:6),],
           c("smoking status",rep(NA,18)),
           a[c(7:8),],
           c("alcohol intake",rep(NA,18)),
           a[c(9:10),],
           c("BMI",rep(NA,18)),
           a[c(11:12),],
           c("deprivation index",rep(NA,18)),
           a[c(13:14),],
           c("sedentary hours",rep(NA,18)),
           a[c(15:16),],
           c("Sleep duration",rep(NA,18)),
           a[c(17:18),],
           c("physical activity",rep(NA,18)),
           a[c(19:20),],
           c("Diabetes",rep(NA,18)),
           a[c(21:22),],
           c("hypertension",rep(NA,18)),
           a[c(23:24),])
subgroup1 <-a