Cox回归全称Cox比例风险模型,主要用于带有时间的生存结局的影响因素研究,或评价某个临床治疗措施对患者生存的影响,是流行性学研究中常用的统计分析方法,适用于队列数据的分析。随着近些年大量队列研究的开展,Cox回归应用越来越多,今天给大家介绍一下Cox回归的R语言实现的方法。
- 加载需要的程序包
library(pacman)
pacman::p_load(openxlsx,compareGroups,tidyverse,survival,plyr,survminer,Ipaper,dplyr,irr)
library(car)
- 读取数据,将数据整理成合适的形式,为了剔除缺失数据,提取得到参与了开始调研及随访期调查的样本尤为重要。
# loadig data,一次读取多个sheet
name_sheet = 1:3
re2 = map(name_sheet, ~ read.xlsx("~file",sheet=.))
re2
# # write ,一次多个sheet
# write.xlsx(re2,"file.xlsx")
#按照"档案编号"去除当年重复的数据
data1 <- re2[[1]] %>% distinct(档案编号, .keep_all = T)
data2 <- re2[[2]] %>% distinct(档案编号, .keep_all = T)
data3 <- re2[[3]] %>% distinct(档案编号, .keep_all = T)
# 列出数据框
data_list <- list(data1,data2,data3)
#tidyverse 进行多数据集数据合并
data <- data_list %>% reduce(inner_join, by = "档案编号")
# 保存合并的数据
write.xlsx(data,...)
- 进行Cox回归分析
进行分析前,将数据转换为 numeric类型,接下来进行 cox单因素分析
basur <- Surv(time = Coxdata$time,event = Coxdata$肾功能损伤 == 1)
Coxdata$basur <- with(Coxdata,basur)
Gcox <- coxph(basur~性别,data = Coxdata)
Gsum <- summary(Gcox)
HR <- round(Gsum$coefficients[,2],2)
Pvalue <- round(Gsum$coefficients[,5],3)
CI <- paste0(round(Gsum$conf.int[,3:4],2),collapse = '-')
UNIcox <- data.frame('Characteristics' = '性别',
'Harzard Ratio' = HR,
'CI95' = CI,
'P value' = Pvalue)
# 如果使用循环,查看R 内存
# memory.limit()
# 制做自己的函数,适用于二分类及连续变量
UNIcox <- function(x){
fml <- as.formula(paste0('basur~',x))
Gcox <- coxph(fml,data = Coxdata)
Gsum <- summary(Gcox)
HR <- round(Gsum$coefficients[,2],2)
Pvalue <- round(Gsum$coefficients[,5],3)
CI <- paste0(round(Gsum$conf.int[,3:4],2),collapse = '-')
UNIcox <- data.frame('Characteristics' = x,
'Harzard Ratio' = HR,
'CI95' = CI,
'P value' = Pvalue)
return(UNIcox)
}
UNIcox('性别')
colnames(Coxdata)
Varnames <- colnames(Coxdata)[c(2:16,20:30,40:42)]
Varnames
Univar <- lapply(Varnames,UNIcox)
Univar
Univar <- bind_rows(Univar)
# Univar <- ldply(Univar, data.frame)
Univar
write.xlsx(Univar,"dir.xlsx",
colNames = T, rowNames = F, showNA = T)
Univar$Characteristics[Univar$P.value <0.05]
## multivariable analysis
FML <- as.formula(paste0('basur~',paste0(Univar$Characteristics[Univar$P.value <0.05],collapse = '+')))
FML
Multicox <- coxph(FML,data = Coxdata)
## ph assumption
test.ph <- cox.zph(Multicox)
test.ph
## 可视化PH假定结果
ggcoxzph(test.ph)
## 输入显著变量
Multicox <- coxph(basur~x1+x2+x3+x4
,data = Coxdata)
lm.reg<-lm(y~x1+x2+x3+...
,data = Coxdata)
vif(lm.reg)
# Multiname <- c(Univar$Characteristics[Univar$P.value <0.05])
MultiSum <- summary(Multicox)
MHR <- round(MultiSum$coefficients[,2],2)
MPvalue <- round(MultiSum$coefficients[,5],3)
MCIL <- round(MultiSum$conf.int[,3],2)
MCIU <- round(MultiSum$conf.int[,4],2)
MCI <- paste0(MCIL,'-',MCIU)
MULcox <- data.frame(
'Harzard Ratio' = MHR,
'CI95' = MCI,
'P value' = MPvalue)
MULcox
write.xlsx(MULcox,"file.xlsx",
colNames = T, rowNames = T, showNA = T)
# 整合表格
finalTab <- merge.data.frame(Univar,MULcox,by = 'Characteristics', all = T,sort = TRUE)
write.xlsx(finalTab,"file.xlsx", colNames = T, rowNames = F, showNA = T)