ROC曲线

rm(list=ls())
library(dplyr)
library(survivalROC)
library(rbsurv)
library(parallel)
#detectCores()检查当前电脑可用核数
cl.cores <- detectCores()
cl <- makeCluster(24)
#多核操作
registerDoParallel(cl)
#cox_hcc_methy是行名样本,列名为时间,生存状态,和甲基化表达的数据框
dim(cox_hcc_methy)
#生成随机数
set.seed(2018)
c <- sample(1:365,183)
length(c)
#训练组
cox_hcc_methy_training <- cox_hcc_methy[c,]
dim(cox_hcc_methy_training)

#validate组
cox_hcc_methy_validate <- cox_hcc_methy[-c,]
dim(cox_hcc_methy_validate)

#训练组rbsurv
x <- t(as.matrix(cox_hcc_methy_training[,3:74]))
View(x)
time <- cox_hcc_methy_training$Time
status <- cox_hcc_methy_training$Status

fit <- rbsurv(time=time, status=status, x=x,  
               method="efron", max.n.genes=40, n.iter=100, n.fold=3, n.seq=1)
#获取被选中基因的表达矩阵
nData=x[as.numeric(fit$gene.list),]
row.names(nData)
#保存数据
save.image(file = "rbsurv.RData")
getwd()


fit1 <- rbsurv(time=time, status=status, x=x,  
              method="efron", max.n.genes=10, n.iter=100, n.fold=3, n.seq=1)
nData1=x[as.numeric(fit1$gene.list),]
rownames(nData1)
save(nData,file = "最终入选基因.RData")

#实验组的ROC曲线
profile2=t(nData1)
genes=colnames(profile2)
#clinical是行名为样本,列名为临床资料
clinical
profile2 <- as.data.frame(profile2)
profile2$patientid <- row.names(profile2)
profile3 <- inner_join(profile2,clinical,by = "patientid")
rownames(profile3) <- profile3$patientid
#删除patientid
profile3 <- profile3[,-7]
#risk 矩阵构建完成profile3
#构建rbsurv输入矩阵profile_matrix
profile_matrix <- profile3[,1:6]
Time <- profile3$Time/365
Status <- profile3$Status
genes=colnames(profile_matrix)
fmla <- as.formula(paste("Surv(Time, Status) ~", paste0(genes, collapse = "+")))
cox <- coxph(fmla, data = data.frame(profile_matrix))
#获得基因表达公式
coef <- cox$coefficients
riskScore_test <- as.matrix(profile_matrix) %*% coef
riskScore_test <- as.data.frame(riskScore_test)

riskScore_test$score <- riskScore_test$V1
riskScore_test$risk <- ifelse(riskScore_test$score>median(riskScore_test$score),'high','low')
#删除V1行
riskScore_test <- riskScore_test[,-1]

riskScore_test$patientid <- row.names(riskScore_test)
hcc_risk_test <- inner_join(riskScore_test,clinical,by = "patientid")
row.names(hcc_risk_test) <- hcc_risk_test$patientid

hcc_risk_test
hcc_risk_test <- hcc_risk_test[,-3]
hcc_risk_test$Time <- hcc_risk_test$Time/365
#存储risk数据
write.table(hcc_risk_test,file="HCC_risk_test.xls",sep="\t",row.names=T,quote=F)
roc_test=survivalROC(Stime=hcc_risk_test$Time,status=hcc_risk_test$Status, marker =    hcc_risk_test$score, 
                predict.time =3.5, method="KM")
#获得test组的ROC
roc_test$AUC



#验证组
profile_validata <- cox_hcc_methy_validate %>%
  dplyr::select(Time,Status,genes)
dim(profile_validata)
profile_matrix_validata <- profile_validata[,3:8]
fmla <- as.formula(paste("Surv(profile_validata$Time, profile_validata$Status) ~", paste0(genes, collapse = "+")))
#直接带入test组的表达公式:coef
riskScore_validata <- as.matrix(profile_matrix_validata) %*% coef
riskScore_validata <- as.data.frame(riskScore_validata)

riskScore_validata$score <- riskScore_validata$V1
#2.922652为分组的临界值
riskScore_validata$risk <- ifelse(riskScore_validata$score>2.922652,'high','low')
riskScore_validata <- riskScore_validata[,-1]
riskScore_validata <- as.data.frame(riskScore_validata)
riskScore_validata$patientid <- row.names(riskScore_validata)
hcc_risk_validate <- inner_join(riskScore_validata,clinical,by = "patientid")
row.names(hcc_risk_validate) <- hcc_risk_validate$patientid

hcc_risk_validate
hcc_risk_validate <- hcc_risk_validate[,-3]
hcc_risk_validate$Time <- hcc_risk_validate$Time/365

roc_vlidate=survivalROC(Stime=hcc_risk_validate$Time, status=hcc_risk_validate$Status, marker = hcc_risk_validate$score, 
                predict.time =2.32, method="KM")
#获得验证组的AUC
roc_vlidate$AUC

#总体
profile_total <- cox_hcc_methy %>%
  dplyr::select(Time,Status,genes)
profile_matrix_total <- profile_total[,3:8]
#直接带入公式
riskScore_total <- as.matrix(profile_matrix_total) %*% coef
riskScore_total <- as.data.frame(riskScore_total)

riskScore_total$score <- riskScore_total$V1
riskScore_total$risk <- ifelse(riskScore_total$score>2.922652,'high','low')
riskScore_total <- riskScore_total[,-1]
riskScore_total <- as.data.frame(riskScore_total)
riskScore_total$patientid <- row.names(riskScore_total)
hcc_risk_total <- inner_join(riskScore_total,clinical,by = "patientid")
row.names(hcc_risk_total) <- hcc_risk_total$patientid

hcc_risk_total
hcc_risk_total <- hcc_risk_total[,-3]
hcc_risk_total$Time <- hcc_risk_total$Time/365

roc_total=survivalROC(Stime=hcc_risk_total$Time, status=hcc_risk_total$Status, marker = hcc_risk_total$score, 
                        predict.time =3.78, method="KM")
#获得总体的AUC
roc_total$AUC

#ROC曲线绘制
x <- unlist(roc_test$FP)  ##提取x值
y <- unlist(roc_test$TP)
plotdata_test <- data.frame(x,y) 
plotdata_test$group <- "test"

x <-unlist(roc_vlidate$FP)  
y <- unlist(roc_vlidate$TP)
plotdata_validation <- data.frame(x,y) 
plotdata_validation$group <- "validation"

x <-unlist(roc_total$FP)  
y <- unlist(roc_total$TP)
plotdata_total <- data.frame(x,y) 
plotdata_total$group <- "total"

plotdata <- rbind(plotdata_test,plotdata_validation,plotdata_total)


g <- ggplot(plotdata) + 
  geom_path(aes(x = x, y = y,colour = group), size=1) + 
  labs(x="1 - Specificity", y = "Ture positive rate") +
  ggpubr::theme_classic2()+
  scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+
  theme(axis.text=element_text(size=14,face="bold"),axis.title=element_text(size=16,face="bold"))+
  geom_abline(intercept=0, slope=1,linetype="dashed")+
  scale_colour_hue(name="my legend", labels=c("test group AUC = 0.802","total group AUC = 0.750","validation group AUC = 0.691"))+
  theme(legend.title=element_blank())+theme(legend.text = element_text(size = 14,face = "bold"))+
  theme(legend.position=c(.75,.25))
g
save.image(file="AUC.RData")
  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值