CHARLS(中国健康与养老追踪调查)是一项对中国大陆 45 岁及以上人群具有代表性的追踪调查,旨在建设一个高质量的公共微观数据库,采集的信息涵盖社会经济状况和健康状况等多个维度,以满足老龄科学研究的需要。
为采用国际上最佳的数据采集方式,并确保研究结果的国际可比性,CHARLS 参照包括美国健康与退休研究(HRS)在内的一系列国际老龄调查研究开展调查设计。其全国基线调查于 2011-2012 年进行,并于 2013 年、2015 年、2018 年和 2020 年分别开展了 4 轮常规问卷的追踪调查,另于 2014 年完成了中国中老年人生命历程调查。为确保样本的代表性,CHARLS 基线调查覆盖了全国 150 个县/区、450 个村/城市社区,涉及 10,257 户家庭的 17,708 人,反映了中国中老年人群的总体情况。2019 年底到 2020 年初,新冠疫情在中国暴发,为及时记录新冠疫情对中国中老年人生活和健康的影响,2020 年的第 5 轮调查增加采集了疫情相关的信息。
本期以视频和代码的方式,演示如何使用 scitable 包快速完成一篇 CHARLS 文章的全部数据分析。
使用 scitable 包快速完成一篇 CHARLS 文章的全部表格和图片分析
library(survival)
library(scitable)
# Point the session at the folder that holds the data file
# (edit this path to wherever you saved the data).
setwd("E:/公众号文章2024年/charls数据库/class5")

# Read the analysis dataset: comma-separated, first row holds column names.
bc <- read.csv(file = "data.final.csv", header = TRUE, sep = ",")

# Show the structure of the data, then print the column names as an R
# vector literal that can be pasted into the variable lists below.
str(bc)
dput(names(bc))
########
# Variables handed to organizedata() as `allVars`.
allVars <- c(
  "age", "sex", "edu", "smoking", "married", "drink", "wc", "bmi", "time",
  "TYG", "TG", "TC", "HDL", "LDL", "FBG", "hba1c", "Hypertension", "CVD",
  "New.diabetes"
)
# NOTE(review): presumably the variables to be treated as categorical;
# confirm against the scitable documentation.
fvars <- c("sex", "edu")

x <- "TYG"            # the exposure variable under study
y <- "New.diabetes"   # the outcome variable
# Covariates. They must not contain x or y, so the exposure "TYG" is not
# listed here (it is passed separately through `x`).
cov3 <- c(
  "age", "sex", "edu", "married", "drink", "smoking", "Hypertension",
  "wc", "bmi", "CVD"
)
family <- "cox"       # type of analysis
time <- "time"        # name of the follow-up time column

# organizedata() is called with `username` and `token`, which this script
# never defines. Stop early with a clear message instead of failing inside
# the package call.
if (!exists("username") || !exists("token")) {
  stop(
    "Define `username` and `token` before running this script.",
    call. = FALSE
  )
}

# Check and tidy the data.
out <- organizedata(
  data = bc, allVars = allVars, x = x, y = y, fvars = fvars, cov3 = cov3,
  family = family, time = time, username = username, token = token
)
data <- out[["data"]]
fit <- out[["fit"]]
fit[["formula"]]  # print the model formula
cov3 <- out[["cov"]]
########
# Tables 1 and 2 use the same variable lists, so they are defined once here.
allVars <- c(
  "age", "sex", "edu", "smoking", "married", "drink", "wc", "bmi", "time",
  "TYG", "TC", "HDL", "LDL", "FBG", "hba1c", "Hypertension", "CVD",
  "New.diabetes"
)
fvars <- c(
  "sex", "edu", "smoking", "married", "drink", "Hypertension", "CVD",
  "New.diabetes"
)

# Table 1: stratified by the exposure TYG.
# NOTE(review): `num = 4` presumably sets how many groups the stratifying
# variable is split into; confirm against the scitable documentation.
strata <- "TYG"
tb1 <- scitb1(
  vars = allVars, fvars = fvars, strata = strata, data = data, num = 4
)

# Table 2: stratified by the outcome New.diabetes.
strata <- "New.diabetes"
tb2 <- scitb1(
  vars = allVars, fvars = fvars, strata = strata, data = data, num = 4
)
# Table 3 (type 3a).
# cov2 is the two-variable covariate set; cov3 is reused unchanged from the
# earlier organizedata() step, so no reassignment is needed here.
cov2 <- c("age", "sex")
tb3 <- scitb3a(
  data = data, x = x, y = y, cov2 = cov2, cov3 = cov3,
  family = family, time = time, username = username, token = token
)
# Figure 1: restricted cubic spline curve for TYG.
library(rms)
library(ggrcs)
library(ggplot2)

# Build the datadist summary of `data` and register it by name in options().
dd <- datadist(data)
options(datadist = "dd")

# Cox model with TYG entered through rcs(TYG, 3), adjusted for the covariates.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
fit <- cph(
  Surv(time, New.diabetes) ~ rcs(TYG, 3) + age + sex + edu + married +
    drink + smoking + Hypertension + wc + bmi + CVD,
  data = data, x = TRUE, y = TRUE
)
ggrcs(data = data, x = x, fit = fit, px = 6, py = 6)

# Same model with TYG entered as a plain linear term; passed to cuttab().
fit1 <- cph(
  Surv(time, New.diabetes) ~ TYG + age + sex + edu + married +
    drink + smoking + Hypertension + wc + bmi + CVD,
  data = data
)
cuttabtb <- cuttab(fit1, x, data)
# Table 4: subgroup / interaction analysis.
# Dichotomise each continuous variable at its cut-off and store the result as
# a factor: 1 = at or above the cut-off, 2 = below it.
data$age.f <- as.factor(ifelse(data$age >= 65, 1, 2))
data$bmi.f <- as.factor(ifelse(data$bmi >= 24, 1, 2))
data$FBG.f <- as.factor(ifelse(data$FBG >= 100, 1, 2))
data$TG.f <- as.factor(ifelse(data$TG >= 200, 1, 2))

# Variables passed to scitb5b() as `Interaction`.
Interaction <- c("age.f", "bmi.f", "FBG.f", "TG.f", "sex")

# NOTE(review): cov1 is not used by the call below, which passes cov3. It also
# repeats "Hypertension" and "CVD" and names "sbp" and "Lipid.drug", which do
# not appear in the variable lists earlier in this script. Confirm whether it
# is a leftover before relying on it.
cov1 <- c(
  "sex", "edu", "married", "bmi",
  "sbp", "wc", "Hypertension", "CVD", "Lipid.drug", "FBG", "TYG",
  "Hypertension", "CVD"
)

tb5 <- scitb5b(
  data = data, x = x, y = y, Interaction = Interaction, cov = cov3,
  time = time, family = family, username = username, token = token
)