#HLM
library(readxl)
library(xlsx)
mydata <- as.data.frame(read_excel("mydata_2019_12.xlsx"))
View(mydata)
#create new dataframe
mydata_new_constructed <- data.frame()
k=1
for (i in 1:nrow(mydata)){
for (j in 2:35) {
if (!is.na(mydata[i,j])) {
mydata_new_constructed[k,1]<-mydata[i,1]
mydata_new_constructed[k,2]<-as.numeric(colnames(mydata[j]))
mydata_new_constructed[k,3]<-mydata[i,j]
mydata_new_constructed[k,4]<-mydata[i,36]
k<-k+1
}
}
}
colnames(mydata_new_constructed)<-c('id','age_week','weight_gain','BMI_group')
###############################################################generate the RCS variables based on age_week
RCS=as.data.frame(rcspline.eval(mydata_new_constructed$age_week, nk=6))
mydata_new_RCS = cbind(mydata_new_constructed,RCS)
write.table(mydata_new_RCS,'mydata_new_RCS.csv')
#############################################################begin to deal with the model
library(readxl)
mydata <- as.data.frame(read_excel("mydata_new_RCS.xlsx"))
names(mydata)<-c('id','age_week','weight_gain','BMI_group','age1','age2','age3','age4')
#delete the outliers original num = 9432,
mydata<-mydata[mydata$weight_gain<32,]#丢去后9405
mydata<-mydata[mydata$weight_gain>-30,]#丢去后9423
mydata_new_constructed<-mydata_new_constructed[mydata_new_constructed$weight_gain<32,]
mydata_new_constructed<-mydata_new_constructed[mydata_new_constructed$weight_gain>-30,]
#if need to create the dependant variable
for (i in 1:nrow(mydata)){
if (mydata[i,4]==1) {
mydata[i,'LOGWTGAINKG']<-log(mydata[i,3]+4.5)
} else if (mydata[i,4]==2) {
mydata[i,'LOGWTGAINKG']<-log(mydata[i,3]+5.9)
} else if (mydata[i,4]==3) {
mydata[i,'LOGWTGAINKG']<-log(mydata[i,3]+7)
}
}
#build up the model
library(lme4)
Model3.1 = lmer(weight_gain ~ age1+age2+age3+age4+ (1|id) +(1+age1|BMI_group)+(1+age2|BMI_group)+(1+age3|BMI_group)+(1+age4|BMI_group), data=mydata,REML=FALSE)
summary(Model3.1)
coef(Model3.1)
Model3.1
anova(Model3.1)
VarCorr(Model3.1)
diag(VarCorr(Model3.1)$index)
attr(VarCorr(Model3.1),"sc")
#var example: 0.207 + 0.0002(GA Spline term1^2)+ 2(GA Spline term1)(-0.005) + 0.013
RCS_data <- unique(mydata[c(2,5,6,7,8)])
for (i in 1:nrow(RCS_data)){
RCS_data[i,'mean1'] <- 2.867073+ 3.597614*RCS_data[i,2] -12.54554*RCS_data[i,3] +19.79477*RCS_data[i,4] -22.73170*RCS_data[i,5]
RCS_data[i,'mean2'] <- 2.867073+ 3.568180*RCS_data[i,2]-12.49818*RCS_data[i,3] +19.79483*RCS_data[i,4]-22.73137*RCS_data[i,5]
RCS_data[i,'mean3'] <- 2.867073+ 3.357253*RCS_data[i,2]-12.09662*RCS_data[i,3] +19.79483*RCS_data[i,4]-22.73169*RCS_data[i,5]
RCS_data[i,'sd1'] <- sqrt((14.71+1.578) + 0.04079*(RCS_data[i,2]^2)+