病例对照研究,对年龄性别进行频数匹配
示例数据
示例数据下载
ID | group | sex | age |
---|---|---|---|
A1 | Control | 女 | 90 |
A10 | Control | 男 | 89 |
A100 | Control | 男 | 85 |
A1000 | Control | 男 | 74 |
A1001 | Case | 女 | 74 |
A1002 | Case | 男 | 74 |
A1003 | Case | 女 | 74 |
A1004 | Case | 男 | 74 |
A1005 | Control | 男 | 74 |
# Frequency-match cases and controls within each sex x age stratum.
#
# Reads the study data, splits it into strata, and for every stratum that
# contains both cases and controls keeps an equal number of each by randomly
# down-sampling the larger group. Each matched stratum is stored in a
# top-level object named dataFinal<i>, where <i> is the stratum's index in
# dataSplit; strata lacking either group produce no object.
data <- read.csv("年龄性别频数匹配.csv")

# Age bands with left-closed intervals [a, b) because right = FALSE.
data$agegroup <- cut(
  data$age,
  c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, Inf),
  right = FALSE
)

# NOTE(review): strata are built from sex and the exact age, not from the
# agegroup bands computed above -- confirm that exact-age matching is intended.
dataSplit <- split(data, list(data$sex, data$age))

# Remove matched strata left over from an earlier run in the same session so
# that later code collecting dataFinal* objects only sees current results.
rm(list = ls(pattern = "^dataFinal[0-9]+$"))

for (i in seq_along(dataSplit)) {
  stratum <- as.data.frame(dataSplit[[i]])
  # %in% (unlike ==) never returns NA, so rows with a missing group are
  # excluded instead of turning into all-NA rows.
  cases <- stratum[stratum$group %in% "Case", ]
  controls <- stratum[stratum$group %in% "Control", ]
  numCase <- nrow(cases)
  numControl <- nrow(controls)

  # A stratum without both groups cannot be matched; skip it.
  if (numCase == 0 || numControl == 0) {
    next
  }

  # The seed is reset for every stratum, so the rows drawn depend only on the
  # stratum's own group sizes and row order.
  set.seed(123)
  if (numControl >= numCase) {
    # Enough controls: keep all cases and draw as many controls.
    matched <- rbind(cases, controls[sample.int(numControl, numCase), ])
  } else {
    # Fewer controls than cases: keep all controls and draw as many cases.
    matched <- rbind(controls, cases[sample.int(numCase, numControl), ])
  }
  assign(paste0("dataFinal", i), matched)
}
# Merge a list of data frames into one by successive full outer joins.
#
# dat: a list of data frames (default: an empty list).
# ...: further arguments forwarded to every merge() call, e.g. by = "id".
#
# Returns as.data.frame(dat) when dat has fewer than two elements; otherwise
# the result of folding merge(all = TRUE) over the elements from left to right.
multimerge <- function(dat = list(), ...) {
  if (length(dat) < 2) {
    return(as.data.frame(dat))
  }
  # all = TRUE keeps rows that have no partner in the other table.
  Reduce(function(acc, nxt) merge(acc, nxt, all = TRUE, ...), dat)
}
# Gather every matched stratum (objects named dataFinal<number>), combine them
# into a single data frame, sort it, and write it to dataALL.csv.
#
# The anchored pattern keeps unrelated objects whose names merely contain
# "dataFinal" out of the merge.
files <- ls(pattern = "^dataFinal[0-9]+$")
if (length(files) == 0) {
  stop("No dataFinal* objects found: no stratum was matched.", call. = FALSE)
}

# mget() fetches the objects by name; unname() keeps the list unnamed so that
# a single-element list is not given prefixed column names by as.data.frame().
listALL <- unname(mget(files))

dataALL <- multimerge(listALL)
dataALL <- dplyr::arrange(dataALL, age, sex, group)
write.csv(dataALL, "dataALL.csv", row.names = FALSE)