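# Task: match the IDs of table 1 against table 2 below and merge each matched row of table 1 into table 2 (when an ID is repeated in table 1, use only its first row). (Table 1, Table 2)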
# Load both tables; the code below relies on each having an "ID" column.
data1 <- read.csv("blca_clusterInfo.csv")
data2 <- read.csv("subtype.csv")
# Sanity check: total vs. distinct ID counts in each table.
length(data1$ID)
length(unique(data1$ID))
length(data2$ID)
length(unique(data2$ID))
# IDs that occur in both tables.
ids <- intersect(data1$ID, data2$ID)
# Keep only table-1 rows whose ID also occurs in table 2 and, for an ID that
# appears several times, keep just its first row.
data1 <- data1[data1$ID %in% ids & !duplicated(data1$ID), ]
### Method 1: left join (every row of table 2 is kept)
data <- dplyr::left_join(data2, data1, by = "ID")
write.csv(data, "data.csv")
### Method 2: copy every non-ID column of table 1 into table 2 by ID lookup
# Columns of data1 to carry over (everything except the join key).
# setdiff() is used rather than negative indexing with which(): if "ID" were
# absent, x[-integer(0)] would silently select nothing.
colname_data1 <- setdiff(colnames(data1), "ID")
# For each row of data2, the position of the first data1 row with the same ID
# (NA when data1 has no such ID). Using the first match also means a repeated
# ID in data1 contributes only its first row. Unlike `==`, match() pairs an NA
# ID with an NA ID, which agrees with left_join's default behaviour.
row_in_data1 <- match(data2$ID, data1$ID)
# data: data1's columns lined up row-for-row with data2; rows of data2 without
# a match are NA in every column except ID.
data <- data1[row_in_data1, , drop = FALSE]
rownames(data) <- NULL
data$ID <- data2$ID
# Append (or overwrite, if the name already exists in data2) each column.
# Iterating over the names themselves is safe when there are no columns to
# copy, whereas 1:length(x) would iterate over c(1, 0).
for (col in colname_data1) {
  data2[[col]] <- data[[col]]
}
write.csv(data2, file = "data.csv")