两个数据框合并
# First example frame: columns names, logFC and bmi.
data1 <- data.frame(
  names = c("AAA", "CCC", "DDD", "EEE", "FFF"),
  logFC = c(3, -0.4, 5, 0.4, -3),
  bmi = c("正常", "正常", "超重", "肥胖", "正常")
)
# Second example frame: columns names, gender and bmi.
# gender coding: 1 = male, 2 = female.
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1),
  bmi = c("正常", "低体重", "正常", "超重", "肥胖")
)
# Full outer join without an explicit `by`.
merge(x = data1, y = data2, all = TRUE)
# Same result; `by` can be omitted because it is just the shared column names.
merge(
  x = data1,
  y = data2,
  by = intersect(names(data1), names(data2)),
  all = TRUE
)
names | bmi | logFC | gender |
---|---|---|---|
AAA | 正常 | 3 | 1 |
BBB | 低体重 | NA | 2 |
CCC | 正常 | -0.4 | 1 |
DDD | 超重 | 5 | 2 |
EEE | 肥胖 | 0.4 | 1 |
FFF | 正常 | -3 | NA |
dplyr::full_join函数情况
# Full join with no `by`; dplyr reports the columns it joined on in a message.
dplyr::full_join(x = data1, y = data2)
dplyr::full_join函数情况:如果 by 只指定部分同名列(如只写 names),其余同名列(这里是 bmi)不会参与匹配,而是被拆成带 .x / .y 后缀的两列
# Full join keyed on `names` only.
dplyr::full_join(x = data1, y = data2, by = "names")
> dplyr::full_join(data1,data2)
Joining, by = c("names", "bmi")
names logFC bmi gender
1 AAA 3.0 正常 1
2 CCC -0.4 正常 1
3 DDD 5.0 超重 2
4 EEE 0.4 肥胖 1
5 FFF -3.0 正常 NA
6 BBB NA 低体重 2
names | logFC | bmi.x | gender | bmi.y |
---|---|---|---|---|
AAA | 3 | 正常 | 1 | 正常 |
CCC | -0.4 | 正常 | 1 | 正常 |
DDD | 5 | 超重 | 2 | 超重 |
EEE | 0.4 | 肥胖 | 1 | 肥胖 |
FFF | -3 | 正常 | NA | NA |
BBB | NA | NA | 2 | 低体重 |
R merge() 与 dplyr join() 的对应关系
dplyr | base |
---|---|
inner_join(df1, df2) | merge(df1, df2) |
left_join(df1, df2) | merge(df1, df2, all.x = TRUE) |
right_join(df1, df2) | merge(df1, df2, all.y = TRUE) |
full_join(df1, df2) | merge(df1, df2, all = TRUE) |
semi_join(df1, df2) | df1[df1$x %in% df2$x, , drop = FALSE] |
anti_join(df1, df2) | df1[!df1$x %in% df2$x, , drop = FALSE] |
# 假设同名变量里面的内容不一样,例如 AAA 的 bmi 在 data2 中为“正常1”
# Redefine data2 so that the bmi value for AAA differs from the one in data1.
# gender coding: 1 = male, 2 = female.
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1),
  bmi = c("正常1", "低体重", "正常", "超重", "肥胖")
)
# Full outer join keyed on every column name the two frames share.
merge(
  x = data1,
  y = data2,
  by = intersect(names(data1), names(data2)),
  all = TRUE
)
# 结果会另起一行
names | bmi | logFC | gender |
---|---|---|---|
AAA | 正常 | 3 | NA |
AAA | 正常1 | NA | 1 |
BBB | 低体重 | NA | 2 |
CCC | 正常 | -0.4 | 1 |
DDD | 超重 | 5 | 2 |
EEE | 肥胖 | 0.4 | 1 |
FFF | 正常 | -3 | NA |
多个数据框合并
# Example frames used for the multi-frame merge.
data1 <- data.frame(
  names = c("AAA", "CCC", "DDD", "EEE", "FFF"),
  logFC = c(3, -0.4, 5, 0.4, -3),
  bmi = c("正常", "正常", "超重", "肥胖", "正常")
)
# gender coding: 1 = male, 2 = female.
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1),
  bmi = c("正常", "低体重", "正常", "超重", "肥胖")
)
# Single-row frame. Here gender holds the character label for "male",
# whereas data2 stores numeric codes (1 = male, 2 = female).
data3 <- data.frame(
  names = "GGG",
  gender = "男",
  bmi = "正常"
)
# Names of the objects in the current environment that match "data".
file <- ls(pattern = "data")
# The three frames collected into one list.
ALL1 <- list(data1, data2, data3)
#' Full outer merge of several data frames
#'
#' Folds `merge(..., all = TRUE)` over the elements of `dat` from left to
#' right, so the result keeps every row of every input.
#'
#' @param dat A list of data frames. A bare data frame is also accepted and
#'   is returned unchanged.
#' @param ... Further arguments forwarded to every `merge()` call, e.g. `by`.
#'   Do not pass `all`; it is always set to `TRUE` here.
#' @return A data frame. An empty `dat` gives an empty data frame, and a
#'   single-element `dat` gives that element with its column names untouched.
multimerge <- function(dat = list(), ...) {
  # A data frame is itself a list of columns; without this guard its columns
  # would be merged with each other instead of the frame being returned.
  if (is.data.frame(dat)) {
    return(dat)
  }
  if (length(dat) == 0L) {
    return(data.frame())
  }
  if (length(dat) == 1L) {
    # Extract the element rather than converting the whole list: converting
    # the list prefixes column names with the element name and runs them
    # through make.names().
    return(as.data.frame(dat[[1L]]))
  }
  # `...` resolves lexically to this function's dots inside the closure.
  Reduce(function(acc, nxt) merge(acc, nxt, all = TRUE, ...), dat)
}
# Merge every frame held in ALL1.
multimerge(dat = ALL1)