R语言merge函数简介

authors <- data.frame(
    surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
    nationality = c("US", "Australia", "US", "UK", "Australia"),
    deceased = c("yes", rep("no", 4)))
books <- data.frame(
    name = I(c("Tukey", "Venables", "Tierney",
             "Ripley", "Ripley", "McNeil", "R Core")),
    title = c("Exploratory Data Analysis",
              "Modern Applied Statistics ...",
              "LISP-STAT",
              "Spatial Statistics", "Stochastic Simulation",
              "Interactive Data Analysis",
              "An Introduction to R"),
    other.author = c(NA, "Ripley", NA, NA, NA, NA,
                     "Venables & Smith"))

如果要实现类似sql里面的inner join 功能,则用代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name")
如果要实现left join功能则用代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name",all.x=TRUE)
right join功能代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name",all.y=TRUE)
all join功能代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name",all=TRUE)

关于单变量匹配的总结就是这些,但对于多变量匹配呢,例如下面两个表,需要对k1,k2两个变量都相等的情况下匹配
x <- data.frame(k1 = c(NA,NA,3,4,5), k2 = c(1,NA,NA,4,5), data = 1:5)
y <- data.frame(k1 = c(NA,2,NA,4,5), k2 = c(NA,NA,3,4,5), data = 1:5)

匹配代码如下merge(x, y, by = c("k1","k2"))  #inner join





> ID<-c(1,2,3,4)
> name<-c("Jim","Tony","Lisa","Tom")
> score<-c(89,22,78,78)
> student1<-data.frame(ID,name)
> student2<-data.frame(ID,score)
> student1
  ID name
1  1  Jim
2  2 Tony
3  3 Lisa
4  4  Tom
> student2
  ID score
1  1    89
2  2    22
3  3    78
4  4    78
> total_student<-merge(student1,student2,by="ID")
> total_student
  ID name score
1  1  Jim    89
2  2 Tony    22
3  3 Lisa    78
4  4  Tom    78
> ID<-c(1,2,3)
> name<-c("Jame","Kevin","Sunny")
> student1<-data.frame(ID,name)
> ID<-c(4,5,6)
> name<-c("Sun","Frame","Eric")
> student2<-data.frame(ID,name)
> student1
  ID  name
1  1  Jame
2  2 Kevin
3  3 Sunny
> student2
  ID  name
1  4   Sun
2  5 Frame
3  6  Eric
> total<-rbind(student1,student2)
> total
  ID  name
1  1  Jame
2  2 Kevin
3  3 Sunny
4  4   Sun
5  5 Frame
6  6  Eric
> authors <- data.frame(
+     surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
+     nationality = c("US", "Australia", "US", "UK", "Australia"),
+     deceased = c("yes", rep("no", 4)))
> authors
   surname nationality deceased
1    Tukey          US      yes
2 Venables   Australia       no
3  Tierney          US       no
4   Ripley          UK       no
5   McNeil   Australia       no
> books <- data.frame(
+     name = I(c("Tukey", "Venables", "Tierney",
+              "Ripley", "Ripley", "McNeil", "R Core")),
+     title = c("Exploratory Data Analysis",
+               "Modern Applied Statistics ...",
+               "LISP-STAT",
+               "Spatial Statistics", "Stochastic Simulation",
+               "Interactive Data Analysis",
+               "An Introduction to R"),
+     other.author = c(NA, "Ripley", NA, NA, NA, NA,
+                      "Venables & Smith"))
>
> books
      name                         title     other.author
1    Tukey     Exploratory Data Analysis             <NA>
2 Venables Modern Applied Statistics ...           Ripley
3  Tierney                     LISP-STAT             <NA>
4   Ripley            Spatial Statistics             <NA>
5   Ripley         Stochastic Simulation             <NA>
6   McNeil     Interactive Data Analysis             <NA>
7   R Core          An Introduction to R Venables & Smith
> authors
   surname nationality deceased
1    Tukey          US      yes
2 Venables   Australia       no
3  Tierney          US       no
4   Ripley          UK       no
5   McNeil   Australia       no
> m1 <- merge(authors, books, by.x = "surname", by.y = "name")
> m1
   surname nationality deceased                         title other.author
1   McNeil   Australia       no     Interactive Data Analysis         <NA>
2   Ripley          UK       no            Spatial Statistics         <NA>
3   Ripley          UK       no         Stochastic Simulation         <NA>
4  Tierney          US       no                     LISP-STAT         <NA>
5    Tukey          US      yes     Exploratory Data Analysis         <NA>
6 Venables   Australia       no Modern Applied Statistics ...       Ripley
> m1 <- merge(authors, books, by.x = "surname", by.y = "name",all.x=TRUE)
> m1
   surname nationality deceased                         title other.author
1   McNeil   Australia       no     Interactive Data Analysis         <NA>
2   Ripley          UK       no            Spatial Statistics         <NA>
3   Ripley          UK       no         Stochastic Simulation         <NA>
4  Tierney          US       no                     LISP-STAT         <NA>
5    Tukey          US      yes     Exploratory Data Analysis         <NA>
6 Venables   Australia       no Modern Applied Statistics ...       Ripley
> m1 <- merge(authors, books, by.x = "surname", by.y = "name",all.y=TRUE)
> m1
   surname nationality deceased                         title
1   McNeil   Australia       no     Interactive Data Analysis
2   R Core        <NA>     <NA>          An Introduction to R
3   Ripley          UK       no            Spatial Statistics
4   Ripley          UK       no         Stochastic Simulation
5  Tierney          US       no                     LISP-STAT
6    Tukey          US      yes     Exploratory Data Analysis
7 Venables   Australia       no Modern Applied Statistics ...
      other.author
1             <NA>
2 Venables & Smith
3             <NA>
4             <NA>
5             <NA>
6             <NA>
7           Ripley
> x <- data.frame(k1 = c(NA,NA,3,4,5), k2 = c(1,NA,NA,4,5), data = 1:5)
> y <- data.frame(k1 = c(NA,2,NA,4,5), k2 = c(NA,NA,3,4,5), data = 1:5)
> x
  k1 k2 data
1 NA  1    1
2 NA NA    2
3  3 NA    3
4  4  4    4
5  5  5    5
> y
  k1 k2 data
1 NA NA    1
2  2 NA    2
3 NA  3    3
4  4  4    4
5  5  5    5
> merge(x, y, by = c("k1","k2"))
  k1 k2 data.x data.y
1  4  4      4      4
2  5  5      5      5
3 NA NA      2      1
>

  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值