原文地址:https://zhuanlan.zhihu.com/p/25784721
使用举例
# 安装并载入dplyr包
install.packages("dplyr")
library(dplyr)
# 创建两个数据集order和price
> set.seed(1)
> ordernum <- sample(1:20, 5)
> itemprice <- sample(100:500, 5)
> order <- data.frame(itemid=1:5, ordernum, class=rep(1:2,2:3))
> order
itemid ordernum class
1 1 6 1
2 2 8 1
3 3 11 2
4 4 16 2
5 5 4 2
> price <- data.frame(itemid = 2:6, itemprice)
> price
itemid itemprice
1 2 460
2 3 477
3 4 363
4 5 350
5 6 124
# filter()--返回满足条件的观测
> # filter()
> filter(order, ordernum >= 10)
itemid ordernum class
1 3 11 2
2 4 16 2
> filter(price, itemid %in% c(3,5))
itemid itemprice
1 3 477
2 5 350
# select()--选择所需变量
> # select()
> select(order, itemid, ordernum)
itemid ordernum
1 1 6
2 2 8
3 3 11
4 4 16
5 5 4
> select(price, -itemid)
itemprice
1 460
2 477
3 363
4 350
5 124
# arrange()--对观测进行排序
> # arrange()
> arrange(order, ordernum)
itemid ordernum class
1 5 4 2
2 1 6 1
3 2 8 1
4 3 11 2
5 4 16 2
> arrange(price, -itemprice)
itemid itemprice
1 3 477
2 2 460
3 4 363
4 5 350
5 6 124
# mutate()--添加新变量
> # mutate()
> mutate(price, cost=0.7*itemprice, profit=itemprice-cost)
itemid itemprice cost profit
1 2 460 322.0 138.0
2 3 477 333.9 143.1
3 4 363 254.1 108.9
4 5 350 245.0 105.0
5 6 124 86.8 37.2
# rename()--重命名变量
> # rename()
> rename(price, id=itemid)
id itemprice
1 2 460
2 3 477
3 4 363
4 5 350
5 6 124
# summarise()--数据汇总
> # summarise()
> mygroup <- group_by(order, class)
> summarise(mygroup, count=n(), total=sum(ordernum))
# A tibble: 2 × 3
class count total
<int> <int> <int>
1 1 2 14
2 2 3 31
# %>%--管道操作(将上个输出作为下个输入)
> # %>%
> order %>% group_by(class) %>%
+ summarise(count=n(), total=sum(ordernum))
# A tibble: 2 × 3
class count total
<int> <int> <int>
1 1 2 14
2 2 3 31
# join()--连接数据集
> # join()
> inner_join(order, price, by="itemid")
itemid ordernum class itemprice
1 2 8 1 460
2 3 11 2 477
3 4 16 2 363
4 5 4 2 350
> left_join(order, price, by="itemid")
itemid ordernum class itemprice
1 1 6 1 NA
2 2 8 1 460
3 3 11 2 477
4 4 16 2 363
5 5 4 2 350
> right_join(order, price, by="itemid")
itemid ordernum class itemprice
1 2 8 1 460
2 3 11 2 477
3 4 16 2 363
4 5 4 2 350
5 6 NA NA 124
> full_join(order, price, by="itemid")
itemid ordernum class itemprice
1 1 6 1 NA
2 2 8 1 460
3 3 11 2 477
4 4 16 2 363
5 5 4 2 350
6 6 NA NA 124
> semi_join(order, price, by="itemid")
itemid ordernum class
1 2 8 1
2 3 11 2
3 4 16 2
4 5 4 2
> anti_join(order, price, by="itemid")
itemid ordernum class
1 1 6 1
# src_mysql()--连接MySQL数据库
> # src_mysql()
> library(RMySQL)
> library(DBI)
> src_mysql("mydb",user="root",
password="****") %>% tbl(from = "sample")
Source: query [?? x 2]
Database: mysql 5.7.17 [root@localhost:/mydb]
itemid class
<int> <int>
1 1 1
2 2 1
3 3 1
4 4 2
5 5 2