# dplyr: if a package has not been installed yet, install it first, e.g. install.packages("dplyr")
library(dplyr)
# Inspect the built-in ToothGrowth data set: first rows and column structure
head(ToothGrowth)
str(ToothGrowth)
# Add new variables and overwrite an existing one (try this with your own data)
toothgrowth2 <- mutate(
  ToothGrowth,
  # overwrite len with its square
  len = len^2,
  # new column holding the row index; seq_len() is safe even for zero rows
  nv = seq_len(nrow(ToothGrowth)),
  # label rows whose index is above the median index "H", all others "L"
  nv2 = ifelse(nv > median(nv), "H", "L")
)
head(toothgrowth2)
# Filter rows (observations): a row is kept only when every condition holds
toothgrowth3 <- filter(
  toothgrowth2,
  nv %in% 1:50,
  # compare the label column nv2; nv is the integer row index and can never
  # equal "H", so testing nv here would always return zero rows
  nv2 == "H"
)
toothgrowth3
# Select columns (variables) by position: keep the 2nd and the 4th column
toothgrowth4 <- toothgrowth3 %>%
  select(2, 4)
head(toothgrowth4)
# Grouped computation
# Overall maximum and minimum of len (no grouping)
ToothGrowth %>%
  summarise(len_max = max(len), len_min = min(len))
# Maximum len within each level of supp
ToothGrowth %>%
  group_by(supp) %>%
  summarise(len_max = max(len))
# Maximum len within each level of dose
ToothGrowth %>%
  group_by(dose) %>%
  summarise(len_max = max(len))
# Maximum len within each dose/supp combination
ToothGrowth %>%
  group_by(dose, supp) %>%
  summarise(len_max = max(len))
# Pipe operator %>% (keyboard shortcut Ctrl+Shift+M). What is it for? It passes
# the result on its left as the first argument of the call on its right, so a
# sequence of steps reads top to bottom instead of as nested calls.
library(magrittr)  # the package is spelled "magrittr"
ToothGrowth %>%
  # add a row-index column
  mutate(nv = seq_len(nrow(ToothGrowth))) %>%
  # keep the first 50 rows by index
  filter(nv %in% 1:50) %>%
  # keep the first two columns (len and supp)
  select(1:2) %>%
  group_by(supp) %>%
  summarise(len_max = max(len)) %>%
  # convert the summarised result to a plain data.frame
  as.data.frame()
# 连接/合并数据框
library(dplyr)
# Two small example tables; the joins further down match df1$c2 against df2$c3
df1 <- data.frame(
  c1 = c(2L, 3L, 4L, 5L),
  c2 = LETTERS[seq(2, 5)]
)
df1
df2 <- data.frame(
  c3 = LETTERS[c(2, 3, 20:23)],
  # six distinct random integers drawn from 1..100 (no seed is set here)
  c4 = sample(seq_len(100), size = 6)
)
df2
# Left join: every row of df1 is kept; unmatched rows get NA in df2's columns.
# The pipe form and the plain function-call form are equivalent.
df1 %>% left_join(df2, by = c(c2 = "c3"))
left_join(df1, df2, by = c(c2 = "c3"))
# Right join: every row of df2 is kept
right_join(df1, df2, by = c(c2 = "c3"))
# Full join: rows from both tables are kept
full_join(df1, df2, by = c(c2 = "c3"))
# Inner join: only rows with a match in both tables are kept
inner_join(df1, df2, by = c(c2 = "c3"))
# Splitting and combining columns
library(tidyr)
# Split: demo frame whose c5 values are joined with "-" and c6 values with "."
df3 <- data.frame(
  c5 = paste(letters[1:3], 1:3, sep = "-"),
  c6 = paste(letters[1:3], 1:3, sep = "."),
  c4 = c("B", "B", "B"),
  c3 = c("H", "M", "L")
)
df3
df4 <- df3 %>%
  # split c5 at "-" into c7/c8 and keep the source column c5
  separate(col = c5, sep = "-", into = c("c7", "c8"), remove = FALSE) %>%
  # sep is a regular expression, so the dot is escaped to match literally;
  # the source column c6 is dropped
  separate(col = c6, sep = "\\.", into = c("c9", "c10"), remove = TRUE)
df4
# Combine: paste several columns together into one new column
df4 %>%
  # c11 = c7 and c8 joined with "_"; the source columns are kept
  unite(col = "c11", c("c7", "c8"), sep = "_", remove = FALSE) %>%
  # c12 = c9 and c10 joined with "."; the source columns are dropped
  unite(col = "c12", c("c9", "c10"), sep = ".", remove = TRUE) %>%
  # c13 = c4 and c3 joined with no separator; the source columns are kept
  unite(col = "c13", c("c4", "c3"), sep = "", remove = FALSE)
# 长宽数据转换
library(tidyr)
# 宽数据转长数据
# Any integer works as the seed; fixing it makes the random columns below
# reproducible from run to run
set.seed(42)
# Wide demo table: one row per time/area pair with three measurement columns
df5 <- data.frame(
  time = rep(2011:2013, each = 3),
  area = rep(letters[1:3], times = 3),
  pop = sample(100:1000, 9),
  den = round(rnorm(9, mean = 3, sd = 0.1), 2),
  mj = sample(8:12, 9, replace = TRUE)
)
df5
# Wide -> long: stack every column except the first two; the former column
# names go into "varb" and their values into "value"
df6 <- pivot_longer(
  df5,
  cols = -c(1, 2),
  names_to = "varb",
  values_to = "value"
)
df6
# Long -> wide. Is this a row/column transpose? Not exactly: new column names
# are built from the area and varb values, and each new column is filled
# from the value column.
pivot_wider(
  df6,
  names_from = c(area, varb),
  values_from = value
)
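# Note: the learning videos come from Bilibili. Special thanks to the course "R语言数据分析从入门到进阶" (completed; for Part 6, visualization and plotting, see "R语言数据可视化") on bilibili.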