管道符号
%>% 将前一数据作为下一项运算的第一个参数
library(tidyverse)
library(sciplot)
# Mean and standard error of the four measurement columns for each species.
# Naming the list entries makes across() label its output columns
# <column>_mean / <column>_se instead of <column>_1 / <column>_2.
iris %>%
  group_by(Species) %>%
  summarise(across(1:4, list(mean = mean, se = se)))
长宽数据转化
# Wide -> long with gather(): stack the four measurement columns into the
# key column "class" and the value column "case".
iris %>%
  gather(
    key = "class",
    value = "case",
    "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"
  )
# Toy data: ten consecutive days starting 2009-01-01 and three series of
# normal draws with mean 0 and standard deviations 1, 2 and 4.
stocks <- data.frame(
  time = as.Date("2009-01-01") + 0:9,
  X = rnorm(n = 10, mean = 0, sd = 1),
  Y = rnorm(n = 10, mean = 0, sd = 2),
  Z = rnorm(n = 10, mean = 0, sd = 4)
)
# Long form: every column except time is gathered into stock (the former
# column name) and price (its value).
stocksm <- stocks %>%
  gather(key = stock, value = price, -time)
# Back to wide form with one column per stock.
stocksm %>%
  spread(key = stock, value = price)
# Alternative wide form with one column per date.
stocksm %>%
  spread(key = time, value = price)
# Wide -> long with pivot_longer(): every column except Species is stacked
# into the name column "Class" and the value column "case".
iris %>%
  pivot_longer(cols = !Species, names_to = "Class", values_to = "case")
# Long -> wide with pivot_wider(): one column per stock, filled from price.
stocksm %>%
  pivot_wider(names_from = stock, values_from = price)
filter 筛选
# Keep only the rows of one species.
iris %>%
  filter(Species == "virginica")
# Keep the rows of every species except one.
iris %>%
  filter(Species != "virginica")
# Keep the rows whose Species is in a given set of values.
iris %>%
  filter(Species %in% c("virginica"))
# Same idea on a numeric column: rows whose Sepal.Length is 5 or 4.6.
iris %>%
  filter(Sepal.Length %in% c(5, 4.6))
slice选取行
# Draw 10 rows at random.
iris %>%
  slice_sample(n = 10)
# Pick rows by position.
iris %>%
  slice(1, 3, 5)
# First 10 rows.
iris %>%
  slice_head(n = 10)
# Last 10 rows.
iris %>%
  slice_tail(n = 10)
# Row(s) holding the largest Sepal.Length.
iris %>%
  slice_max(order_by = Sepal.Length)
# Row(s) holding the smallest Sepal.Length.
iris %>%
  slice_min(order_by = Sepal.Length)
# Group first, then take the largest Sepal.Length within each species.
iris %>%
  group_by(Species) %>%
  slice_max(order_by = Sepal.Length)
distinct
# One row per distinct Species value, with all other columns retained.
# The argument is spelled .keep_all (leading dot); without the dot, distinct()
# treats keep_all = TRUE as a new column to compute and drops the other columns.
iris %>%
  distinct(Species, .keep_all = TRUE)
select选择列
# Drop the first two columns and keep the rest.
iris %>%
  select(!c(1, 2))
# Keep only the last column.
iris %>%
  select(last_col())
# Move column 5 to the front, followed by all remaining columns.
iris %>%
  select(5, everything())
# Extract column 1 as a vector rather than a data frame.
iris %>%
  pull(1)
arrange排序
# Sort by Sepal.Length, largest first.
iris %>%
  arrange(desc(Sepal.Length))
# Sort by Sepal.Length, smallest first (the default order).
iris %>%
  arrange(Sepal.Length)
mutate添加新列
# Add two derived columns: a shifted copy of Sepal.Length and its natural log.
iris %>%
  mutate(
    Sepal.Length2 = Sepal.Length + 20,
    log.Sepal.Length = log(Sepal.Length)
  )
# transmute() keeps only the newly computed column.
iris %>%
  transmute(Sepal.Length2 = Sepal.Length + 20)
# Same two columns, placed in front of Sepal.Length via .before.
iris %>%
  mutate(
    Sepal.Length2 = Sepal.Length + 20,
    log.Sepal.Length = log(Sepal.Length),
    .before = Sepal.Length
  )
# Assigning to a name that already exists replaces that column instead of
# adding a new one.
rename 更改变量名
# Rename Sepal.Length to x; all other columns are unchanged.
iris %>%
  rename(x = Sepal.Length)
# Unfinished template for turning row names into a column:
## iris %>% rownames_to_column(, var = )
join合并
# join: combining two tables on a shared key column.
# Per-species mean and standard error of Sepal.Length. Each summary keeps
# Species as the key and names its statistic after what it holds; storing the
# standard error in a column called `mean` made the joins match on that float
# column, where no values coincide.
y_mean <- iris %>%
  group_by(Species) %>%
  summarise(mean = mean(Sepal.Length))
y_se <- iris %>%
  group_by(Species) %>%
  summarise(se = se(Sepal.Length))
y_mean
y_se

# Left join: every row of y_mean, plus the matching se for that species.
y_mean %>%
  left_join(y_se, by = "Species")

# Inner join: only the species present in both tables.
inner_join(y_mean, y_se, by = "Species")

# Other variants, left as placeholders:
# left_join()   # left join
# right_join()
# full_join()   # original note: "the joined columns contain duplicate values"