1、cast函数
cast函数,把长型数据转换成你想要的任何宽型数据:
dcast(data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value.var = guess_value(data))
acast(data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value.var = guess_value(data))
#参数:
#data 要进行转换的数据框
#formula 用于转换的公式,表达式为:行变量~列变量~三维变量~...
#fun.aggregate 聚合函数(如mean、sum),当公式中的变量组合对应多个值时用于聚合;需要聚合但未指定时默认使用length
#margins 用于添加边界汇总数据
#subset 用于添加过滤条件,条件需要写在.()中,例如subset=.(id==1),因此需要载入plyr包
acast,dcast的区别在于输出结果。acast 输出结果为vector/matrix/array,dcast 输出结果为data.frame。
> library(reshape2)
> x<-data.frame(id=1:6,name=c("wang","zhang","li","chen","zhao","song"),shuxue=c(89,85,68,79,96,53),yuwen=c(77,68,86,87,92,63))
> x
id name shuxue yuwen
1 1 wang 89 77
2 2 zhang 85 68
3 3 li 68 86
4 4 chen 79 87
5 5 zhao 96 92
6 6 song 53 63
> x1<-melt(x,id=c("id","name")) #先使用melt函数对数据进行融化操作,把宽型数据变成长型数据
> x1
id name variable value
1 1 wang shuxue 89
2 2 zhang shuxue 85
3 3 li shuxue 68
4 4 chen shuxue 79
5 5 zhao shuxue 96
6 6 song shuxue 53
7 1 wang yuwen 77
8 2 zhang yuwen 68
9 3 li yuwen 86
10 4 chen yuwen 87
11 5 zhao yuwen 92
12 6 song yuwen 63
> x2<-acast(x1,id~variable)
> x2
shuxue yuwen
1 89 77
2 85 68
3 68 86
4 79 87
5 96 92
6 53 63
> x3<-dcast(x1,id~variable)
> x3
id shuxue yuwen
1 1 89 77
2 2 85 68
3 3 68 86
4 4 79 87
5 5 96 92
6 6 53 63
#从以上两个执行结果来看,可以看出acast和dcast的区别:acast输出的是矩阵,id只作为行名而不是单独的一列;dcast输出的是数据框,id作为单独的一列输出。
> x4<-acast(x1,id~name~variable)
> x4
, , shuxue
chen li song wang zhang zhao
1 NA NA NA 89 NA NA
2 NA NA NA NA 85 NA
3 NA 68 NA NA NA NA
4 79 NA NA NA NA NA
5 NA NA NA NA NA 96
6 NA NA 53 NA NA NA
, , yuwen
chen li song wang zhang zhao
1 NA NA NA 77 NA NA
2 NA NA NA NA 68 NA
3 NA 86 NA NA NA NA
4 87 NA NA NA NA NA
5 NA NA NA NA NA 92
6 NA NA 63 NA NA NA
#三维的情况下acast输出的是一个数组,而dcast则报错,因为dcast输出结果为数据框。
> x5<-dcast(x1,id~variable,mean,margins=T)
> x5
id shuxue yuwen (all)
1 1 89.00000 77.00000 83.00000
2 2 85.00000 68.00000 76.50000
3 3 68.00000 86.00000 77.00000
4 4 79.00000 87.00000 83.00000
5 5 96.00000 92.00000 94.00000
6 6 53.00000 63.00000 58.00000
7 (all) 78.33333 78.83333 78.58333
#可以看到,边缘多了一行和一列汇总数据(标记为(all)),分别是对每一列和每一行求平均的结果
> x6<-dcast(x1,id~variable,mean,margins=c("id"))
> x6
id shuxue yuwen
1 1 89.00000 77.00000
2 2 85.00000 68.00000
3 3 68.00000 86.00000
4 4 79.00000 87.00000
5 5 96.00000 92.00000
6 6 53.00000 63.00000
7 (all) 78.33333 78.83333
#margins=c("id")只增加一行汇总,即只对每一列求平均值;当然也可以只对每一行求平均值,把margins中的id改成variable就可以了
> x7<-dcast(x1,id~variable,mean,margins=c("variable"))
> x7
id shuxue yuwen (all)
1 1 89 77 83.0
2 2 85 68 76.5
3 3 68 86 77.0
4 4 79 87 83.0
5 5 96 92 94.0
6 6 53 63 58.0
> library(plyr)
> x8<-dcast(x1,id~variable,mean,subset=.(id==1|id==3))
> x8
id shuxue yuwen
1 1 89 77
2 3 68 86
#subset的筛选功能强大,可以进行各种各样的筛选操作,作用类似于filter
2、melt函数
melt函数,把宽型数据转换成长型数据:id(即id.vars)指定标识变量,variable.name和value.name分别指定融化后变量列和值列的列名。
> x
id name shuxue yuwen
1 1 wang 89 77
2 2 zhang 85 68
3 3 li 68 86
4 4 chen 79 87
5 5 zhao 96 92
6 6 song 53 63
> x9<-melt(x,id=1:2,variable.name="课目",value.name="值")
> x9
id name 课目 值
1 1 wang shuxue 89
2 2 zhang shuxue 85
3 3 li shuxue 68
4 4 chen shuxue 79
5 5 zhao shuxue 96
6 6 song shuxue 53
7 1 wang yuwen 77
8 2 zhang yuwen 68
9 3 li yuwen 86
10 4 chen yuwen 87
11 5 zhao yuwen 92
12 6 song yuwen 63