文章目录
1 plyr
(1)思想:利用"拆分(split)—运算(apply)—结合(combine)"的方式实现对数据的处理:先把数据拆分成若干部分,对每一部分分别执行运算(各部分相互独立,因此可以并行),再把结果合并。同时提供了良好的辅助函数。
(2)a*ply()函数
a*ply(.data,.margins,.fun,.progress)
//.data : 处理数组、矩阵
//.margins =1按行 =2按列 =c(1,2)按每个元素
//.fun: 执行的函数功能
//.progress: 进度条显示 ="text" ="win"
a <-matrix(1:21,nrow=3,ncol=7)
aaply(.data=a,.margins=1,.fun=mean)
/*
1 2 3
10 11 12
*/
aaply(a,2,mean)
/*
1 2 3 4 5 6 7
2 5 8 11 14 17 20
*/
aaply(a,c(1,2),mean)
/*
X2
X1 1 2 3 4 5 6 7
1 1 4 7 10 13 16 19
2 2 5 8 11 14 17 20
3 3 6 9 12 15 18 21
*/
aaply(a,1,mean,.progress="text")
/*
|================================================================| 100%
1 2 3
10 11 12
*/
(3)d*ply()函数
d*ply(.data,.variables,.fun)
//.data: data.frame类型
//.variables : 按数据框中的变量分类与输出
//.(sex,age) 按sex age分类,并输出sex age为变量的数据框
//.fun: function()
# Sample data: one entry per person, kept as three parallel vectors.
names <- c("John", "Marry", "Alice", "Peter", "Roger", "Pepe")
age <- c(23, 54, 21, 63, 32, 14)
sex <- c("M", "F", "M", "F", "M", "M")
# Combine the vectors into a data frame; columns are named names, age, sex.
people <- data.frame(names = names, age = age, sex = sex)
# Mean of the second column of `data`.
#
# Used below as the per-group function for ddply()/daply() on `people`,
# where column 2 is `age`.
#
# @param data A data frame (or matrix) whose second column is numeric.
# @return A single numeric value: the mean of column 2 of `data`.
#
# Note: the leading "+ " console continuation prompts from the original
# paste were removed; with them the definition is not valid R source.
amean <- function(data) {
  mean(data[, 2])
}
ddply(people,.(sex),amean)
/*
sex V1
1 F 58.5
2 M 22.5
*/
ddply(people,.(sex,age),amean)
/*
sex age V1
1 F 54 54
2 F 63 63
3 M 14 14
4 M 21 21
5 M 23 23
6 M 32 32
*/
daply(people,.(sex),amean)
/*
F M
58.5 22.5
*/
(3)l*ply()函数 *_ply()函数
l*ply(.data,.fun)
//l*ply: 直接对list变量中的每个元素分别执行函数
a <-c(1,2,3,4,5)
b <-c(1,2,4)
c <-c(1,2)
d <-list(a,b,c)
llply(d,mean)
/*
[[1]]
[1] 3
[[2]]
[1] 2.333333
[[3]]
[1] 1.5
*/
(4)m_ply()批量系数操作函数
m_ply(data,.fun)
//data: 参数数据框,每一行是一组传给fun()函数的参数(列名对应参数名)
data <-data.frame(n=c(10,100,50),mean=c(5,5,10),sd=c(1,2,1))
mlply(data,rnorm) //批量操作正态分布函数
/*
$`1`
[1] 3.336331 5.491110 4.825945 5.961291 5.293827 5.080999 5.183662
[8] 5.166255 3.730401 7.349493
$`2`
[1] 2.1759892 4.9660770 3.9113613 8.6002247 7.0228804 3.8725669
[7] 5.4108416 7.3309239 9.4726457 5.6045302 2.9149868 3.0329154
[13] 9.0114372 0.8588570 11.1114847 4.4772988 4.0912135 5.3151211
[19] 6.8667775 5.6056566 1.0876996 5.7070734 5.9008490 6.3191017
[25] 2.9371585 0.2579542 4.3508474 3.1114025 3.4682200 3.0924415
[31] 4.2039911 4.3775659 6.5921854 6.9728567 3.4109367 4.3823641
[37] 5.7228895 7.7975822 4.8878592 1.6022530 5.4637051 4.7618187
[43] 8.5449857 5.6868443 3.7539004 4.1209554 3.9894064 5.3720703
[49] 5.3528356 6.8316964 5.6403535 4.2666254 3.1187774 6.2694059
[55] 4.8750230 5.3656757 7.2072820 8.5040712 3.0923671 8.2881609
[61] 3.2665329 5.5327044 5.4447410 4.4461830 7.7885061 3.6821760
[67] 6.3210626 4.9734886 3.1370394 7.4293783 0.8233193 3.9476951
[73] 1.9171949 5.3886421 5.5288451 2.7625297 6.3019059 2.9341995
[79] 6.3184030 5.4756588 6.4305519 3.1230339 5.1907080 4.0743612
[85] 2.0622357 5.3053730 8.5475252 3.7038581 4.6003650 6.3784875
[91] 5.0722910 8.8870726 6.4744275 9.6426679 5.6978187 2.7321667
[97] 5.8426705 3.1508875 2.9858753 4.6210513
$`3`
[1] 10.933917 10.343910 10.814020 10.915341 9.828148 7.597769 10.795907
[8] 12.169116 10.058383 8.645086 9.632449 9.065482 9.958361 10.676112
[15] 10.866436 10.235175 9.066030 10.813252 11.348319 12.251883 9.506348
[22] 10.474093 11.193692 9.883605 10.524859 10.214421 9.865550 10.168509
[29] 10.964733 10.408779 9.533562 7.760217 9.204937 9.980045 7.485575
[36] 12.210952 8.511238 8.839248 11.457738 7.810124 10.739063 9.655991
[43] 10.455877 10.878110 9.041536 9.294155 7.003051 9.038948 10.380188
[50] 10.505068
attr(,"split_type")
[1] "array"
attr(,"split_labels")
*/