apply
apply函数形式为apply(X, MARGIN, FUN, …),其中X为matrix或者array,MARGIN = 1 表示按第一维(行)运算,MARGIN = 2 表示按第二维(列)运算,MARGIN = n 表示按第n维运算。用apply可以很方便地按行列求和/平均,其结果与colMeans,colSums,rowMeans,rowSums是一样的。
x <- matrix(rnorm(200), 20, 10)
apply(x, 2, mean)
[1] 0.04868268 0.35743615 -0.09104379
[4] -0.05381370 -0.16552070 -0.18192493
[7] 0.10285727 0.36519270 0.14898850
[10] 0.26767260
apply(x, 1, sum)
[1] -1.94843314 2.60601195 1.51772391
[4] -2.80386816 3.73728682 -1.69371360
[7] 0.02359932 3.91874808 -2.39902859
[10] 0.48685925 -1.77576824 -3.34016277
[13] 4.04101009 0.46515429 1.83687755
[16] 4.36744690 2.21993789 2.60983764
[19] -1.48607630 3.58709251
# 以下各组写法结果等价(仅作说明,不要真的赋值,否则会覆盖同名的内置函数):
# rowSums(x)  等价于 apply(x, 1, sum)
# rowMeans(x) 等价于 apply(x, 1, mean)
# colSums(x)  等价于 apply(x, 2, sum)
# colMeans(x) 等价于 apply(x, 2, mean)
> x <- matrix(rnorm(200), 20, 10)
> apply(x, 1, quantile, probs = c(0.25, 0.75))
[,1] [,2] [,3] [,4]
25% -0.3304284 -0.99812467 -0.9186279 -0.49711686
75% 0.9258157 0.07065724 0.3050407 -0.06585436
[,5] [,6] [,7] [,8]
25% -0.05999553 -0.6588380 -0.653250 0.01749997
75% 0.52928743 0.3727449 1.255089 0.72318419
[,9] [,10] [,11] [,12]
25% -1.2467955 -0.8378429 -1.0488430 -0.7054902
75% 0.3352377 0.7297176 0.3113434 0.4581150
[,13] [,14] [,15] [,16]
25% -0.1895108 -0.5729407 -0.5968578 -0.9517069
75% 0.5326299 0.5064267 0.4933852 0.8868922
[,17] [,18] [,19] [,20]
lapply
lapply遍历list或者data.frame的每个元素,并且使用指定函数来对其元素进行处理,返回一个列表(list)。
> x <- list(a = 1, b = 1:3, c = 10:100)
> lapply(x, FUN = length)
$a
[1] 1
$b
[1] 3
$c
[1] 91
> lapply(x, FUN = sum)
$a
[1] 1
$b
[1] 6
$c
[1] 5005
sapply
sapply与lapply基本相同,只是对返回结果进行了简化,返回的是普通的向量。
x <- list(a = 1, b = 1:3, c = 10:100)
#Compare with above; a named vector, not a list
sapply(x, FUN = length)
a b c
1 3 91
sapply(x, FUN = sum)
a b c
1 6 5005
tapply
把函数应用到一个vector上,这个vector是用另一个vector分组的。simplify = FALSE返回的是list,simplify = TRUE返回的是array。
> n <- 100
> grp1 <- sample(c("a", "b"), n, rep = T)
> grp2 <- sample(c("c", "d"), n, rep = T)
> length <- rnorm(n, 100, 4)
> weight <- rnorm(n, 3, 1)
> tapply(length, grp1, mean)
a b
100.0825 100.3731
> tapply(length, list(grp1, grp2), mean)
c d
a 100.2081 99.93832
b 101.1868 99.38823
> tapply(length, grp1, range)
$a
[1] 92.77222 108.01450
$b
[1] 90.5310 109.4077
> tapply(length, grp1, range, simplify = FALSE)
$a
[1] 92.77222 108.01450
$b
[1] 90.5310 109.4077
> arr.list <- tapply(length, list(grp1, grp2), range)
> arr.list["a", "c"]
[[1]]
[1] 93.03771 108.01450
综合举例
输入参数:X为原始样本点矩阵,c为聚类中心点矩阵
输出:类内距离和
#' Sum of distances from each sample point to its nearest cluster centre
#'
#' For every row of `X`, computes the Euclidean distance (via
#' `stats::dist()`) to every row of `c`, keeps the smallest one, and
#' returns the sum of those minima (the within-cluster distance sum).
#'
#' @param X Numeric matrix (or object coercible by `as.matrix()`), one
#'   sample point per row.
#' @param c Numeric matrix (or object coercible by `as.matrix()`), one
#'   cluster centre per row; must have the same number of columns as `X`
#'   and at least one row.
#' @return A single numeric value: the sum over all points of the distance
#'   to the closest centre. Returns 0 when `X` has no rows.
centroidDists <- function(X, c) {
  points <- as.matrix(X)
  centers <- as.matrix(c)
  stopifnot(
    nrow(centers) > 0,
    ncol(points) == ncol(centers)
  )

  # Reduce to one minimum per point right away. The previous version built a
  # centres-by-points matrix with sapply()/apply(), which silently collapsed
  # to a plain vector when there was only one centre and then returned the
  # overall minimum instead of the sum.
  nearest <- vapply(seq_len(nrow(points)), function(i) {
    to_centers <- vapply(seq_len(nrow(centers)), function(j) {
      as.numeric(dist(rbind(points[i, ], centers[j, ])))
    }, numeric(1))
    min(to_centers)
  }, numeric(1))

  sum(nearest)
}
sapply() type function for two 2-d arrays
sapply(1:n, function(i) snow_free(doy_stack[,i], snow_stack[,i]))