R for Data Science总结之——Iteration
不想多说了,直接上代码
library(tidyverse)
# Example data: four columns, each holding 10 draws from rnorm().
# No seed is set, so the printed values below change from run to run.
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

# Median of every column, written out by hand: one call per column.
median(df$a)
#> [1] -0.246
median(df$b)
#> [1] -0.287
median(df$c)
#> [1] -0.0567
median(df$d)
#> [1] 0.144
上面对每一列都重复写了一次 median();改用 for 循环可以避免这种重复:
# The same per-column medians computed with a for loop. The three parts
# are: a preallocated output, the sequence to iterate over, and the body.
output <- vector("double", ncol(df))  # 1. output: one slot per column
for (i in seq_along(df)) {            # 2. sequence: column positions
  output[[i]] <- median(df[[i]])      # 3. body: median of the i-th column
}
output
遍历向量时最通用的写法是按数值索引循环,因为有了位置 i 就可以同时取出元素的名称和值:
# Index-based loop template. `x` is a placeholder for the vector being
# traversed (it is not defined at this point in the file). Looping over
# positions gives access to both the name and the value of each element.
for (i in seq_along(x)) {
  name <- names(x)[[i]]
  value <- x[[i]]
}
如果事先不知道输出向量的长度,最直接的做法是在循环中不断用 c() 增长向量(这种写法每次都要复制已有数据,效率较低):
# Output length is unknown up front: for each mean, a random number of
# draws (sample(100, 1), i.e. between 1 and 100) is generated and appended
# to `output` with c(). Growing a vector this way copies it on every
# append, which is why this pattern scales poorly.
means <- c(0, 1, 2)
output <- double()
for (i in seq_along(means)) {
  n <- sample(100, 1)
  output <- c(output, rnorm(n, mean = means[[i]]))
}
str(output)
#> num [1:202] 0.912 0.205 2.584 -0.789 0.588 ...
更好的解决办法是先将每次的结果存放在一个list中,循环结束后再用 unlist() 一次性合并成一个向量:
# Store each batch of draws in its own slot of a preallocated list
# (one slot per mean), then flatten the list once with unlist().
out <- vector("list", length(means))
for (i in seq_along(means)) {
  n <- sample(100, 1)
  out[[i]] <- rnorm(n, mean = means[[i]])
}
str(out)
#> List of 3
#>  $ : num [1:83] 0.367 1.13 -0.941 0.218 1.415 ...
#>  $ : num [1:21] -0.485 -0.425 2.937 1.688 1.324 ...
#>  $ : num [1:40] 2.34 1.59 2.93 3.84 1.3 ...
str(unlist(out))
#> num [1:144] 0.367 1.13 -0.941 0.218 1.415 ...
map函数:purrr 包的 map 系列函数可以代替上面的 for 循环,对每个元素应用同一个函数;其中 map_dbl() 返回 double 向量:
# map_dbl() applies the given function to every column of df and returns
# the results as a named double vector (names are the column names).
df %>% map_dbl(mean)
#>       a       b       c       d
#>  0.2026 -0.2068  0.1275 -0.0917
df %>% map_dbl(median)
#>      a      b      c      d
#>  0.237 -0.218  0.254 -0.133
df %>% map_dbl(sd)
#>     a     b     c     d
#> 0.796 0.759 1.164 1.062
同样可在map中写匿名函数(或用 ~ 公式简写);甚至可以直接写字符串按名称提取对应的数值,或写整数提取对应位置的数值:
# Fit mpg ~ wt separately for each cylinder group of mtcars.
# Version 1: explicit anonymous function.
models <- mtcars %>%
  split(.$cyl) %>%
  map(function(df) lm(mpg ~ wt, data = df))

# Version 2: the same fit using purrr's one-sided formula shortcut,
# where `.x` refers to the current list element.
models <- mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .x))

# Extract R-squared from each model summary with a formula lambda.
models %>%
  map(summary) %>%
  map_dbl(~ .x$r.squared)
#>     4     6     8
#> 0.509 0.465 0.423

# Same extraction, passing the component name as a string.
models %>%
  map(summary) %>%
  map_dbl("r.squared")
#>     4     6     8
#> 0.509 0.465 0.423
# An integer passed as the function extracts the element at that position
# from each sub-list: here, the 2nd element of every inner list.
x <- list(
  list(1, 2, 3),
  list(4, 5, 6),
  list(7, 8, 9)
)
map_dbl(x, 2)
#> [1] 2 5 8