数据使用的是Motor Trend杂志的车辆路试(mtcars)数据集。监测点在于每加仑汽油行驶英里数(mpg)、马力(hp)、车重(wt)。
主要是计算描述性统计量,通过summary()、sapply()、describe()、stat.desc()等函数作为测试,然后进行了简单的分组计算概述统计量,主要通过aggregate()、by()、summaryBy()、describeBy()等函数去测试。
结果中关于最小值、最大值、四分位数、数值型变量均值、因子向量、逻辑向量、标准差、平均值、偏度、峰度等的表示没有作过多的描述,可自行查找。
> #均值和标准差计算
>
> x <- c(1,2,3,4,5,6,7,8)
> mean(x)
[1] 4.5
> sd(x)
[1] 2.44949
> #用算术过程的程序表达
> n <- length(x)
> meanx <- sum(x)/n
> css <- sum((x-meanx)^2)
> sdx <- sqrt(css/(n-1))
> meanx
[1] 4.5
> sdx
[1] 2.44949
> #描述性统计分析
> myvars <- c("mpg","hp","wt")
> head(mtcars[myvars])
mpg hp wt
Mazda RX4 21.0 110 2.620
Mazda RX4 Wag 21.0 110 2.875
Datsun 710 22.8 93 2.320
Hornet 4 Drive 21.4 110 3.215
Hornet Sportabout 18.7 175 3.440
Valiant 18.1 105 3.460
> #summary计算描述性统计量
> summary(mtcars[myvars])
mpg hp wt
Min. :10.40 Min. : 52.0 Min. :1.513
1st Qu.:15.43 1st Qu.: 96.5 1st Qu.:2.581
Median :19.20 Median :123.0 Median :3.325
Mean :20.09 Mean :146.7 Mean :3.217
3rd Qu.:22.80 3rd Qu.:180.0 3rd Qu.:3.610
Max. :33.90 Max. :335.0 Max. :5.424
> #通过sapply()计算描述性统计量
> mystats <- function(x,na.omit=FALSE){
+
+ if (na.omit)
+ x <- x[!is.na(x)]
+ m <- mean(x)
+ n <- length(x)
+ s .... [TRUNCATED]
> myvars <- c("mpg","hp","wt")
> sapply(mtcars[myvars],mystats)
mpg hp wt
n 32.000000 32.0000000 32.00000000
mean 20.090625 146.6875000 3.21725000
stdev 6.026948 68.5628685 0.97845744
skew 0.610655 0.7260237 0.42314646
kurtosis -0.372766 -0.1355511 -0.02271075
> library(Hmisc)
> #通过Hmisc包的describe()
> describe(mtcars[myvars])
vars n mean sd median trimmed mad min max range skew kurtosis se
mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61 -0.37 1.07
hp 2 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73 -0.14 12.12
wt 3 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42 -0.02 0.17
> library(pastecs)
> #通过pastecs包中的stat.desc()函数
> stat.desc(mtcars[myvars])
mpg hp wt
nbr.val 32.0000000 32.0000000 32.0000000
nbr.null 0.0000000 0.0000000 0.0000000
nbr.na 0.0000000 0.0000000 0.0000000
min 10.4000000 52.0000000 1.5130000
max 33.9000000 335.0000000 5.4240000
range 23.5000000 283.0000000 3.9110000
sum 642.9000000 4694.0000000 102.9520000
median 19.2000000 123.0000000 3.3250000
mean 20.0906250 146.6875000 3.2172500
SE.mean 1.0654240 12.1203173 0.1729685
CI.mean.0.95 2.1729465 24.7195501 0.3527715
var 36.3241028 4700.8669355 0.9573790
std.dev 6.0269481 68.5628685 0.9784574
coef.var 0.2999881 0.4674077 0.3041285
> library(psych)
> #通过psych包中的describe()计算描述性统计量
> describe(mtcars[myvars])
vars n mean sd median trimmed mad min max range skew kurtosis se
mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61 -0.37 1.07
hp 2 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73 -0.14 12.12
wt 3 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42 -0.02 0.17
> #分组计算描述性统计量
> aggregate(mtcars[myvars],by=list(am=mtcars$am),mean)
am mpg hp wt
1 0 17.14737 160.2632 3.768895
2 1 24.39231 126.8462 2.411000
> aggregate(mtcars[myvars],by=list(am=mtcars$am),sd)
am mpg hp wt
1 0 3.833966 53.90820 0.7774001
2 1 6.166504 84.06232 0.6169816
> #by()分组计算描述性统计量
>
> dstats <- function(x) sapply(x,mystats)
> by(mtcars[myvars],mtcars$am,dstats)
mtcars$am: 0
mpg hp wt
n 19.00000000 19.00000000 19.0000000
mean 17.14736842 160.26315789 3.7688947
stdev 3.83396639 53.90819573 0.7774001
skew 0.01395038 -0.01422519 0.9759294
kurtosis -0.80317826 -1.20969733 0.1415676
-----------------------------------------------------------------
mtcars$am: 1
mpg hp wt
n 13.00000000 13.0000000 13.0000000
mean 24.39230769 126.8461538 2.4110000
stdev 6.16650381 84.0623243 0.6169816
skew 0.05256118 1.3598859 0.2103128
kurtosis -1.45535200 0.5634635 -1.1737358
> #使用doBy包中summaryBy()分组计算概述统计量
> library(doBy)
> summaryBy(mpg+hp+wt~am,data = mtcars,FUN = mystats)
am mpg.n mpg.mean mpg.stdev mpg.skew mpg.kurtosis hp.n hp.mean hp.stdev
1 0 19 17.14737 3.833966 0.01395038 -0.8031783 19 160.2632 53.90820
2 1 13 24.39231 6.166504 0.05256118 -1.4553520 13 126.8462 84.06232
hp.skew hp.kurtosis wt.n wt.mean wt.stdev wt.skew wt.kurtosis
1 -0.01422519 -1.2096973 19 3.768895 0.7774001 0.9759294 0.1415676
2 1.35988586 0.5634635 13 2.411000 0.6169816 0.2103128 -1.1737358
> #使用psych包中的describeBy()分组计算概述统计量
> describeBy(mtcars[myvars],list(am=mtcars$am))
Descriptive statistics by group
am: 0
vars n mean sd median trimmed mad min max range skew kurtosis se
mpg 1 19 17.15 3.83 17.30 17.12 3.11 10.40 24.40 14.00 0.01 -0.80 0.88
hp 2 19 160.26 53.91 175.00 161.06 77.10 62.00 245.00 183.00 -0.01 -1.21 12.37
wt 3 19 3.77 0.78 3.52 3.75 0.45 2.46 5.42 2.96 0.98 0.14 0.18
-----------------------------------------------------------------
am: 1
vars n mean sd median trimmed mad min max range skew kurtosis se
mpg 1 13 24.39 6.17 22.80 24.38 6.67 15.00 33.90 18.90 0.05 -1.46 1.71
hp 2 13 126.85 84.06 109.00 114.73 63.75 52.00 335.00 283.00 1.36 0.56 23.31
wt 3 13 2.41 0.62 2.32 2.39 0.68 1.51 3.57 2.06 0.21 -1.17 0.17