library(vcd)
# Combine each state's census region with its state.x77 statistics.
states <- data.frame(state.region, state.x77)

# Mean illiteracy rate per region. aggregate() names the grouping column
# "Group.1" and the aggregated column "x"; later code relies on both names.
means <- with(
  states,
  aggregate(Illiteracy, by = list(state.region), FUN = mean)
)
means
#>         Group.1        x
#> 1     Northeast 1.000000
#> 2         South 1.737500
#> 3 North Central 0.700000
#> 4          West 1.023077
# Reorder the rows so the regional means run from smallest to largest.
means <- means[order(means$x, decreasing = FALSE), ]
means
#>         Group.1        x
#> 3 North Central 0.700000
#> 1     Northeast 1.000000
#> 4          West 1.023077
#> 2         South 1.737500
# Bar chart of the mean illiteracy rate per region, with a dashed guide
# line joining the tops of the bars.
#
# barplot() returns the x-coordinates of the bar midpoints. These are not
# 1, 2, 3, ..., so they are captured and passed to lines() explicitly;
# calling lines(means$x) alone plots against the index 1..n and the line
# drifts away from the bars.
bar_mids <- barplot(
  means$x,
  names.arg = means$Group.1,
  main = "Mean Illiteracy Rate"
)
# Guide line: dashed (lty = 2), double width (lwd = 2).
lines(bar_mids, means$x, lty = 2, lwd = 2)
# Lay the next four plots out on a 2 x 2 grid.
par(mfrow = c(2, 2))

# 1. Simple histogram with default settings.
hist(mtcars$mpg)

# 2. Histogram with a requested number of bins and a fill colour.
# breaks is 12 so the plot agrees with its "12 bins" title (it was 9).
# A single number passed as breaks is treated by hist() as a suggestion.
hist(
  mtcars$mpg,
  breaks = 12,
  col = "red",
  xlab = "Miles Per Gallon",
  main = "Colored histogram with 12 bins"
)

# 3. Histogram on the density scale, with a rug plot added underneath.
# freq = FALSE plots densities instead of counts, so a density curve can
# be overlaid on the same axes. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
hist(
  mtcars$mpg,
  freq = FALSE,
  breaks = 12,
  col = rainbow(length(mtcars$mpg)),
  xlab = "Miles Per Gallon",
  main = "Histogram,rug plot,density curve"
)
# jitter() adds a little random noise so tied values do not overplot.
rug(jitter(mtcars$mpg)) # mtcars is a dataset that ships with R
#>                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
#> Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
#> Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
#> Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
#> Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
#> Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
#> Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
#> Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
#> Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
#> Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
#> Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
#> Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
#> Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
#> Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
#> Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
#> Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
#> Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
#> Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
#> AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
#> Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
#> Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
#> Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
#> Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
#> Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
#> Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
#> Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
#> Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
#> Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
# 轴须线:轴须图是实际数据值的一种一维呈现方式。
# 如果数据中有很多结(tie,即出现相同的值),可以利用
# rug(jitter(mtcars$mpg, amount = 0.01)) 将轴须图的数据打散:
# 这样会向每个数据点添加一个小的随机值(一个 ±amount 之间的均匀分布随机数),
# 以避免重叠的点产生影响。
# Overlay a kernel density estimate on the current plot as a red,
# double-width curve; it gives a smoother description of the distribution.
# The colour is hexadecimal RGB: six digits (0-F), two each for red,
# green and blue, in that order.
lines(
  density(mtcars$mpg),
  col = "#FF0000",
  lwd = 2
)
# 4. Add a normal density curve and a frame (the code that follows).
# Histogram of miles per gallon with a fitted normal curve and a frame.
x <- mtcars$mpg
h <- hist(
  x,
  breaks = 12,
  col = rainbow(length(x)),
  xlab = "Miles Per Gallon",
  main = "Histogram with normal curve and box"
)

# 40 evenly spaced points spanning the data range. length.out is spelled
# out in full rather than relying on partial matching of "length".
xfit <- seq(min(x), max(x), length.out = 40)

# Normal density with the sample mean and standard deviation.
yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))

# Rescale from density to the count scale of the histogram:
# h$mids holds the bar centres, so diff(h$mids[1:2]) is the bar width,
# and density * bar width * number of observations gives expected counts.
yfit <- yfit * diff(h$mids[1:2]) * length(x)

lines(xfit, yfit, col = "#FF0000", lwd = 2)
box() # draw a frame around the plot region
四、核密度图
4.1、简易核密度图
概述:核密度估计是用于估计随机变量概率密度函数的一种非参数方法
函数:plot(density(x))
对象:连续型变量
示例:
# Stack the next two plots vertically (2 rows, 1 column).
par(mfrow = c(2, 1))

# Kernel density estimate of miles per gallon.
d <- density(mtcars$mpg)

# First panel: the estimate with plot()'s default title.
plot(d)

# Second panel: the same estimate with a custom title. plot() is a
# high-level function and always starts a new figure; to add a density
# curve to an existing figure, use lines() instead.
plot(
  d,
  main = "Kernel Density of Miles Per Gallon"
)

# polygon() draws a polygon from x/y vertices, supplied here by density():
# the area under the curve is filled red and the outline is drawn in blue.
polygon(
  d,
  col = "red",
  border = "#0000FF"
)

# Brown rug marks showing the individual observations.
rug(mtcars$mpg, col = "brown")

# Close the current graphics device.
dev.off()