#### Graphical check of asymptotic normality ####
# For each sample size in `n`, draw a histogram of N standardized sums of
# draws from `r`, overlaid with a kernel density estimate (red, solid) and
# the standard normal density (blue, dotted), plus a rug of the values.
#
# Arguments:
#   r        random generator, called as r(count, <distribution parameters>).
#   distpar  numeric vector of distribution parameters; each element is
#            passed to `r` as its own positional argument after the count
#            (any length, not only 1 or 2).
#   m, s     mean and standard deviation of a single draw, used to
#            standardize the sums.
#   n        sample sizes; one plot is drawn per element.
#   N        number of simulated sums per plot.
#
# Called for its plotting side effect; the value is that of the `for` loop.
limite.central <- function(r = runif, distpar = c(0, 1), m = 0.5,
                           s = 1 / sqrt(12),
                           n = c(1, 3, 10, 30), N = 1000) {
  for (i in n) {
    # Spread distpar over separate positional arguments so generators with
    # three or more parameters work too.
    draws <- do.call(r, c(list(i * N), unname(as.list(distpar))))
    x <- matrix(draws, ncol = i)
    # Standardize the row sums: (sum - i * m) / (sqrt(i) * s).
    x <- (rowSums(x) - i * m) / (sqrt(i) * s)
    # Estimate the density once and reuse it for the y-range and the overlay.
    dens <- density(x)
    hist(x, col = "light blue", probability = TRUE, main = paste("n=", i),
         ylim = c(0, max(.4, dens$y)))
    lines(dens, col = "red", lwd = 3)
    curve(dnorm(x), col = "blue", lwd = 3, lty = 3, add = TRUE)
    # Keep the rug readable: show at most 100 randomly chosen values.
    if (N > 100) {
      rug(sample(x, 100))
    } else {
      rug(x)
    }
  }
}
# Demo: 2 x 2 grid of plots for sums of Binomial(10, 0.1) draws.
# A single draw has mean 10 * 0.1 = 1 and variance 10 * 0.1 * 0.9 = 0.9.
# limite.central() divides by sqrt(i) * s, so `s` must be the standard
# deviation sqrt(0.9), not the variance 0.9.
op <- par(mfrow = c(2, 2))
limite.central(rbinom, distpar = c(10, 0.1), m = 1, s = sqrt(0.9), N = 100)
par(op)
#### Density curves of the normal distribution ####
# Three zero-mean normal densities with different standard deviations on a
# single panel, with a legend anchored at the top-right corner of the plot
# region.
par(mfrow = c(1, 1))
curve(
  dnorm(x, 0, 1),
  xlim = c(-5, 5), ylim = c(0, .8),
  col = "red", lwd = 2, lty = 3
)
curve(dnorm(x, 0, 2), col = "blue", lwd = 2, lty = 2, add = TRUE)
curve(dnorm(x, 0, .5), lwd = 2, lty = 1, add = TRUE)
title(main = "Gaussian distributions")
# par("usr") is c(x1, x2, y1, y2); elements 2 and 4 give the top-right corner.
legend(
  par("usr")[2], par("usr")[4],
  legend = c("Sigma=1", "Sigma=2", "Sigma=0.5"),
  xjust = 1,
  lwd = rep(2, 3),
  lty = c(3, 2, 1),
  col = c("red", "blue", par("fg"))
)
#### Histogram ####
# Argument list of hist(), written out in the form of a usage signature.
# NOTE(review): not runnable as written at top level -- it passes `...` and
# refers to `freq`, `xname`, `breaks` and `ylab`, none of which are defined
# in the visible part of this file. Presumably pasted as reference; confirm.
hist(x, breaks = "Sturges",
freq = NULL, probability = !freq,
include.lowest = TRUE, right = TRUE, fuzz = 1e-7,
density = NULL, angle = 45, col = "lightgray", border = NULL,
main = paste("Histogram of" , xname),
xlim = range(breaks), ylim = NULL,
xlab = xname, ylab,
axes = TRUE, plot = TRUE, labels = FALSE,
nclass = NULL, warn.unused = TRUE, ...)
#### Kernel density estimation ####
# Argument list of density(), written out in the form of a usage signature.
# NOTE(review): not runnable as written at top level -- it passes `...` and
# refers to `kernel`, `width`, `from` and `to`, none of which are defined in
# the visible part of this file. Presumably pasted as reference; confirm.
density(x, bw = "nrd0", adjust = 1,
kernel = c("gaussian", "epanechnikov", "rectangular",
"triangular", "biweight",
"cosine", "optcosine"),
weights = NULL, window = kernel, width,
give.Rkern = FALSE, subdensity = FALSE,
n = 512, from, to, cut = 3, na.rm = FALSE, ...)
#### Stem-and-leaf plot ####
# Call to stem() with scale, width and atom spelled out explicitly.
# NOTE(review): `x` is not assigned at top level before this line in the
# visible part of the file; presumably a usage listing -- confirm.
stem(x, scale = 1, width = 80, atom = 1e-08)
#### Box plot ####
# Argument lists of boxplot() in two forms: a formula interface first, then
# a form taking the data `x` directly.
# NOTE(review): not runnable as written at top level -- both calls pass `...`
# and refer to names (`formula`, `subset`, `mklab`, `names`, ...) that are
# not defined in the visible part of this file. Presumably pasted as
# reference; confirm.
boxplot(formula, data = NULL, ..., subset, na.action = NULL,
xlab = mklab(y_var = horizontal),
ylab = mklab(y_var =!horizontal),
add = FALSE, ann = !add, horizontal = FALSE,
drop = FALSE, sep = ".", lex.order = FALSE)
# `formula` gives the plotting rule of the box plot; y ~ grp means that y is
# split into groups according to grp.
boxplot(x, ..., range = 1.5, width = NULL, varwidth = FALSE,
notch = FALSE, outline = TRUE, names, plot = TRUE,
border = par("fg"), col = "lightgray", log = "",
pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5),
ann = !add, horizontal = FALSE, add = FALSE, at = NULL)
#### Normality checks ####
{
  #### Q-Q plots ####
  # NOTE(review): the three calls below are written in the form of usage
  # signatures (they pass `...` and use `ylim` without a value), so they are
  # not runnable as written; presumably kept as reference -- confirm.
  qqnorm(y, ylim, main = "Normal Q-Q Plot",
         xlab = "Theoretical Quantiles", ylab = "Sample Quantiles",
         plot.it = TRUE, datax = FALSE, ...)
  qqline(y, datax = FALSE, distribution = qnorm,
         probs = c(0.25, 0.75), qtype = 7, ...)
  qqplot(x, y, plot.it = TRUE,
         xlab = deparse1(substitute(x)),
         ylab = deparse1(substitute(y)), ...)
  #### Comparison with the normal density ####
  # NOTE(review): placeholders naming the functions to use; the calls have no
  # arguments.
  hist()
  curve()
  lines()
  #### Using the empirical distribution function ####
  # Step plot of the empirical CDF of a standard normal sample, overlaid with
  # the normal CDF that uses the sample mean and standard deviation.
  data <- rnorm(100)
  x <- sort(data)
  n <- length(x)
  y <- seq_len(n) / n  # ECDF heights 1/n, 2/n, ..., 1
  m <- mean(x)
  s <- sd(x)
  plot(x, y, type = "s", main = ".")
  curve(pnorm(x, m, s), col = "red", lwd = 2, add = TRUE)
}
#### Descriptive statistics for several variables ####
# Scatter plot with marginal box plots
library(DAAG)
data("cars")
# Save only the settable graphical parameters: restoring the result of a
# bare par() makes par(op) warn about read-only parameters.
op <- par(no.readonly = TRUE)
# The matrix gives the drawing order of the panels (0 = empty cell);
# column widths are in ratio 1:6 and row heights in ratio 4:1.
layout(matrix(c(2, 1, 0, 3), 2, 2, byrow = TRUE), c(1, 6), c(4, 1))
par(mar = c(1, 1, 5, 2))  # bottom, left, top, right
plot(cars$dist ~ cars$speed, xlab = "", ylab = "", las = 1)
# jitter() adds a small random perturbation to the data
rug(side = 1, jitter(cars$speed, 5))
rug(side = 2, jitter(cars$dist, 20))
title(main = "cars data")
par(mar = c(1, 2, 5, 1))
boxplot(cars$dist, axes = FALSE)
title(ylab = "Stopping distance(ft)", line = 0)
par(mar = c(5, 1, 1, 2))
boxplot(cars$speed, horizontal = TRUE, axes = FALSE)
title(xlab = "Speed(mph)", line = 1)
par(op)
#### Contour plot ####
library(MASS)
# Estimate the density of the bivariate data (kde2d is the MASS function;
# the original spelling `kdeed` does not exist).
# NOTE(review): `x` and `y` are taken from the workspace; confirm they hold
# the intended bivariate sample at this point.
z <- kde2d(x, y)
contour(z, col = "red", drawlabels = FALSE, main = "")  # draw the contours
#### 3-D perspective plot ####
# persp() is the graphics function; the original spelling `persq` does not
# exist.
persp(z, main = "")
#### Graphical summaries of grouped data ####
# Argument list of histogram(), written out in the form of a usage
# signature; the defaults shown refer to lattice.getOption().
# NOTE(review): not runnable as written at top level -- it passes `...` and
# many arguments without values. library(lattice) is only attached further
# down in this file, in the density plot section. Presumably pasted as
# reference; confirm.
histogram(x,
data,
allow.multiple, outer = TRUE,
auto.key = FALSE,
aspect = "fill",
panel = lattice.getOption("panel.histogram"),
prepanel, scales, strip, groups,
xlab, xlim, ylab, ylim,
type = c("percent", "count", "density"),
nint = if (is.factor(x)) nlevels(x)
else round(log2(length(x)) + 1),
endpoints = extend.limits(range(as.numeric(x),
finite = TRUE), prop = 0.04),
breaks,
equal.widths = TRUE,
drop.unused.levels =
lattice.getOption("drop.unused.levels"),
...,
lattice.options = NULL,
default.scales = list(),
default.prepanel =
lattice.getOption("prepanel.default.histogram"),
subscripts,
subset)
#### Strip chart ####
# Argument list of stripchart(), written out in the form of a usage
# signature.
# NOTE(review): not runnable as written at top level -- it passes `...` and
# refers to `group.names` and `axes`, which are not defined in the visible
# part of this file. Presumably pasted as reference; confirm.
stripchart(x, method = "overplot", jitter = 0.1, offset = 1/3,
vertical = FALSE, group.names, add = FALSE,
at = NULL, xlim = NULL, ylim = NULL,
ylab = NULL, xlab = NULL, dlab = "", glab = "",
log = "", pch = 0, col = par("fg"), cex = par("cex"),
axes = TRUE, frame.plot = axes, ...)
#### Density curve plot ####
library(lattice)
# Argument list of densityplot(), written out in the form of a usage
# signature; the defaults shown refer to lattice.getOption().
# NOTE(review): not runnable as written at top level -- it passes `...` and
# many arguments without values (`groups`, `outer`, `bw`, ...). Presumably
# pasted as reference; confirm.
densityplot(x,
data,
allow.multiple = is.null(groups) || outer,
outer = !is.null(groups),
auto.key = FALSE,
aspect = "fill",
panel = lattice.getOption("panel.densityplot"),
prepanel, scales, strip, groups, weights,
xlab, xlim, ylab, ylim,
bw, adjust, kernel, window, width, give.Rkern,
n = 512, from, to, cut, na.rm,
drop.unused.levels =
lattice.getOption("drop.unused.levels"),
...,
lattice.options = NULL,
default.scales = list(),
default.prepanel =
lattice.getOption("prepanel.default.densityplot"),
subscripts,
subset)
#### Descriptive statistics for categorical data ####
# 4 x 4 table of counts, filled row by row. Rows and columns are labelled at
# construction time.
Eye.Hair <- matrix(
  c(
     68, 20, 15,  5,
    119, 84, 54, 29,
     26, 17, 14, 14,
      7, 94, 10, 16
  ),
  nrow = 4,
  byrow = TRUE,
  dimnames = list(
    c("Black", "Brown", "Red", "Blond"),
    c("Brown", "Blue", "Hazel", "Green")
  )
)
Eye.Hair
# Table of overall relative frequencies (each cell divided by the grand total)
round(Eye.Hair / sum(Eye.Hair), digits = 3)
#### Graphical display of a contingency table ####
data("HairEyeColor")
# Sum the array over everything except its first two dimensions, giving a
# two-way table.
a <- as.table(apply(HairEyeColor, c(1, 2), sum))
# Grouped bars, one bar per row level within each column
barplot(a, beside = TRUE, legend.text = dimnames(a)$Hair)
# Without `beside` the bars are stacked
barplot(a, legend.text = dimnames(a)$Hair)
dotchart(Eye.Hair)
# 《R语言与统计分析》-探索性数据分析
# 最新推荐文章于 2024-03-09 18:06:58 发布
# 这篇博客探讨了多种统计图形的绘制和使用,包括直方图、核密度估计、箱线图、QQ图等,用于数据分布的可视化和正态性检验。此外,还展示了如何通过R语言实现这些图形,并通过实例演示了正态分布的概率函数和多组数据的描述性统计分析。
# 摘要由CSDN通过智能技术生成