《R语言与统计分析》-探索性数据分析

这篇博客探讨了多种统计图形的绘制和使用,包括直方图、核密度估计、箱线图、QQ图等,用于数据分布的可视化和正态性检验。此外,还展示了如何通过R语言实现这些图形,并通过实例演示了正态分布的概率函数和多组数据的描述性统计分析。
摘要由CSDN通过智能技术生成
#### Graphical check of asymptotic normality ####
#' Illustrate the central limit theorem with histograms.
#'
#' For every sample size in `n`, draws `N` samples of that size with the
#' generator `r`, standardises each sample sum as
#' (sum - i * m) / (sqrt(i) * s), and draws a histogram of the standardised
#' sums with a kernel density estimate (red) and the standard normal density
#' (dotted blue) on top. One plot is produced per element of `n`.
#'
#' @param r Random generator; called as r(count, <parameters>).
#' @param distpar Parameters forwarded positionally to `r` after the count.
#'   Any number of parameters is accepted (names are ignored).
#' @param m Value subtracted (times the sample size) from each sample sum.
#' @param s Value that, times sqrt(sample size), divides each centred sum.
#' @param n Sample sizes to illustrate.
#' @param N Number of replicated samples per sample size.
#' @return NULL, invisibly; called for its plotting side effect.
limite.central <- function(r = runif, distpar = c(0, 1), m = 0.5,
                           s = 1 / sqrt(12),
                           n = c(1, 3, 10, 30), N = 1000) {
  # Parameters are passed positionally, so drop any names on distpar.
  r_args <- unname(as.list(distpar))
  for (i in n) {
    # One row per replicate, one column per observation in the sample.
    draws <- do.call(r, c(list(i * N), r_args))
    x <- matrix(draws, ncol = i)
    x <- (rowSums(x) - i * m) / (sqrt(i) * s)
    # Estimate the density once; it sets the y-range and is drawn below.
    dens <- density(x)
    hist(x, col = "light blue", probability = TRUE, main = paste("n=", i),
         ylim = c(0, max(0.4, dens$y)))
    lines(dens, col = "red", lwd = 3)
    curve(dnorm(x), col = "blue", lwd = 3, lty = 3, add = TRUE)
    # Keep the rug readable: show at most 100 tick marks.
    if (N > 100) {
      rug(sample(x, 100))
    } else {
      rug(x)
    }
  }
}

# Demo: sums of Binomial(10, 0.1) draws on a 2 x 2 grid of panels.
# The distribution has mean 10 * 0.1 = 1 and variance 10 * 0.1 * 0.9 = 0.9;
# limite.central() divides by sqrt(i) * s, so s must be the standard
# deviation sqrt(0.9), not the variance.
op <- par(mfrow = c(2, 2))
limite.central(rbinom, distpar = c(10, 0.1), m = 1, s = sqrt(0.9), N = 100)
par(op)  # restore the previous panel layout

#### Density curves of the normal distribution ####
# Three zero-mean normal densities with different standard deviations are
# drawn on a single panel; the first call fixes the axes, the others overlay.
par(mfrow = c(1, 1))
curve(
  dnorm(x, 0, 1),
  xlim = c(-5, 5), ylim = c(0, 0.8),
  col = "red", lwd = 2, lty = 3
)
curve(dnorm(x, 0, 2), col = "blue", lwd = 2, lty = 2, add = TRUE)
curve(dnorm(x, 0, 0.5), lwd = 2, lty = 1, add = TRUE)
title(main = "Gaussian distributions")
# Anchor the legend's top-right corner at the top-right of the plot region.
legend(
  x = par("usr")[2], y = par("usr")[4],
  legend = c("Sigma=1", "Sigma=2", "Sigma=0.5"),
  xjust = 1,
  lwd = c(2, 2, 2),
  lty = c(3, 2, 1),
  col = c("red", "blue", par("fg"))
)

#### Histogram ####
# Argument list of hist(), kept here as a reference.
# NOTE(review): this reads as a usage signature (apparently copied from the
# help page -- confirm) rather than a call meant to be executed: `...`,
# `xname` and the bare `ylab` have no value at top level, and
# `probability = !freq` is given while `freq = NULL`.
hist(x, breaks = "Sturges",
     freq = NULL, probability = !freq,
     include.lowest = TRUE, right = TRUE, fuzz = 1e-7,
     density = NULL, angle = 45, col = "lightgray", border = NULL,
     main = paste("Histogram of" , xname),
     xlim = range(breaks), ylim = NULL,
     xlab = xname, ylab,
     axes = TRUE, plot = TRUE, labels = FALSE,
     nclass = NULL, warn.unused = TRUE, ...)

#### Kernel density estimation ####
# Argument list of density(), kept here as a reference.
# NOTE(review): reads as a usage signature rather than a runnable call --
# `width`, `from`, `to` are bare names and `...` is not valid at top level.
# The `kernel` vector lists the kernel names that appear in the signature.
density(x, bw = "nrd0", adjust = 1,
        kernel = c("gaussian", "epanechnikov", "rectangular",
                   "triangular", "biweight",
                   "cosine", "optcosine"),
        weights = NULL, window = kernel, width,
        give.Rkern = FALSE, subdensity = FALSE,
        n = 512, from, to, cut = 3, na.rm = FALSE, ...)

#### Stem-and-leaf plot ####
# Stem-and-leaf display of `x` with scale, width and atom spelled out.
# NOTE(review): `x` is not assigned at top level before this line in the
# visible script -- presumably shown as a usage example; confirm.
stem(x, scale = 1, width = 80, atom = 1e-08)

#### Box plot ####
# Two argument lists of boxplot(), kept here as a reference: first the
# formula form, then the form taking the data `x` directly.
# NOTE(review): both read as usage signatures rather than runnable calls --
# `...`, the bare `subset`/`names`, and `mklab` have no value at top level.
boxplot(formula, data = NULL, ..., subset, na.action = NULL,
        xlab = mklab(y_var = horizontal),
        ylab = mklab(y_var =!horizontal),
        add = FALSE, ann = !add, horizontal = FALSE,
        drop = FALSE, sep = ".", lex.order = FALSE)
# `formula` gives the rule for building the box plot: y ~ grp means that y
# is split into groups according to grp.
boxplot(x, ..., range = 1.5, width = NULL, varwidth = FALSE,
        notch = FALSE, outline = TRUE, names, plot = TRUE,
        border = par("fg"), col = "lightgray", log = "",
        pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5),
        ann = !add, horizontal = FALSE, add = FALSE, at = NULL)

#### Normality checks ####
# The braces group three approaches into one expression: Q-Q plots, a
# comparison with the normal density, and a comparison of the empirical
# distribution function with a fitted normal CDF.
# NOTE(review): the Q-Q entries read as usage signatures (`...`, bare `ylim`)
# and hist()/curve()/lines() are called without arguments, so the group does
# not look runnable as a whole -- only the last part is a worked example.
{
  #### Q-Q plots ####
  # Argument lists of qqnorm(), qqline() and qqplot(), kept as a reference.
  qqnorm(y, ylim, main = "Normal Q-Q Plot",
         xlab = "Theoretical Quantiles", ylab = "Sample Quantiles",
         plot.it = TRUE, datax = FALSE, ...)
  
  qqline(y, datax = FALSE, distribution = qnorm,
         probs = c(0.25, 0.75), qtype = 7, ...)
  
  qqplot(x, y, plot.it = TRUE,
         xlab = deparse1(substitute(x)),
         ylab = deparse1(substitute(y)), ...)
  
  #### Comparison with the normal density ####
  # Placeholders naming the functions involved; no arguments are supplied.
  hist()
  curve()
  lines()
  
  #### Using the empirical distribution function ####
  # 100 standard normal draws, sorted so they can serve as the x axis.
  data<-rnorm(100)
  x<-sort(data)
  n<-length(x)
  # Cumulative proportion i/n attached to the i-th smallest value.
  y<-(1:n)/n
  # Sample mean and standard deviation used for the reference normal CDF.
  m<-mean(x)
  s<-sd(x)
  # Step plot of the empirical distribution function (the title is ".").
  plot(x,y,type='s',main=".")
  # Overlay the normal CDF with the sample mean and sd, in red.
  curve(pnorm(x,m,s),col='red',lwd=2,add=T)
}

#### Descriptive statistics for several variables ####
# Scatter plot of stopping distance against speed with marginal box plots.
library(DAAG)
data("cars")
# Save only the settable graphical parameters: restoring the full par()
# list would warn about read-only entries.
op <- par(no.readonly = TRUE)
# Panel order given by the matrix (0 = empty cell): panel 1 top-right,
# panel 2 top-left, panel 3 bottom-right. Column widths 1:6, row heights 4:1.
layout(matrix(c(2, 1, 0, 3), 2, 2, byrow = TRUE), c(1, 6), c(4, 1))
# Margins are given as bottom, left, top, right.
par(mar = c(1, 1, 5, 2))
# Panel 1: the scatter plot itself.
plot(cars$dist ~ cars$speed, xlab = "", ylab = "", las = 1)
# jitter() adds a small perturbation so tied values stay visible in the rugs.
rug(side = 1, jitter(cars$speed, 5))
rug(side = 2, jitter(cars$dist, 20))
title(main = "cars data")
# Panel 2: box plot of the stopping distances, left of the scatter plot.
par(mar = c(1, 2, 5, 1))
boxplot(cars$dist, axes = FALSE)
title(ylab = "Stopping distance(ft)", line = 0)
# Panel 3: horizontal box plot of the speeds, below the scatter plot.
par(mar = c(5, 1, 1, 2))
boxplot(cars$speed, horizontal = TRUE, axes = FALSE)
title(xlab = "Speed(mph)", line = 1)
# Restore the graphical parameters saved above.
par(op)

#### Contour plot ####
library(MASS)
# Two-dimensional kernel density estimate of (x, y); x and y must already
# hold two numeric vectors of equal length.
z <- kde2d(x, y)
# Contour lines of the estimated density, without level labels.
contour(z, col = "red", drawlabels = FALSE, main = "")

#### 3-D perspective plot ####
# Surface view of the same density estimate.
persp(z, main = "")

#### Graphical summaries of grouped data ####
# Argument list of histogram(), kept here as a reference; the defaults refer
# to lattice.getOption(), so this is presumably the lattice function.
# NOTE(review): reads as a usage signature rather than a runnable call --
# many arguments are bare names (`data`, `prepanel`, `breaks`, `subset`, ...)
# and `...` is not valid at top level.
histogram(x,
          data,
          allow.multiple, outer = TRUE,
          auto.key = FALSE,
          aspect = "fill",
          panel = lattice.getOption("panel.histogram"),
          prepanel, scales, strip, groups,
          xlab, xlim, ylab, ylim,
          type = c("percent", "count", "density"),
          nint = if (is.factor(x)) nlevels(x)
          else round(log2(length(x)) + 1),
          endpoints = extend.limits(range(as.numeric(x),
                                          finite = TRUE), prop = 0.04),
          breaks,
          equal.widths = TRUE,
          drop.unused.levels =
            lattice.getOption("drop.unused.levels"),
          ...,
          lattice.options = NULL,
          default.scales = list(),
          default.prepanel =
            lattice.getOption("prepanel.default.histogram"),
          subscripts,
          subset)

#### Strip chart (one-dimensional scatter plot) ####
# Argument list of stripchart(), kept here as a reference.
# NOTE(review): reads as a usage signature rather than a runnable call --
# `group.names` is a bare name and `...` is not valid at top level.
stripchart(x, method = "overplot", jitter = 0.1, offset = 1/3,
           vertical = FALSE, group.names, add = FALSE,
           at = NULL, xlim = NULL, ylim = NULL,
           ylab = NULL, xlab = NULL, dlab = "", glab = "",
           log = "", pch = 0, col = par("fg"), cex = par("cex"),
           axes = TRUE, frame.plot = axes, ...)

#### Density curve plot ####
library(lattice)
# Argument list of densityplot(), kept here as a reference.
# NOTE(review): reads as a usage signature rather than a runnable call --
# most arguments are bare names (`data`, `bw`, `kernel`, `subset`, ...) and
# `...` is not valid at top level.
densityplot(x,
            data,
            allow.multiple = is.null(groups) || outer,
            outer = !is.null(groups),
            auto.key = FALSE,
            aspect = "fill",
            panel = lattice.getOption("panel.densityplot"),
            prepanel, scales, strip, groups, weights,
            xlab, xlim, ylab, ylim,
            bw, adjust, kernel, window, width, give.Rkern,
            n = 512, from, to, cut, na.rm,
            drop.unused.levels =
              lattice.getOption("drop.unused.levels"),
            ...,
            lattice.options = NULL,
            default.scales = list(),
            default.prepanel =
              lattice.getOption("prepanel.default.densityplot"),
            subscripts,
            subset)

#### Descriptive statistics for categorical data ####
# 4 x 4 table of counts, filled row by row: rows are hair colours, columns
# are eye colours.
Eye.Hair <- matrix(
  c(
    68, 20, 15, 5,
    119, 84, 54, 29,
    26, 17, 14, 14,
    7, 94, 10, 16
  ),
  nrow = 4,
  byrow = TRUE,
  dimnames = list(
    c("Black", "Brown", "Red", "Blond"),
    c("Brown", "Blue", "Hazel", "Green")
  )
)
Eye.Hair

# Table of overall relative frequencies (each cell divided by the grand total)
round(prop.table(Eye.Hair), digits = 3)

#### Graphical display of a contingency table ####
data("HairEyeColor")
# Sum over the third dimension to get a two-way table over dimensions 1 and 2.
a <- as.table(apply(X = HairEyeColor, MARGIN = c(1, 2), FUN = sum))
# Grouped bars: one bar per level of the first dimension within each column.
barplot(a, legend.text = dimnames(a)[["Hair"]], beside = TRUE)
# With the default beside = FALSE the bars are stacked instead.
barplot(a, legend.text = dimnames(a)[["Hair"]])

# Dot chart of the counts stored in the Eye.Hair matrix.
dotchart(x = Eye.Hair)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值