# ********** R script for predicting algae abundance **********
# Load the DMwR package and preview the algae data.
library(DMwR)
# NOTE(review): nothing has been read from disk at this point, so this `algae`
# is presumably the data set shipped with DMwR -- confirm.
head(algae)
# Read the data from "Analysis.txt" and assign the column names.
# The file has no header row, uses "." as the decimal mark, and encodes
# missing values as the string "XXXXXXX".
algae <- read.table(
  "Analysis.txt",
  header = FALSE,
  dec = ".",
  col.names = c(
    "season", "size", "speed", "mxPH", "mno2", "cl", "no3", "nh4",
    "opo4", "po4", "chla", "a1", "a2", "a3", "a4", "a5", "a6", "a7"
  ),
  na.strings = c("XXXXXXX"),
  # R >= 4.0 no longer converts strings to factors by default; request it
  # explicitly so text columns stay factors as the later sections expect.
  stringsAsFactors = TRUE
)
# Draw a histogram of mxPH on the density scale (probability = TRUE) rather
# than as raw counts.
hist(algae$mxPH, probability = TRUE)
# Draw the mxPH histogram with a density curve, and use a QQ plot next to it
# to check whether the data look normally distributed.
library(car)
# Two panels side by side: histogram on the left, QQ plot on the right.
par(mfrow = c(1, 2))
hist(
  algae$mxPH,
  probability = TRUE,
  xlab = "",
  main = "Histogram of maximum ph value",
  ylim = c(0, 1)
)
# Overlay the kernel density estimate; NAs must be dropped for density().
lines(density(algae$mxPH, na.rm = TRUE))
# Mark the individual observations along the x axis (jittered to reduce
# overplotting of ties).
rug(jitter(algae$mxPH))
# qq.plot() no longer exists in current versions of car; qqPlot() is its
# replacement.
qqPlot(algae$mxPH, main = "Normal QQ Plot of maximum PH")
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Draw a box plot of opo4.
boxplot(algae$opo4, ylab = "orthophosphate (opo4)")
# Mark the individual observations along the y axis (side = 2).
rug(jitter(algae$opo4), side = 2)
# Dashed horizontal line at the mean, ignoring missing values.
abline(h = mean(algae$opo4, na.rm = TRUE), lty = 2)
# Outlier inspection for nh4. The three horizontal lines mark the mean
# (solid), the mean plus one standard deviation (dashed), and the median
# (dotted).
plot(algae$nh4, xlab = "")
abline(h = mean(algae$nh4, na.rm = TRUE), lty = 1)
abline(
  h = mean(algae$nh4, na.rm = TRUE) + sd(algae$nh4, na.rm = TRUE),
  lty = 2
)
abline(h = median(algae$nh4, na.rm = TRUE), lty = 3)
# Interactive: click on points in the plot to label them with their index.
identify(algae$nh4)
# Outlier inspection for nh4: pick points interactively, then list the
# matching rows of the data frame.
plot(algae$nh4, xlab = "")
# identify() returns the indices of the clicked points.
clicked.lines <- identify(algae$nh4)
algae[clicked.lines, ]
# Non-interactive alternative: rows whose nh4 exceeds 19000. The column is
# `nh4` (there is no `nh4.line` column, which would silently select zero
# rows); the is.na() guard keeps rows with missing nh4 out of the result.
algae[!is.na(algae$nh4) & algae$nh4 > 19000, ]
# Lattice box plot of a1 conditioned on the factor variable size
# (original author's observation: a1 frequencies are higher in smaller
# rivers).
library(lattice)
bwplot(size ~ a1, data = algae, ylab = "River Size", xlab = "Algal A1")
# Box-percentile plot of a1 by size, drawn with the panel.bpplot panel
# function.
library(Hmisc)
bwplot(
  size ~ a1,
  data = algae,
  panel = panel.bpplot,
  # Quantiles from 1% to 49% in 1% steps, passed on to the panel function.
  probs = seq(0.01, 0.49, by = 0.01),
  datadensity = TRUE,
  # Was misspelled `yalb`, so the y-axis label was never applied.
  ylab = "River Size",
  xlab = "Algal A1"
)
# Plot the effect of two conditioning variables