Applied Regression and Classification with R
Chapter 2. Classical Linear Regression: Comparing Models via Cross-Validation

######## Chapter 2 ########
### 2.4.1 The linear models ###
Puromycin<-read.csv("F:/R/应用回归分析/数据和代码/PureData/Puromycin.csv")
# Simple linear regression: y = b0 + b1*x + c, where c is the random error
a1=lm(rate ~ conc,Puromycin)
summary(a1)
shapiro.test(a1$res) # Shapiro-Wilk normality test of the residuals; H0: the residuals are normal

# Add the qualitative variable state: y = b0 + b1*x + a_i + c, where a_i is the effect of each of the two state levels
a2=lm(rate ~ .,Puromycin)
summary(a2)
shapiro.test(a2$res)
anova(a2) # a qualitative variable is tested with analysis of variance

# Add the interaction: y = b0 + (b1 + r_i)*x + a_i + c, where r_i (i = 1, 2) is the increment each state level adds to the slope of x
a3=lm(rate ~ conc * state,Puromycin)
summary(a3)
shapiro.test(a3$res)
anova(a3)

# Log-transform conc: y = b0 + b1*ln(x) + c
a4=lm(rate ~ log(conc),Puromycin)
summary(a4)
shapiro.test(a4$res)

# Add the qualitative variable state to a4: y = b0 + b1*ln(x) + a_i + c
a5=lm(rate ~ log(conc) + state,Puromycin)
summary(a5)
shapiro.test(a5$res)
anova(a5)

# Add the interaction as well: y = b0 + (b1 + r_i)*ln(x) + a_i + c
a6=lm(rate ~ log(conc) * state,Puromycin)
summary(a6)
shapiro.test(a6$res)
anova(a6)
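# Optional in-sample comparison of the six fits (a sketch; it assumes a1-a6
# from above are still in the workspace). AIC is comparable across all six
# models because the response is untransformed, and anova() gives a partial
# F-test for the interaction that a6 adds on top of a5.
sapply(list(a1,a2,a3,a4,a5,a6), AIC)
anova(a5, a6)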



####### 2.4.4 Cross-validation ######
### Splitting the data set ###
Fold=function(Z=5,w,D,seed=7777){ 
  n=nrow(w);d=1:n;dd=list() 
  # factor() guards against read.csv() leaving the column as character (R >= 4.0)
  e=levels(factor(w[,D]));T=length(e) # T levels of the stratification column D
  set.seed(seed)
  for(i in 1:T){
    d0=d[w[,D]==e[i]];j=length(d0)
    ZT=rep(1:Z,ceiling(j/Z))[1:j]
    id=cbind(sample(ZT,length(ZT)),d0);dd[[i]]=id}
  # each dd[[i]] is a matrix pairing a shuffled fold label 1:Z with the row indices of level i
  mm=list()
  for(i in 1:Z){u=NULL;
  for(j in 1:T)u=c(u,dd[[j]][dd[[j]][,1]==i,2])
  mm[[i]]=u} # mm[[i]] is the i-th index set, i = 1, ..., Z
  return(mm)} # returns Z index sets
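# A quick sanity check of Fold() (a sketch using the default seed): split
# Puromycin into 5 folds stratified by column 3 (state) and confirm that
# every row is assigned exactly once.
folds=Fold(Z=5, Puromycin, 3)
sapply(folds, length)                       # fold sizes
all(sort(unlist(folds))==1:nrow(Puromycin)) # TRUE: the folds partition the rows
table(Puromycin[folds[[1]], 3])             # both state levels appear in the first fold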

### Build the regression formulas for the six models ###
w=Puromycin; D=2; FML=list();J=1 # column D=2 (rate) is the response
FML[[J]]=as.formula(paste(names(w)[D],"~",names(w)[1]))
J=J+1; FML[[J]]=as.formula(paste(names(w)[D],"~."))
J=J+1;FML[[J]]=as.formula(paste(names(w)[D],"~",names(w)[1],
                                "*",names(w)[3]))
J=J+1; 
FML[[J]]=as.formula(paste(names(w)[D],"~log(",names(w)[1],")"))
J=J+1;FML[[J]]=as.formula(paste(names(w)[D],
                                "~log(",names(w)[1],")+",names(w)[3]))
J=J+1; FML[[J]]=as.formula(paste(names(w)[D],
                                 "~log(",names(w)[1],")*",names(w)[3]))

### The actual 1000 rounds of 5-fold cross-validation; each round draws its own random seed (sampled without replacement) ###
JJ=6;D=2;Z=5;WW=NULL;N=1000 # JJ models, Z folds, N rounds of cross-validation
set.seed(1010);Seed=sample(1:100000,N)
for(k in 1:N){
  mm=Fold(Z=5,w,3,Seed[k]) # Seed[k], not Seed[N]: each round must use its own seed
  E=matrix(-99,Z,JJ)
  for(J in 1:JJ){for(i in 1:Z){
    m=mm[[i]];M=mean((w[m,D]-mean(w[m,D]))^2) # benchmark: MSE of predicting the test-fold mean
    a0=lm(FML[[J]],data=w[-m,])               # fit on the training folds
    pa=predict(a0,w[m,])                      # predict the held-out fold
    E[i,J]=mean((w[m,D]-pa)^2)/M}}            # normalized MSE (NMSE)
  WW=rbind(WW,E)}
(ZZ=apply(WW,2,mean)) # the final output: six average NMSE values, one per model
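# Beyond the averages, the spread of the fold-level NMSE values is worth a
# look (a sketch; WW holds N*Z rows, one per fold per round):
apply(WW,2,sd)                                  # variability of each model's NMSE
boxplot(WW, names=paste0("a",1:6), ylab="NMSE") # distribution per model
which.min(ZZ)                                   # the model with the smallest mean NMSE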


######2.5######
w<-read.csv("F:/R/应用回归分析/数据和代码/PureData/Concrete.csv")
plot(w) # scatterplot matrix of all variables

# Linear regression with all predictors, no interaction effects
a=lm(Compressive.strength~.,w)
shapiro.test(a$res)
# Normal Q-Q plot of the residuals and the residuals-vs-fitted plot
par(mfrow=c(1,2))
qqnorm(a$res);qqline(a$res)
plot(a$res~a$fit);abline(h=0)
par(mfrow=c(1,1)) # restore the single-panel layout for the plots further down

summary(a)

################# Cross-validated comparison with other methods ####################
#### CV() randomly splits the row indices 1:n into Z folds for cross-validation
CV=function(n,Z=10,seed=888){
  z=rep(1:Z,ceiling(n/Z))[1:n]
  set.seed(seed);z=sample(z,n)
  mm=list();for (i in 1:Z) mm[[i]]=(1:n)[z==i]
  return(mm)}
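# Quick usage check of CV() on a small n (a sketch): three folds of 1:10.
CV(10, Z=3)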
##################################################
#install.packages("rpart.plot")
#install.packages("ipred")
#install.packages("mboost")
#install.packages("randomForest")
#install.packages("kernlab")
#install.packages("e1071")
#install.packages("nnet")
#install.packages("neuralnet")
library(rpart.plot)
library(ipred)
library(mboost)
library(randomForest)
library(kernlab)
library(e1071)
library(nnet)
library(neuralnet)
D=9  # column 9 (Compressive.strength) is the response
Z=10 # 10-fold cross-validation
mm=CV(nrow(w),Z)
gg=paste(names(w)[D],"~",".",sep="") # gg = (Compressive.strength ~ .)
(gg=as.formula(gg))

zy=(1:ncol(w))[-D] # predictor column indices
gg1=paste(names(w)[D],"~btree(",names(w)[zy[1]],")",sep="")
for(i in zy[-1])gg1=paste(gg1,"+btree(",names(w)[i],")",sep="")
gg1=as.formula(gg1) # one btree() base-learner per predictor, for mboost

MSE=matrix(0,Z,6);J=1 # one column of fold-wise NMSE per method
set.seed(1010);for(i in 1:Z)
{m=mm[[i]];M=mean((w[m,D]-mean(w[m,D]))^2)
a=mboost(gg1,data=w[-m,]) # componentwise gradient boosting with btree() base-learners
MSE[i,J]=mean((w[m,D]-predict(a,w[m,]))^2)/M}
J=J+1;set.seed(1010);for(i in 1:Z)
{m=mm[[i]];M=mean((w[m,D]-mean(w[m,D]))^2)
a=bagging(gg,data=w[-m,]) # bagged regression trees (ipred)
MSE[i,J]=mean((w[m,D]-predict(a,w[m,]))^2)/M} 
J=J+1;set.seed(1010);for(i in 1:Z)
{m=mm[[i]];M=mean((w[m,D]-mean(w[m,D]))^2)
a=randomForest(gg,data=w[-m,]) # random forest
MSE[i,J]=mean((w[m,D]-predict(a,w[m,]))^2)/M }
J=J+1;for(i in 1:Z)
{m=mm[[i]];M=mean((w[m,D]-mean(w[m,D]))^2)
a=ksvm(gg,data=w[-m,]) # SVM regression; ksvm() has no "model" argument, and
# with a numeric response it defaults to type "eps-svr"
MSE[i,J]=mean((w[m,D]-predict(a,w[m,]))^2)/M }
J=J+1;set.seed(1010);for(i in 1:Z){
  m=mm[[i]];M=mean((w[m,D]-mean(w[m,D]))^2)
  a=nnet(w[-m,D]/max(w[,D]) ~ ., data=w[-m,-D], size=6, decay=0.1, trace=FALSE)
  # the response is scaled into [0,1] to suit the logistic output unit; dropping
  # column D from data keeps the response itself out of the "." predictors
  MSE[i,J]=mean((w[m,D]-predict(a,w[m,])*max(w[,D]))^2)/M}
J=J+1;for(i in 1:Z)
{m=mm[[i]];M=mean((w[m,D]-mean(w[m,D]))^2)
a=lm(gg,w[-m,]) # ordinary linear regression as the baseline
MSE[i,J]=mean((w[m,D]-predict(a,w[m,]))^2)/M}

MSE=data.frame(MSE)
names(MSE)=c("boosting","bagging","RF","SVM","NNET","lm") # "boosting" = gradient boosting via mboost, not AdaBoost
(NMSE=apply(MSE,2,mean));MSE # mean NMSE per method, followed by the fold-wise table
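# An alternative view of the same comparison (a sketch): the fold-wise NMSE
# distribution of each method, complementing the line and bar charts below.
boxplot(MSE, ylab="NMSE")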

#### Plotting
# Line chart of the fold-wise NMSE for each method
attach(MSE)
plot(boosting,type = "b",main='10-fold cross validation',xlab='Fold',ylab='NMSE',ylim=c(0,0.5))
lines(bagging,type = "o",col='blue')
lines(RF,type = "l",col='green')
lines(SVM,type = "b",pch = 2,col='red')
lines(NNET,type = "b",pch = 3,col='yellow')
lines(lm,type = "b",pch = 4,col='black')
# base graphics legend(); the Hmisc package is not needed for this
legend("topleft",c('boosting','bagging','RF','SVM','NNET','lm'),pch=c(1,1,NA,2,3,4),
       lty=rep(1,6),col=c("black","blue","green","red","yellow","black")) # pch=NA: RF is drawn as a plain line
detach(MSE)
# Bar chart of the mean NMSE per method
barplot(NMSE)