# covdesign2.R — 2017-08-29 (original filename/date header)

# Build the analysis matrix from the orthogonal-array data `oadata`
# (assumed to exist in the workspace — TODO confirm where it is loaded).
data <- as.matrix(oadata)
# Column layout: response y, continuous x1/x2/x17, then the factors x3..x16.
colnames(data) <- c("y", "x1", "x2", "x17", paste0("x", 3:16))

# library(DoE.base)
# oa.design(ID=L9.3.4)
# oa.design(nfactors=6, nlevels=3)

# Hold-out validation: one stratified 50/50 split on the response column.
library(caret)
# `times = 1` (spelled out — the original relied on partial matching of
# `time=`), `list = FALSE` returns a matrix of row indices.
index <- createDataPartition(data[, 1], times = 1, p = 0.5, list = FALSE)
train <- data[index, ]
test <- data[-index, ]
print(nrow(train))
print(nrow(test))
# Subsetting a named matrix keeps colnames; re-assert them defensively.
colnames(train) <- c("y", "x1", "x2", "x17", paste0("x", 3:16))
colnames(test) <- c("y", "x1", "x2", "x17", paste0("x", 3:16))

# ANOVA-style model: continuous x1, x2, x17 plus categorical x3..x16,
# fitted without an intercept (y ~ 0 + ...) on the training half.
a.lm <- lm(y ~ 0 + x1 + x2 + x17 + as.factor(x3) + as.factor(x4) +
             as.factor(x5) + as.factor(x6) + as.factor(x7) + as.factor(x8) +
             as.factor(x9) + as.factor(x10) + as.factor(x11) + as.factor(x12) +
             as.factor(x13) + as.factor(x14) + as.factor(x15) + as.factor(x16),
           data = data.frame(train))
summary(a.lm)
anova(a.lm)
aov(a.lm)
# Hold-out error: mean absolute percentage error (MAPE) on the test half.
ytest <- predict(a.lm, data.frame(test[, 2:18]))
resi1 <- abs(ytest - test[, 1]) / test[, 1]
# BUG FIX: the original did `r1[k] = mean(resi1)`, but `r1` (commented out
# above) and `k` came from a deleted loop and are undefined — it would error.
# Just report the MAPE once (the original also re-computed and printed it).
print(mean(resi1))
#交叉验证
# Reverse cross-check: fit the same full model on the test half and
# evaluate its MAPE on the training half.
a_c.lm <- lm(y ~ 0 + x1 + x2 + x17 + as.factor(x3) + as.factor(x4) +
               as.factor(x5) + as.factor(x6) + as.factor(x7) + as.factor(x8) +
               as.factor(x9) + as.factor(x10) + as.factor(x11) + as.factor(x12) +
               as.factor(x13) + as.factor(x14) + as.factor(x15) + as.factor(x16),
             data = data.frame(test))
# BUG FIX: the original predicted with `a_7c.lm`, which is not defined until
# later in the script; the model fitted here is `a_c.lm`.
print(mean(abs(predict(a_c.lm, data.frame(train[, 2:18])) - train[, 1]) / train[, 1]))

# Reduced model: drop x7 (large p-value in the full model).
# BUG FIX: the original fitted this on the FULL `data` while a.lm was fitted
# on `train`, which makes the test-set MAPE optimistic (test rows were seen
# in fitting) and makes anova(a.lm, a_7.lm) below compare models fitted on
# different data.  Fit on `train`, matching a.lm.
a_7.lm <- lm(y ~ 0 + x1 + x2 + x17 + as.factor(x3) + as.factor(x4) +
               as.factor(x5) + as.factor(x6) + as.factor(x8) +
               as.factor(x9) + as.factor(x10) + as.factor(x11) + as.factor(x12) +
               as.factor(x13) + as.factor(x14) + as.factor(x15) + as.factor(x16),
             data = data.frame(train))
print(mean(abs(predict(a_7.lm, data.frame(test[, 2:18])) - test[, 1]) / test[, 1]))
#交叉验证
# Cross-check for the reduced (no-x7) model: fit on test, evaluate on train.
a_7c.lm <- lm(y ~ 0 + x1 + x2 + x17 + as.factor(x3) + as.factor(x4) +
                as.factor(x5) + as.factor(x6) + as.factor(x8) +
                as.factor(x9) + as.factor(x10) + as.factor(x11) + as.factor(x12) +
                as.factor(x13) + as.factor(x14) + as.factor(x15) + as.factor(x16),
              data = data.frame(test))
print(mean(abs(predict(a_7c.lm, data.frame(train[, 2:18])) - train[, 1]) / train[, 1]))

# Nested-model F tests: does dropping x7 significantly hurt the fit?
anova(a.lm, a_7.lm)
anova(a_c.lm, a_7c.lm)

# --- Model-assumption checks --------------------------------------------
# BUG FIX: `data` is a matrix, and attach() only works on data frames,
# lists and environments — the original attach(data) errors and y/x3/...
# are never in scope.  Work through a data frame instead (no attach).
df_all <- data.frame(data)

# Normality of y within each factor level (Shapiro-Wilk).
print(shapiro.test(df_all$y[df_all$x3 == 0]))
print(shapiro.test(df_all$y[df_all$x3 == 1]))
for (lev in 0:6) {
  print(shapiro.test(df_all$y[df_all$x4 == lev]))
}

# Homogeneity of variance across levels of each factor (Bartlett).
# Per the original author's notes, most factors failed this check
# (failed: x3, x4, x6..x12, x14..x16; passed: x5, x13), which motivates
# the nonparametric tests further down.
for (v in paste0("x", 3:16)) {
  print(bartlett.test(df_all$y ~ df_all[[v]]))
}

# Nonparametric alternative to the ANOVA (variance homogeneity failed for
# most factors).  BUG FIX: kruskal.test() takes a formula or x/g vectors,
# not a fitted lm object — the original call was invalid.  Run a
# Kruskal-Wallis test of y against each factor instead.
df_all <- data.frame(data)
for (v in paste0("x", 3:16)) {
  print(kruskal.test(df_all$y ~ as.factor(df_all[[v]])))
}

# Tukey HSD multiple comparisons on the no-x7 ANOVA, fitted on the test half.
# NOTE(review): fitting the post-hoc model on `test` only (not the full data)
# looks intentional but halves the power — confirm.
TukeyHSD(aov(y~as.factor(x3)+as.factor(x4)+as.factor(x5)+as.factor(x6)+as.factor(x8)
             +as.factor(x9)+as.factor(x10)+as.factor(x11)+as.factor(x12)+as.factor(x13)+as.factor(x14)
             +as.factor(x15)+as.factor(x16),data=data.frame(test)))# multiple comparisons of level differences

# NOTE(review): friedman.test() on a matrix treats rows as blocks and columns
# as treatments; passing the whole `data` matrix (response + all predictors)
# is almost certainly not a meaningful design — verify what was intended.
friedman.test(data)

# Correlations and pairwise scatterplots among the factor columns x3..x16
# (matrix columns 5:18).  BUG FIX: scatterplotMatrix() and powerTransform()
# come from the `car` package, which the original never loaded, and
# powerTransform(y) relied on the failed attach() above.
library(car)
df_all <- data.frame(data)
cor(df_all[, 5:18])
scatterplotMatrix(df_all[, 5:18], spread = FALSE, lty.smooth = 2,
                  main = "Scatter Plot Matrix")
# Box-Cox transformation: estimate a normalizing power for y.
summary(powerTransform(df_all$y))

# bootstrap-based cross-validation of R-square.
# Install `bootstrap` only if missing — an unconditional install.packages()
# in a script re-downloads on every run and may prompt for a CRAN mirror.
if (!requireNamespace("bootstrap", quietly = TRUE)) install.packages("bootstrap")
library(bootstrap)
shrinkage <- function(fit, k = 10) {
  # k-fold cross-validated R-square for a fitted lm, and the shrinkage
  # (optimism) relative to the apparent R-square.  Uses crossval() from the
  # `bootstrap` package (cf. Kabacoff, "R in Action", ch. 8).
  #
  # BUG FIX: the original theta.fit was `lm(x, y)` — an invalid lm call
  # (lm takes a formula) — and theta.predict ignored its `x` argument and
  # predicted on the global `data`, so the CV result was meaningless.
  require(bootstrap)
  theta.fit <- function(x, y) lsfit(x, y)
  theta.predict <- function(fit, x) cbind(1, x) %*% fit$coef
  # NOTE(review): for this script's models fit$model contains factor columns;
  # lsfit() needs numeric input, so confirm the design matrix is numeric
  # (or pass model.matrix(fit) instead) before relying on these numbers.
  x <- fit$model[, 2:ncol(fit$model)]
  y <- fit$model[, 1]
  results <- crossval(x, y, theta.fit, theta.predict, ngroup = k)
  r2 <- cor(y, fit$fitted.values)^2
  r2cv <- cor(y, results$cv.fit)^2
  cat("Original R-square=", r2, "\n")
  cat(k, "Fold Cross-Validated R-square=", r2cv, "\n")
  cat("Change=", r2 - r2cv, "\n")
}
# Fit the no-x7 model on the full data set and report how much its apparent
# R-square shrinks under 10-fold cross-validation.
fit<-lm(y~0+x1+x2+x17+as.factor(x3)+as.factor(x4)+as.factor(x5)+as.factor(x6)+as.factor(x8)
        +as.factor(x9)+as.factor(x10)+as.factor(x11)+as.factor(x12)+as.factor(x13)+as.factor(x14)
        +as.factor(x15)+as.factor(x16),data=data.frame(data))  
shrinkage(fit)  

# Relative importance via standardized regression coefficients.
# BUG FIX: `states` was never defined in this script (the example is copied
# from "R in Action"); build it from the base `state.x77` dataset so it runs.
states <- as.data.frame(state.x77[, c("Murder", "Population",
                                      "Illiteracy", "Income", "Frost")])
zstates <- as.data.frame(scale(states))
zfit <- lm(Murder ~ Population + Income + Illiteracy + Frost, data = zstates)
coef(zfit)
#相对权重
relweights <- function(fit, ...) {
  # Relative importance ("relative weights") of each predictor in an lm fit,
  # after Johnson (2000): each predictor's share of R-square, in percent.
  # Draws a bar plot and returns the one-column matrix of weights.
  # Extra arguments (...) are passed through to barplot().
  #
  # BUG FIXES vs. the original: the garbled full-width `……` in the parameter
  # list and in the barplot() call is restored to `...`, and the typo
  # `rsqrare` is corrected to `rsquare` (it was read as `rsquare` below,
  # so the function could not run).
  R <- cor(fit$model)
  nvar <- ncol(R)
  rxx <- R[2:nvar, 2:nvar]        # predictor intercorrelations
  rxy <- R[2:nvar, 1]             # predictor-response correlations
  svd <- eigen(rxx)
  evec <- svd$vectors
  ev <- svd$values
  delta <- diag(sqrt(ev))
  # lambda is the symmetric square root of rxx.
  lambda <- evec %*% delta %*% t(evec)
  lambdasq <- lambda^2
  beta <- solve(lambda) %*% rxy   # coefficients on the orthogonalized predictors
  rsquare <- colSums(beta^2)
  rawwgt <- lambdasq %*% beta^2
  import <- (rawwgt / rsquare) * 100
  lbls <- names(fit$model[2:nvar])
  rownames(import) <- lbls
  colnames(import) <- "Weight"
  barplot(t(import), names.arg = lbls,
          ylab = "% of R-Square",
          xlab = "Predictor Variables",
          main = "Relative Importance of Predictor Variables",
          sub = paste("R-Square=", round(rsquare, digits = 3)), ...)
  return(import)
}
# Relative-weight decomposition of the Murder regression.
# BUG FIX: `states` was never defined in this script; build it from the base
# `state.x77` dataset (harmless if already defined identically above).
states <- as.data.frame(state.x77[, c("Murder", "Population",
                                      "Illiteracy", "Income", "Frost")])
fit <- lm(Murder ~ Population + Illiteracy + Income + Frost, data = states)
relweights(fit, col = "lightgrey")

 

# (Scraped CSDN blog-page boilerplate — like/favorite widgets and payment
# text — removed here: it was not part of the R script and broke parsing.)