# Convert the raw orthogonal-array data to a numeric matrix and name the
# columns: response y, continuous x1/x2/x17, then categorical x3..x16.
data <- as.matrix(oadata)
col_names <- c("y", "x1", "x2", "x17", paste0("x", 3:16))
colnames(data) <- col_names
# library(DoE.base)
# oa.design(ID=L9.3.4)
# oa.design(nfactors=6, nlevels=3)
# Hold-out validation: split the rows 50/50 into train and test sets.
library(caret)
# Spell out `times` / `list` in full -- the original's `time=1` only worked
# through partial argument matching.
index <- createDataPartition(data[, 1], times = 1, p = 0.5, list = FALSE)
train <- data[index, ]
test <- data[-index, ]
print(nrow(train))
print(nrow(test))
# Row subsetting of a matrix already preserves colnames; reassert defensively
# from the single shared vector instead of repeating the literal.
colnames(train) <- col_names
colnames(test) <- col_names
# ANOVA-style model on the training half: continuous x1, x2, x17 plus the
# categorical factors x3..x16, with no intercept (y ~ 0 + ...).
a.lm <- lm(y ~ 0 + x1 + x2 + x17 + as.factor(x3) + as.factor(x4) + as.factor(x5) +
             as.factor(x6) + as.factor(x7) + as.factor(x8) + as.factor(x9) +
             as.factor(x10) + as.factor(x11) + as.factor(x12) + as.factor(x13) +
             as.factor(x14) + as.factor(x15) + as.factor(x16),
           data = data.frame(train))
summary(a.lm)
anova(a.lm)
aov(a.lm)
# Mean absolute relative error on the held-out test half.
ytest <- predict(a.lm, data.frame(test[, 2:18]))
resi1 <- abs(ytest - test[, 1]) / test[, 1]
# BUG FIX: the original assigned r1[k] <- mean(resi1), but both r1 (its
# creation is commented out above) and the loop index k are undefined here.
r1 <- mean(resi1)  # hold-out error
print(r1)
# Reverse hold-out check: fit the identical model on the test half and
# score it on the training half.
a_c.lm <- lm(y ~ 0 + x1 + x2 + x17 + as.factor(x3) + as.factor(x4) + as.factor(x5) +
               as.factor(x6) + as.factor(x7) + as.factor(x8) + as.factor(x9) +
               as.factor(x10) + as.factor(x11) + as.factor(x12) + as.factor(x13) +
               as.factor(x14) + as.factor(x15) + as.factor(x16),
             data = data.frame(test))
#summary(a_c.lm)
#anova(a_c.lm)
# BUG FIX: the original predicted with a_7c.lm, which is only defined later
# in the file (forward reference -> error); the model fitted here is a_c.lm.
print(mean(abs(predict(a_c.lm, data.frame(train[, 2:18])) - train[, 1]) / train[, 1]))
# Drop x7 (largest p-value) and refit.
# BUG FIX: the original fit a_7.lm on the FULL data set while a.lm was fit on
# the training half -- that both leaks test rows into training (so the test
# error below was optimistic) and invalidates the nested-model comparison
# anova(a.lm, a_7.lm), which requires both models to use the same data.
a_7.lm <- lm(y ~ 0 + x1 + x2 + x17 + as.factor(x3) + as.factor(x4) + as.factor(x5) +
               as.factor(x6) + as.factor(x8) + as.factor(x9) + as.factor(x10) +
               as.factor(x11) + as.factor(x12) + as.factor(x13) + as.factor(x14) +
               as.factor(x15) + as.factor(x16),
             data = data.frame(train))
#summary(a_7.lm)
#anova(a_7.lm)
print(mean(abs(predict(a_7.lm, data.frame(test[, 2:18])) - test[, 1]) / test[, 1]))
# Reverse check for the reduced model: fit on test, score on train.
a_7c.lm <- lm(y ~ 0 + x1 + x2 + x17 + as.factor(x3) + as.factor(x4) + as.factor(x5) +
                as.factor(x6) + as.factor(x8) + as.factor(x9) + as.factor(x10) +
                as.factor(x11) + as.factor(x12) + as.factor(x13) + as.factor(x14) +
                as.factor(x15) + as.factor(x16),
              data = data.frame(test))
#summary(a_7c.lm)
#anova(a_7c.lm)
print(mean(abs(predict(a_7c.lm, data.frame(train[, 2:18])) - train[, 1]) / train[, 1]))
# Nested-model F-tests: does dropping x7 significantly hurt the fit?
anova(a.lm, a_7.lm)
anova(a_c.lm, a_7c.lm)
# Normality checks (Shapiro-Wilk) of y within individual factor levels.
# BUG FIX: the original used attach(data), but `data` is a matrix and
# attach() only accepts data frames / lists -- it would error.  Use a data
# frame and with() instead (attach() is discouraged anyway).
df_s <- as.data.frame(data)
with(df_s, {
  print(shapiro.test(y[x3 == 0]))
  print(shapiro.test(y[x3 == 1]))
  for (lev in 0:6) {
    print(shapiro.test(y[x4 == lev]))
  }
})
# Homogeneity-of-variance checks (Bartlett) of y across each factor x3..x16.
# Pass a data frame rather than the raw matrix -- model.frame() does not
# reliably accept a matrix for `data=`.  Looping replaces 14 copy-pasted
# lines; the original annotated x3, x4, x6..x12, x14..x16 as FAILING the
# equal-variance assumption ("not satisfied"), x5/x13 as passing.
df_v <- as.data.frame(data)
for (v in paste0("x", 3:16)) {
  cat("Bartlett test of y by", v, "\n")
  print(bartlett.test(df_v$y ~ df_v[[v]]))
}
# Nonparametric follow-up, since normality / equal variance fail above.
# BUG FIX: kruskal.test() has no method for lm objects, so the original
# kruskal.test(a.lm, data=data) errored.  The apparent intent is a
# Kruskal-Wallis test of y against each factor -- TODO confirm.
df_k <- as.data.frame(data)
for (v in paste0("x", 3:16)) {
  cat("Kruskal-Wallis test of y by", v, "\n")
  print(kruskal.test(df_k$y, as.factor(df_k[[v]])))
}
# Tukey HSD multiple comparisons on the factor-only model (test half).
TukeyHSD(aov(y ~ as.factor(x3) + as.factor(x4) + as.factor(x5) + as.factor(x6) +
               as.factor(x8) + as.factor(x9) + as.factor(x10) + as.factor(x11) +
               as.factor(x12) + as.factor(x13) + as.factor(x14) + as.factor(x15) +
               as.factor(x16), data = data.frame(test)))
# NOTE(review): friedman.test on the whole design matrix treats each row as a
# block and every column (including y) as a treatment -- almost certainly not
# the intended analysis; confirm the intended blocking structure.
friedman.test(data)
# Correlations among the factor columns and a scatter-plot matrix.
cor(data[, 5:18])
# scatterplotMatrix() and powerTransform() come from the car package, which
# the original never loaded.
library(car)
scatterplotMatrix(data[, 5:18], spread = FALSE, lty.smooth = 2,
                  main = "Scatter Plot Matrix")
# Box-Cox transformation of the response.  Reference y through the matrix --
# the bare `y` in the original depended on attach(), which fails on a matrix.
summary(powerTransform(data[, "y"]))
#bootstrap
install.packages("bootstrap")
library(bootstrap)
# k-fold cross-validated R^2 for a fitted linear model (after Kabacoff,
# "R in Action", sec. 8.6).  Prints the apparent R^2, the cross-validated
# R^2, and the shrinkage (their difference).
# BUG FIXES vs. original: theta.fit called lm(x, y) -- lm()'s first argument
# is a formula, so every fold errored -- and theta.predict predicted on the
# global `data` instead of the fold's x.  Restore the lsfit()/matrix form.
shrinkage <- function(fit, k = 10) {
  require(bootstrap)
  theta.fit <- function(x, y) { lsfit(x, y) }
  # Prediction for a least-squares fit: prepend the intercept column.
  theta.predict <- function(fit, x) { cbind(1, x) %*% fit$coef }
  x <- fit$model[, 2:ncol(fit$model)]
  y <- fit$model[, 1]
  results <- crossval(x, y, theta.fit, theta.predict, ngroup = k)
  r2 <- cor(y, fit$fitted.values)^2
  r2cv <- cor(y, results$cv.fit)^2
  cat("Original R-square=", r2, "\n")
  cat(k, "Fold Cross-Validated R-square=", r2cv, "\n")
  cat("Change=", r2 - r2cv, "\n")
}
# Reduced model (x7 dropped) fit on the full data set, then assessed with
# k-fold cross-validated R^2 via shrinkage().
# NOTE(review): fit$model will contain factor columns for the as.factor()
# terms; confirm shrinkage()'s least-squares cross-validation handles
# non-numeric model columns for this fit.
fit<-lm(y~0+x1+x2+x17+as.factor(x3)+as.factor(x4)+as.factor(x5)+as.factor(x6)+as.factor(x8)
+as.factor(x9)+as.factor(x10)+as.factor(x11)+as.factor(x12)+as.factor(x13)+as.factor(x14)
+as.factor(x15)+as.factor(x16),data=data.frame(data))
shrinkage(fit)
# Relative importance via standardized regression coefficients: scale all
# variables to mean 0 / sd 1, refit, and compare coefficient magnitudes.
# NOTE(review): `states` is not defined anywhere in this file -- presumably
# the state.x77-derived data frame from the "R in Action" examples; confirm
# it is created earlier in the session.
zstates<-as.data.frame(scale(states))
zfit<-lm(Murder~Population+Income+Illiteracy+Frost,data=zstates)
coef(zfit)
# Relative weights of predictors (Johnson's relative-weights method, after
# Kabacoff, "R in Action", sec. 8.7).  Returns a one-column matrix giving
# each predictor's percentage share of the model R^2 and draws a bar plot;
# extra arguments are forwarded to barplot().
# BUG FIXES vs. original: the fullwidth "……" in the argument list and in the
# barplot() call was a syntax error (should be `...`), and the R^2 variable
# was spelled `rsqrare` at assignment but `rsquare` at use, leaving
# `rsquare` undefined.
relweights <- function(fit, ...) {
  R <- cor(fit$model)
  nvar <- ncol(R)
  rxx <- R[2:nvar, 2:nvar]   # predictor intercorrelations
  rxy <- R[2:nvar, 1]        # predictor-response correlations
  svd <- eigen(rxx)
  evec <- svd$vectors
  ev <- svd$values
  delta <- diag(sqrt(ev))
  # lambda = rxx^(1/2): loadings of predictors on their orthogonal counterparts.
  lambda <- evec %*% delta %*% t(evec)
  lambdasq <- lambda^2
  beta <- solve(lambda) %*% rxy
  rsquare <- colSums(beta^2)
  rawwgt <- lambdasq %*% beta^2
  import <- (rawwgt / rsquare) * 100  # shares sum to 100
  lbls <- names(fit$model[2:nvar])
  rownames(import) <- lbls
  colnames(import) <- "Weight"
  barplot(t(import), names.arg = lbls,
          ylab = "% of R-Square",
          xlab = "Predictor Variables",
          main = "Relative Importance of Predictor Variables",
          sub = paste("R-Square=", round(rsquare, digits = 3)), ...)
  return(import)
}
# Demo: relative weights of the predictors of Murder.
# NOTE(review): `states` is not defined in this file -- presumably the
# state.x77-derived data frame from the "R in Action" examples; confirm it
# exists before this line runs.
fit<-lm(Murder~Population+Illiteracy+Income+Frost,data=states)
relweights(fit,col="lightgrey")