ESL-12-svm

Here is the code for Section 12.2.2 (the mixture-data example).
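Everything below is simulated, so exact numbers will differ from run to run. Fixing the RNG seed first makes the plots and error rates reproducible (the seed value is arbitrary and my addition, not part of the original code):

set.seed(2023)  # arbitrary seed, added for reproducibility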

library(MASS)



# 10 mixture centers per class (ESL mixture simulation)
x.1 <- mvrnorm(n=10, mu=c(1,0), Sigma=diag(rep(1,2)))
y.1 <- rep(1,10)  # blue class
blue <- cbind(x.1, y.1)

x.2 <- mvrnorm(n=10, mu=c(0,1), Sigma=diag(rep(1,2)))
y.2 <- rep(0,10)  # orange class
orange <- cbind(x.2, y.2)




# 100 observations per class: pick one of that class's 10 centers at random,
# then draw from N(center, 0.2*I)
x.blue <- matrix(0, 100, 2)
x.orange <- matrix(0, 100, 2)
for (i in 1:100) {
    m.blue <- sample(1:10, 1)
    x.blue[i, ] <- mvrnorm(n=1, mu=blue[m.blue, c(1,2)], Sigma=diag(rep(0.2,2)))
    m.orange <- sample(1:10, 1)
    x.orange[i, ] <- mvrnorm(n=1, mu=orange[m.orange, c(1,2)], Sigma=diag(rep(0.2,2)))
}
plot(x.blue[,1],x.blue[,2],col="blue",xlim=c(-2.5,2.5),ylim=c(-3,3))
points(x.orange[,1],x.orange[,2],col="orange")
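To see the mixture structure, it can help to overlay the 10 generating centers of each class on the plot above (a small addition, not in the original code):

points(blue[,1], blue[,2], col="blue", pch=19)      # blue centers
points(orange[,1], orange[,2], col="orange", pch=19)  # orange centers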


# Assemble the 200-point training set with factor labels +1 (blue) / -1 (orange)
x.blue.train <- data.frame(x=x.blue, y=as.factor(rep(1,100)))
x.orange.train <- data.frame(x=x.orange, y=as.factor(rep(-1,100)))
names(x.blue.train) <- c("x1","x2","y")
names(x.orange.train) <- c("x1","x2","y")
x.train <- rbind(x.blue.train, x.orange.train)
library(e1071)


# Small cost (strong regularization): wide margin, many support vectors
svmfit.smaller <- svm(y~., data=x.train, kernel="linear", cost=0.01, scale=FALSE)
plot(svmfit.smaller, x.train)
summary(svmfit.smaller)




# Large cost: narrow margin, few support vectors (cf. ESL Figure 12.2)
svmfit.bigger <- svm(y~., data=x.train, kernel="linear", cost=10000, scale=FALSE)
plot(svmfit.bigger, x.train)
summary(svmfit.bigger)
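summary() prints the number of support vectors for each fit; they can also be compared directly from the fitted objects (a quick check using e1071's tot.nSV component):

c(small.cost=svmfit.smaller$tot.nSV, big.cost=svmfit.bigger$tot.nSV)  # the small-cost fit should use far more support vectors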


# Cross-validate over cost (tune() does 10-fold CV by default)
tune.out <- tune(svm, y~., data=x.train, kernel="linear", ranges=list(cost=c(0.001, 0.01, 0.1, 1, 10, 1000, 10000)))


summary(tune.out)
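tune() also refits the model at the cross-validated best cost and stores it, so there is no need to refit by hand (a minimal sketch using the tune object's best.model component):

bestmod <- tune.out$best.model  # SVM refit at the CV-selected cost
summary(bestmod)
mean(predict(bestmod, x.train) != x.train$y)  # training error of the selected model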



Below is the code for Table 12.2; the results are essentially consistent with the book.

library(e1071)
# Training set without noise features (ESL Table 12.2, "skin of the orange"):
# class +1 is N(0, I_4); class -1 keeps standard-normal draws with 9 < sum(x^2) < 16
x.train.1 <- matrix(rnorm(100*4,0,1),100,4)
x.train.2 <- matrix(rnorm(10000*4,0,1),10000,4)
y.train.2 <- ifelse(rowSums(x.train.2^2)>9 & rowSums(x.train.2^2)<16, 1, 0)
x.train.2.temp <- x.train.2[which(y.train.2==1),][1:100,]  # about 5.8% qualify, so 100 rows are available
x.train.1 <- data.frame(x.train.1, as.factor(rep(1,100)))
x.train.2 <- data.frame(x.train.2.temp, as.factor(rep(-1,100)))
names(x.train.1) <- c("x1","x2","x3","x4","y")
names(x.train.2) <- c("x1","x2","x3","x4","y")
x.train <- rbind(x.train.1, x.train.2)




# Training set with 6 pure-noise features appended (10 features total)
x.train.1.noise <- matrix(rnorm(100*10,0,1),100,10)
x.train.2.noise <- matrix(rnorm(10000*10,0,1),10000,10)
y.train.2.noise <- ifelse(rowSums(x.train.2.noise[,1:4]^2)>9 & rowSums(x.train.2.noise[,1:4]^2)<16, 1, 0)
x.train.2.temp.noise <- x.train.2.noise[which(y.train.2.noise==1),][1:100,]
x.train.1.noise <- data.frame(x.train.1.noise, as.factor(rep(1,100)))
x.train.2.noise <- data.frame(x.train.2.temp.noise, as.factor(rep(-1,100)))
names(x.train.1.noise) <- c("x1","x2","x3","x4","x5","x6","x7","x8","x9","x10","y")
names(x.train.2.noise) <- c("x1","x2","x3","x4","x5","x6","x7","x8","x9","x10","y")
x.train.noise <- rbind(x.train.1.noise, x.train.2.noise)


# Test sets: 1000 points per class, built the same way as the training sets


x.test.1 <- matrix(rnorm(1000*4,0,1),1000,4)
x.test.2 <- matrix(rnorm(100000*4,0,1),100000,4)
y.test.2 <- ifelse(rowSums(x.test.2^2)>9 & rowSums(x.test.2^2)<16, 1, 0)
x.test.2.temp <- x.test.2[which(y.test.2==1),][1:1000,]
x.test.1 <- data.frame(x.test.1, as.factor(rep(1,1000)))
x.test.2 <- data.frame(x.test.2.temp, as.factor(rep(-1,1000)))
names(x.test.1) <- c("x1","x2","x3","x4","y")
names(x.test.2) <- c("x1","x2","x3","x4","y")
x.test <- rbind(x.test.1, x.test.2)




# Test set with noise features
x.test.1.noise <- matrix(rnorm(1000*10,0,1),1000,10)
x.test.2.noise <- matrix(rnorm(100000*10,0,1),100000,10)
y.test.2.noise <- ifelse(rowSums(x.test.2.noise[,1:4]^2)>9 & rowSums(x.test.2.noise[,1:4]^2)<16, 1, 0)
x.test.2.temp.noise <- x.test.2.noise[which(y.test.2.noise==1),][1:1000,]
x.test.1.noise <- data.frame(x.test.1.noise, as.factor(rep(1,1000)))
x.test.2.noise <- data.frame(x.test.2.temp.noise, as.factor(rep(-1,1000)))
names(x.test.1.noise) <- c("x1","x2","x3","x4","x5","x6","x7","x8","x9","x10","y")
names(x.test.2.noise) <- c("x1","x2","x3","x4","x5","x6","x7","x8","x9","x10","y")
x.test.noise <- rbind(x.test.1.noise, x.test.2.noise)
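The four data sets above all repeat the same "skin of the orange" recipe, so the duplication could be factored out. A sketch of such a helper (the function and argument names are my own, not from the original code):

make.orange <- function(n, p = 4) {
    # class +1: n draws from N(0, I_p)
    x1 <- matrix(rnorm(n * p), n, p)
    # class -1: oversample N(0, I_p), keep points whose first 4 squared
    # coordinates sum to between 9 and 16 (about 5.8% qualify)
    x2 <- matrix(rnorm(100 * n * p), 100 * n, p)
    r2 <- rowSums(x2[, 1:4]^2)
    x2 <- x2[r2 > 9 & r2 < 16, , drop = FALSE][1:n, ]
    d <- data.frame(rbind(x1, x2), y = factor(rep(c(1, -1), each = n)))
    names(d) <- c(paste0("x", 1:p), "y")
    d
}
# e.g. x.train <- make.orange(100); x.train.noise <- make.orange(100, 10)
#      x.test <- make.orange(1000); x.test.noise <- make.orange(1000, 10)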




# Linear support vector classifier
tune.out <- tune(svm, y~., data=x.train, kernel="linear", ranges=list(cost=c(0.01, 0.1, 1, 10, 1000, 10000)))
summary(tune.out)  # no-noise training set
svmfit <- svm(y~., data=x.train, kernel="linear", cost=1, scale=FALSE)
y.pred <- predict(svmfit, x.test)  # predict on the matching no-noise test set
table(y.pred, x.test[,"y"])  # test error 0.452
tune.out.noise <- tune(svm, y~., data=x.train.noise, kernel="linear", ranges=list(cost=c(0.01, 0.1, 1, 1000, 10000)))
summary(tune.out.noise)  # noisy training set
svmfit <- svm(y~., data=x.train.noise, kernel="linear", cost=1, scale=FALSE)
y.pred.noise <- predict(svmfit, x.test.noise)
table(y.pred.noise, x.test.noise[,"y"])  # test error 0.481
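The trailing comments such as #0.452 are test error rates read off the confusion tables. They can also be computed directly instead of by hand (a small helper, assuming the objects above):

test.err <- function(pred, truth) mean(pred != truth)
test.err(y.pred, x.test$y)              # linear kernel, no noise
test.err(y.pred.noise, x.test.noise$y)  # linear kernel, with noise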




# Degree-2 polynomial kernel
tune.out <- tune(svm, y~., data=x.train, kernel="polynomial", degree=2, ranges=list(cost=c(0.01, 0.1, 1, 10, 1000)))
summary(tune.out)  # no-noise training set
svmfit <- svm(y~., data=x.train, kernel="polynomial", degree=2, cost=1, scale=FALSE)
y.pred <- predict(svmfit, x.test)
table(y.pred, x.test[,"y"])  # test error 0.049


tune.out.noise <- tune(svm, y~., data=x.train.noise, kernel="polynomial", degree=2, ranges=list(cost=c(0.01, 0.1, 1, 1000)))
summary(tune.out.noise)  # noisy training set
svmfit <- svm(y~., data=x.train.noise, kernel="polynomial", degree=2, cost=1, scale=FALSE)
y.pred.noise <- predict(svmfit, x.test.noise)
table(y.pred.noise, x.test.noise[,"y"])  # test error 0.152


# Degree-5 polynomial kernel
tune.out <- tune(svm, y~., data=x.train, kernel="polynomial", degree=5, ranges=list(cost=c(0.01, 0.1, 1, 10, 1000, 10000)))
summary(tune.out)  # no-noise training set
svmfit <- svm(y~., data=x.train, kernel="polynomial", degree=5, cost=1000, scale=FALSE)
y.pred <- predict(svmfit, x.test)
table(y.pred, x.test[,"y"])  # test error 0.329
tune.out.noise <- tune(svm, y~., data=x.train.noise, kernel="polynomial", degree=5, ranges=list(cost=c(0.01, 0.1, 1, 1000, 10000)))
summary(tune.out.noise)  # noisy training set
svmfit <- svm(y~., data=x.train.noise, kernel="polynomial", degree=5, cost=1000, scale=FALSE)
y.pred.noise <- predict(svmfit, x.test.noise)
table(y.pred.noise, x.test.noise[,"y"])  # test error 0.409


# Degree-10 polynomial kernel
tune.out <- tune(svm, y~., data=x.train, kernel="polynomial", degree=10, ranges=list(cost=c(0.01, 0.1, 1, 10, 1000)))
summary(tune.out)  # no-noise training set
svmfit <- svm(y~., data=x.train, kernel="polynomial", degree=10, cost=1, scale=FALSE)
y.pred <- predict(svmfit, x.test)
table(y.pred, x.test[,"y"])  # test error 0.1225
tune.out.noise <- tune(svm, y~., data=x.train.noise, kernel="polynomial", degree=10, ranges=list(cost=c(0.01, 0.1, 1, 1000, 10000)))
summary(tune.out.noise)  # noisy training set
svmfit <- svm(y~., data=x.train.noise, kernel="polynomial", degree=10, cost=1000, scale=FALSE)
y.pred.noise <- predict(svmfit, x.test.noise)
table(y.pred.noise, x.test.noise[,"y"])  # test error 0.3585


library(mda)  # fit the same problem with BRUTO and MARS
# bruto()/mars() expect a numeric response, so convert the factor labels back to +/-1
y.train.num <- as.numeric(as.character(x.train[,5]))
x.bruto <- bruto(as.matrix(x.train[,-5]), y.train.num)
y.pred <- predict(x.bruto, newdata=as.matrix(x.test[,-5]), type="fitted")
y.pred.class <- ifelse(y.pred > 0, 1, -1)
table(y.pred.class, x.test[,5])  # test error 132/2000 = 0.066


y.train.num.noise <- as.numeric(as.character(x.train.noise[,11]))
x.bruto.noise <- bruto(as.matrix(x.train.noise[,-11]), y.train.num.noise)
y.pred.noise <- predict(x.bruto.noise, newdata=as.matrix(x.test.noise[,-11]), type="fitted")
y.pred.class.noise <- ifelse(y.pred.noise > 0, 1, -1)
table(y.pred.class.noise, x.test.noise[,11])  # test error 0.058


x.mars <- mars(as.matrix(x.train[,-5]), y.train.num)
y.pred <- predict(x.mars, newdata=as.matrix(x.test[,-5]), type="fitted")
y.pred.class <- ifelse(y.pred > 0, 1, -1)
table(y.pred.class, x.test[,5])  # test error 0.0755


x.mars.noise <- mars(as.matrix(x.train.noise[,-11]), y.train.num.noise)
y.pred.noise <- predict(x.mars.noise, newdata=as.matrix(x.test.noise[,-11]), type="fitted")
y.pred.class.noise <- ifelse(y.pred.noise > 0, 1, -1)
table(y.pred.class.noise, x.test.noise[,11])  # test error 0.0995
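Because the BRUTO/MARS predictions are numeric +/-1 labels while the y columns are factors, computing the error rates directly needs the factor converted back through as.character first (otherwise the internal factor codes 1/2 leak in):

y.true <- as.numeric(as.character(x.test[,5]))
mean(y.pred.class != y.true)  # MARS test error, no noise
y.true.noise <- as.numeric(as.character(x.test.noise[,11]))
mean(y.pred.class.noise != y.true.noise)  # MARS test error, with noise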

The results show that MARS and BRUTO have strong built-in variable selection; BRUTO in particular is almost unaffected by the noise variables, and in this run its test error even came out lower than its training error.
