# 2017ericfinaltest

# Load all packages used across the exam questions.
# NOTE(review): rm(list = ls()) in scripts is discouraged practice; kept here
# because the original script relies on starting from a clean workspace.
rm(list = ls())
library(Matrix)
library(glmnet)
library(survival)
library(ISLR)          # loaded once (the original loaded ISLR twice)
library(MASS)
library(class)
library(caret)
library(gbm)
library(nnet)
library(pROC)
library(randomForest)
library(tree)
# Question 1
# Shuffle the iris rows, standardise the four numeric predictors, and draw
# a random training index of size M = 100.
rm(list = ls())
M <- 100
set.seed(4061)
# Shuffle the rows so the train/test split is random even though the iris
# data frame is ordered by species.
dat <- iris[sample(seq_len(nrow(iris))), ]
# scale() centres and scales each column; numerically identical to the
# original apply(dat[, 1:4], 2, scale) but avoids apply() on a data frame.
dat[, 1:4] <- scale(dat[, 1:4])
# Sample training indices; nrow(dat) == nrow(iris), so the RNG draws (and
# hence the split) are identical to the original code.
itrain <- sample(seq_len(nrow(dat)), M)
# (a) Grow a classification tree on the training rows using the Gini index
# as the splitting criterion, then report the number of terminal nodes and
# the misclassification error rate of the full (unpruned) tree.
class(dat)
tree.mod <- tree(Species ~ ., data = dat[itrain, ], split = "gini")
# Variables actually used in the splits:
summary(tree.mod)$used
# Full summary: terminal-node count and misclassification error rate.
summary(tree.mod)


# (b) Consider the classification tree obtained in Figure 1 for a similar
# training set: which variables were found to be useful for classifying
# iris specimens, based on this tree?
# Plot the fitted tree to read off the splitting variables.
par(font = 2, mar = c(1, 1, 1, 1))
plot(tree.mod, col = "navy")
text(tree.mod, pretty = NULL)

# (c) Side-by-side boxplots of each standardised feature per species.
par(mfrow = c(2, 2))
for (k in 1:4) {
  # paste() around a single string was a no-op in the original; use the
  # column name directly as the plot title.
  boxplot(dat[, k] ~ dat[, 5], col = "pink", main = names(dat)[k])
}

# (d) based on this boxplot, comment on your findings in (b)
# the Petal information is more clearly separated per species

# (e) Predict the held-out (test) observations with the tree grown in (a)
# and report: (i) the confusion table; (ii) the prediction error rate.
tree.pred <- predict(tree.mod, dat[-itrain, ], type = "class")
# Confusion table: predicted class (rows) vs true species (columns).
tb.tree <- table(tree.pred, dat[-itrain, 5])
# Error rate = 1 - accuracy, where accuracy is the diagonal mass of the table.
1 - sum(diag(tb.tree)) / sum(tb.tree)

# (f)
# we now consider pruning the classification tree obtained in (a),
# based on cross-validated misclassification error. 
# what is the optimal tree size for pruning? explain your answer.

# (end of script excerpt — scraped CSDN page boilerplate removed)