# week3 决策树分析 R语言实现 (Week 3: decision-tree analysis implemented in R)

library(tree)


# Load the raw soybean records: comma-separated values with no header row.
soybean <- read.table(file = "soybean-large.data", sep = ",", header = FALSE)
# Work on a data-frame copy whose 36 columns are named att1 ... att36.
soybean_df <- data.frame(soybean)
header <- paste0("att", 1:36)
names(soybean_df) <- header


# Put the columns of soybean_df on the search path so they can be referenced
# by bare name (att1 ... att36).
# NOTE(review): attach() exposes a copy of the columns taken at this point;
# later assignments into soybean_df are not reflected in the bare names.
# Prefer explicit soybean_df$attN / soybean_df[[N]] access in new code.
attach(soybean_df)


# Replace the unknown marker "?" in every feature column (all columns except
# the last one, att36) with that column's mode, i.e. its most frequent known
# value.
#
# Differences from the previous one-line-per-column version:
#   * every column is filtered by its own values (columns 32-35 used to be
#     filtered by the "?" positions of columns 28-31);
#   * values are read from soybean_df itself, not from attached copies;
#   * "?" is excluded when the mode is determined, so an unknown value is
#     never "imputed" with "?" again;
#   * each column is tabulated once instead of re-tabulating all columns for
#     every single replacement.

# Return `column` with every `unknown` entry replaced by the most frequent
# known value. Works for character, factor and numeric columns; a column with
# no unknown entries, or with no known entries at all, is returned unchanged.
impute_mode <- function(column, unknown = "?") {
  is_unknown <- !is.na(column) & column == unknown
  known <- column[!is_unknown & !is.na(column)]
  if (!any(is_unknown) || length(known) == 0L) {
    return(column)
  }
  counts <- table(known)
  # which.max() returns the first maximum, so ties resolve to the value that
  # comes first in table order.
  column[is_unknown] <- names(counts)[which.max(counts)]
  column
}

feature_cols <- seq_len(ncol(soybean_df) - 1L)
soybean_df[feature_cols] <- lapply(soybean_df[feature_cols], impute_mode)


# Hold-out split: draw 210 distinct row numbers at random for training and
# keep every remaining row as the test set. (A fixed count of 210 is a 7:3
# split only for a data set of about 300 rows.)
train <- sample(seq_len(nrow(soybean_df)), size = 210)
test <- soybean_df[-train, ]


# Fit a classification tree that predicts att36 from all other columns, using
# only the training rows.
# rpart is called through its namespace because this script attaches only the
# `tree` package; without the prefix the call fails with
# "could not find function" unless rpart was attached elsewhere.
# method = "class" is stated explicitly so the fit is always a classifier,
# matching the later predict(..., type = "class") call.
soybean.tree <- rpart::rpart(
  att36 ~ .,
  data = soybean_df[train, ],
  method = "class"
)
# Draw the tree with extra white space around it, then add the node labels in
# a small font.
plot(soybean.tree, margin = 0.1)
text(soybean.tree, cex = 0.5)


# Score the held-out rows with the fitted tree, asking for hard class labels.
att36.test <- test[[36]]  # actual labels: 36th column of the test rows
soybean.pred <- predict(soybean.tree, newdata = test, type = "class")
# Cross-tabulate predicted against actual classes (printed at top level).
table(soybean.pred, att36.test)


# Hit rate (accuracy) on the test set, read off the confusion matrix.
# The matrix is built over one shared set of class labels for both axes, so
# it is always square and its diagonal always holds the correct predictions.
# Reshaping the raw table into a fixed 19 x 19 matrix is only valid when all
# 19 classes appear on both axes; with a random split a rare class can be
# missing from the test labels, which shifts counts off the diagonal.
class_levels <- union(
  levels(as.factor(soybean.pred)),
  as.character(att36.test)
)
matrix_pred <- unclass(table(
  factor(soybean.pred, levels = class_levels),
  factor(att36.test, levels = class_levels),
  dnn = c("predicted", "actual")
))
sum(diag(matrix_pred)) / nrow(test)
# Example output from one run (varies with the random split): [1] 0.6391753
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值