判别垃圾邮件spam实例(0:好邮件,1:垃圾邮件)
载入需要的包,加载数据。
# Install any required package that is not already available. Installing only
# when missing avoids re-downloading on every run and keeps the script usable
# offline once the packages are present.
local({
  required_pkgs <- c("rpart", "rpart.plot", "pROC")
  is_installed <- vapply(
    required_pkgs, requireNamespace, logical(1), quietly = TRUE
  )
  missing_pkgs <- required_pkgs[!is_installed]
  if (length(missing_pkgs) > 0) {
    install.packages(missing_pkgs)
  }
})

# Attach the packages; library() stops with an error if one cannot be loaded,
# so a separate require() call afterwards is unnecessary.
library(rpart)
library(rpart.plot)
library(pROC)

# Read the data set; the path is relative to the current working directory.
X <- read.csv("spamTrain.csv")
随机生成训练集和测试集。
# Randomly partition the rows of `data` into a training set and a test set.
#
# Arguments:
#   data: a data frame whose rows are the observations to split.
#   p:    fraction of rows assigned to the training set, in [0, 1]; the
#         default 0.7 gives a 7:3 train/test ratio.
#   s:    seed passed to set.seed() so that the split is reproducible.
#
# Returns:
#   A list with two elements: `train`, holding floor(nrow(data) * p) randomly
#   chosen rows, and `test`, holding all remaining rows. Every row of `data`
#   appears in exactly one of the two.
#
# Note: calling set.seed() resets the global RNG state as a side effect.
split.data <- function(data, p = 0.7, s = 666) {
  stopifnot(is.numeric(p), length(p) == 1L, !is.na(p), p >= 0, p <= 1)

  set.seed(s)
  n <- nrow(data)
  shuffled <- sample.int(n)

  n_train <- floor(n * p)
  train_rows <- shuffled[seq_len(n_train)]
  # The test set is everything after the training rows. Counting from
  # n_train + 1 (rather than ceiling(n * p) + 1) ensures that no row is
  # dropped when n * p is not an integer, and seq_len() keeps the index
  # empty instead of counting backwards when either part has zero rows.
  test_rows <- shuffled[n_train + seq_len(n - n_train)]

  list(train = data[train_rows, ], test = data[test_rows, ])
}
# Split once and reuse the result, so the training and test sets are
# guaranteed to come from the same partition (and the shuffle is not
# computed twice).
data_split <- split.data(X)
trainset <- data_split$train
testset <- data_split$test
画出完整的决策树,不作任何修剪cp=0,图像非常复杂。
tr1 <- rpart(spam ~ ., data=trainset, me