# Logistic regression code example in R
# Fit a logistic regression on the kernlab `spam` data set using a random half
# of the rows, predict on the remaining half, and print a confusion matrix.

# library() stops with an error when kernlab is not installed; require() would
# only return FALSE and let the script fail later at data(spam).
library(kernlab)
data(spam)
data <- spam
n <- nrow(data)

# Fix the RNG seed so the random train/test split is reproducible.
set.seed(1)
# seq_len(n) is the safe form of 1:n (1:0 would yield c(1, 0)).
id <- sample(seq_len(n), floor(n * 0.5))
train <- data[id, ]
test <- data[-id, ]

mdl <- glm(
  formula = type ~ .,
  family = binomial(link = "logit"),
  data = train
)

# Linear predictor z (the value on the logit / log-odds scale).
z <- predict(mdl, test, type = "link")
# Predicted probability p; the two are related by p = 1 / (1 + exp(-z)).
p <- predict(mdl, test, type = "response")

# For a factor response, binomial glm() treats the first level as "failure"
# (coded 0) and the other level as "success" (coded 1). For spam$type the
# levels are "nonspam", "spam", so p is the probability of "spam".
# The decision threshold can be adjusted as needed.
threshold <- 0.5
pred <- as.numeric(p > threshold)

# Convert the 0/1 predictions to a factor with the same labels as the
# original response so the two can be cross-tabulated.
pred <- factor(pred, levels = c(0, 1), labels = levels(test$type))

# Confusion matrix: rows are the true labels, columns are the predictions.
table(test$type, pred, dnn = c("Labels", "prediction"))
###########################################
# Example output of the confusion matrix
# (exact counts depend on the random train/test split):
#
#          prediction
# Labels    nonspam spam
#   nonspam    1328   74
#   spam        105  794