导入数据集
# Load the data set from "heart.csv" in the current working directory.
data <- read.csv(file = "heart.csv")
# Echo the whole data frame for a quick visual check.
data
划分数据集,80%为训练集,20%为测试集
library(e1071)
library(caret)
# Split the rows into a training set (80%) and a held-out test set (20%).
# Fix the RNG seed so the random partition, and every ROC curve derived from
# it, is reproducible from run to run.
set.seed(1)
# Stratify on the class label: createDataPartition() samples within each
# class only when y is a factor; a numeric y is binned by percentiles instead.
index <- createDataPartition(factor(data$target), p = 0.8, list = FALSE)
train_data <- data[index, ]
test_data <- data[-index, ]
# Report the dimensions (rows, columns) of each subset.
dim(train_data)
dim(test_data)
建立SVM模型
library(ROCR)
# Support vector classifier: a polynomial kernel of degree 1 acts as a
# linear kernel. `degree` is spelled out instead of relying on partial
# matching of `d`.
svmfit <- svm(target ~ ., data = train_data, kernel = "polynomial",
              degree = 1, cost = 1e-1, scale = TRUE)
# Score every training observation with the fitted model. predict() for svm
# objects has no `type` argument (it was silently ignored), so the result is
# the model's plain predictions, not class probabilities.
# NOTE(review): if `target` is numeric rather than a factor, svm() defaults
# to eps-regression instead of C-classification -- confirm this is intended.
svm_probs1 <- predict(svmfit, train_data)
# Pair the scores with the true labels in a ROCR prediction object.
svm_predict1 <- prediction(svm_probs1, train_data$target)
# Compute the true positive rate against the false positive rate, i.e. the
# coordinates of the ROC curve.
svm_per1 <- performance(svm_predict1, measure = "tpr", x.measure = "fpr")
# Draw the ROC curve.
plot(svm_per1, col = "red")
得到ROC曲线
建立LDA模型
library(ROCR)
library(MASS)
# Fit linear discriminant analysis on the training set.
lda.fit <- lda(target ~ ., data = train_data)
# predict() for lda objects has no `type` argument (it was silently ignored),
# so it is dropped here; the returned list includes the posterior matrix.
lda.probs <- predict(lda.fit, train_data)
# `$posterior` holds the posterior probability of every class for every
# observation: one column per class, one row per observation.
# `[, 2]` selects the second column, i.e. the posterior probability of the
# second class, which serves as the ROC score.
lda.predict <- prediction(lda.probs$posterior[, 2], train_data$target)
# True positive rate against false positive rate: the ROC curve coordinates.
lda.per <- performance(lda.predict, measure = "tpr", x.measure = "fpr")
plot(lda.per, col = "blue")
更换不同的gamma值(cost固定为0.1),我们可以得到ISLR教材9.3.3节的结果
定义两个函数:
#' ROC performance of a support vector classifier (SVC)
#'
#' Fits an SVM with a degree-1 polynomial kernel to `data` and computes the
#' ROC coordinates of its predictions on `newdata`. Despite the name, nothing
#' is drawn; pass the result to `plot()`.
#'
#' @param data Data frame used to fit the model; must contain the response
#'   column `target`.
#' @param cost Cost value passed to `svm()`. Defaults to 0.1, the value that
#'   was previously hard-coded.
#' @param newdata Data frame to score; must also contain `target`. Defaults
#'   to `data`, which reproduces the previous in-sample behaviour.
#' @return A ROCR `performance` object with the true positive rate as the
#'   measure and the false positive rate as the x measure.
SVC_ROCplot <- function(data, cost = 1e-1, newdata = data) {
  svmfit <- svm(target ~ ., data = data, kernel = "polynomial", degree = 1,
                cost = cost, scale = TRUE)
  # predict() for svm objects has no `type` argument, so none is passed.
  svm_scores <- predict(svmfit, newdata)
  svm_predict <- prediction(svm_scores, newdata$target)
  performance(svm_predict, measure = "tpr", x.measure = "fpr")
}
#' ROC performance of a radial-kernel SVM
#'
#' Fits an SVM with a radial kernel to `data` and computes the ROC
#' coordinates of its predictions on `newdata`. Despite the name, nothing is
#' drawn; pass the result to `plot()`.
#'
#' @param data Data frame used to fit the model; must contain the response
#'   column `target`.
#' @param gamma Kernel parameter `gamma` passed to `svm()`.
#' @param cost Cost value passed to `svm()`. Defaults to 0.1, the value that
#'   was previously hard-coded.
#' @param newdata Data frame to score; must also contain `target`. Defaults
#'   to `data`, which reproduces the previous in-sample behaviour.
#' @return A ROCR `performance` object with the true positive rate as the
#'   measure and the false positive rate as the x measure.
SVM_ROCplot <- function(data, gamma, cost = 1e-1, newdata = data) {
  # `degree` only applies to the polynomial kernel, so it is not passed here.
  svmfit <- svm(target ~ ., data = data, kernel = "radial", cost = cost,
                gamma = gamma, scale = TRUE)
  # predict() for svm objects has no `type` argument, so none is passed.
  svm_scores <- predict(svmfit, newdata)
  svm_predict <- prediction(svm_scores, newdata$target)
  performance(svm_predict, measure = "tpr", x.measure = "fpr")
}
绘制图像:
训练集数据
# Training-set data: ROC curves of models fit on and scored on train_data.
library(ROCR)
# Support vector classifier (SVC)
svm_per0 <- SVC_ROCplot(train_data)
# LDA
library(MASS)
lda.fit <- lda(target ~ ., data = train_data)
# predict() for lda objects has no `type` argument, so none is passed.
lda.probs <- predict(lda.fit, train_data)
lda.predict <- prediction(lda.probs$posterior[, 2], train_data$target)
lda.per <- performance(lda.predict, measure = "tpr", x.measure = "fpr")
# Lay out two panels side by side, keeping the previous graphics settings so
# they can be restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2))
# Panel 1: LDA against the support vector classifier.
plot(lda.per, col = "blue", main = "Comparison of LDA and SVC",
     xlab = "False Positive Rate", ylab = "True Positive Rate")
plot(svm_per0, col = "red", add = TRUE)
# Legend; the red curve is the SVC, matching the panel title.
legend("bottomright", legend = c("LDA", "SVC"), col = c("blue", "red"),
       lty = 1)
# SVM with gamma = 1e-1
svm_per1 <- SVM_ROCplot(train_data, 1e-1)
# SVM with gamma = 1e-2
svm_per2 <- SVM_ROCplot(train_data, 1e-2)
# SVM with gamma = 1e-3
svm_per3 <- SVM_ROCplot(train_data, 1e-3)
# Panel 2: the SVC against radial-kernel SVMs with different gamma values.
plot(svm_per0, col = "blue", main = "Comparison of SVC and SVM",
     xlab = "False Positive Rate", ylab = "True Positive Rate")
plot(svm_per1, col = "red", add = TRUE)
plot(svm_per2, col = "black", add = TRUE)
plot(svm_per3, col = "green", add = TRUE)
# Legend
legend("bottomright",
       legend = c("SVC", "gamma=1e-1", "gamma=1e-2", "gamma=1e-3"),
       col = c("blue", "red", "black", "green"), lty = 1)
# Restore the graphics layout that was active before this section.
par(old_par)
测试集数据
# Test-set data: every model is fit on train_data and scored on the held-out
# test_data. Refitting on test_data (as was done before) only measures how
# well a model fits the rows it was trained on, not how it generalizes.
library(ROCR)
library(e1071)
library(MASS)

# ROC coordinates (TPR against FPR) of `scores` on the test-set labels.
test_roc <- function(scores) {
  performance(prediction(scores, test_data$target),
              measure = "tpr", x.measure = "fpr")
}

# Test-set ROC of a radial-kernel SVM fit on train_data with the given gamma.
radial_test_roc <- function(gamma) {
  fit <- svm(target ~ ., data = train_data, kernel = "radial", cost = 1e-1,
             gamma = gamma, scale = TRUE)
  test_roc(predict(fit, test_data))
}

# Support vector classifier (SVC): polynomial kernel of degree 1.
svc_fit <- svm(target ~ ., data = train_data, kernel = "polynomial",
               degree = 1, cost = 1e-1, scale = TRUE)
svm_per0 <- test_roc(predict(svc_fit, test_data))
# LDA: column 2 of the posterior matrix is the second class's probability.
lda.fit <- lda(target ~ ., data = train_data)
lda.probs <- predict(lda.fit, test_data)
lda.predict <- prediction(lda.probs$posterior[, 2], test_data$target)
lda.per <- performance(lda.predict, measure = "tpr", x.measure = "fpr")
# Lay out two panels side by side, keeping the previous graphics settings so
# they can be restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2))
# Panel 1: LDA against the support vector classifier.
plot(lda.per, col = "blue", main = "Comparison of LDA and SVC",
     xlab = "False Positive Rate", ylab = "True Positive Rate")
plot(svm_per0, col = "red", add = TRUE)
# Legend; the red curve is the SVC, matching the panel title.
legend("bottomright", legend = c("LDA", "SVC"), col = c("blue", "red"),
       lty = 1)
# SVM with gamma = 1e-1
svm_per1 <- radial_test_roc(1e-1)
# SVM with gamma = 1e-2
svm_per2 <- radial_test_roc(1e-2)
# SVM with gamma = 1e-3
svm_per3 <- radial_test_roc(1e-3)
# Panel 2: the SVC against radial-kernel SVMs with different gamma values.
plot(svm_per0, col = "blue", main = "Comparison of SVC and SVM",
     xlab = "False Positive Rate", ylab = "True Positive Rate")
plot(svm_per1, col = "red", add = TRUE)
plot(svm_per2, col = "black", add = TRUE)
plot(svm_per3, col = "green", add = TRUE)
# Legend
legend("bottomright",
       legend = c("SVC", "gamma=1e-1", "gamma=1e-2", "gamma=1e-3"),
       col = c("blue", "red", "black", "green"), lty = 1)
# Restore the graphics layout that was active before this section.
par(old_par)