基于Heart数据集采用LDA&SVM模型绘制ROC曲线及其分析

导入数据集

# Read the heart-disease table from the working directory; the surrounding
# parentheses make the assignment visible so the imported rows are echoed.
(data <- read.csv(file = "heart.csv"))

划分数据集,80%为训练集,其余20%为测试集

library(e1071)
library(caret)
# Split the rows into a training partition (p = 0.8, i.e. 80%) and a test
# partition (the remaining rows).
# Fix the RNG seed so the random split, and every ROC curve derived from it,
# is reproducible between runs.
set.seed(1)
# Stratify on the outcome as a factor: createDataPartition() samples within
# each class for a factor, but bins a numeric outcome by percentiles.
# NOTE(review): assumes `target` is a two-class label column -- confirm.
index <- createDataPartition(factor(data$target), p = 0.8, list = FALSE)
train_data <- data[index, ]
test_data <- data[-index, ]
# Show the row/column counts of both partitions.
dim(train_data)
dim(test_data)

建立SVM模型

library(ROCR)
# Support vector classifier: polynomial kernel with degree 1, which
# corresponds to a linear kernel. `degree` is spelled out in full instead of
# relying on partial matching of `d`.
svmfit <- svm(
  target ~ .,
  data = train_data,
  kernel = "polynomial",
  degree = 1,
  cost = 1e-1,
  scale = TRUE
)

# Score every training observation with the fitted model.
# NOTE(review): predict.svm() has no `type` argument, and without
# `probability = TRUE` it does not return class probabilities. The values
# below are only usable as ROC scores if they are numeric, which presumably
# means `target` is a numeric column -- confirm.
svm_probs1 <- predict(svmfit, train_data)

# prediction() pairs the scores with the true labels and builds the object
# that ROCR uses for ROC analysis.
svm_predict1 <- prediction(svm_probs1, train_data$target)

# performance() evaluates the requested measures: true positive rate on the
# y axis against false positive rate on the x axis.
svm_per1 <- performance(svm_predict1, measure = "tpr", x.measure = "fpr")

# Draw the ROC curve.
plot(svm_per1, col = "red")

得到ROC曲线

建立LDA模型

library(ROCR)
library(MASS)
# Fit linear discriminant analysis on the training partition.
lda.fit <- lda(target ~ ., data = train_data)

# Predict on the same training rows.
# NOTE(review): `type` is not a documented predict.lda() argument and is
# presumably absorbed by `...` -- confirm before relying on it.
lda.probs <- predict(lda.fit, newdata = train_data, type = "response")

# `$posterior` holds the posterior probability of every class: one column per
# class, one row per observation. Column 2 is the posterior of the second
# class, which is used as the ROC score.
lda.predict <- prediction(
  predictions = lda.probs$posterior[, 2],
  labels = train_data$target
)

# True positive rate against false positive rate.
lda.per <- performance(
  prediction.obj = lda.predict,
  measure = "tpr",
  x.measure = "fpr"
)

# Draw the ROC curve.
plot(lda.per, col = "blue")

更换不同的gamma值(cost固定为0.1),我们可以得到ISLR教材9.3.3节的结果

定义两个函数:

# SVC
#' ROC performance of a support vector classifier (polynomial kernel, degree 1).
#'
#' @param data Data frame with a `target` column; the model is fitted on it.
#' @param newdata Data frame with a `target` column that is scored and
#'   evaluated. Defaults to `data`, which keeps the original behaviour of
#'   fitting and scoring on the same rows; pass a held-out set to obtain an
#'   out-of-sample curve.
#' @return The object returned by `performance()` for true positive rate
#'   against false positive rate.
SVC_ROCplot <- function(data, newdata = data) {
  svmfit <- svm(
    target ~ .,
    data = data,
    kernel = "polynomial",
    degree = 1,
    cost = 1e-1,
    scale = TRUE
  )

  # The predicted values are used as ranking scores for the ROC curve.
  svm_probs <- predict(svmfit, newdata)
  svm_predict <- prediction(svm_probs, newdata$target)

  performance(svm_predict, measure = "tpr", x.measure = "fpr")
}

# SVM
#' ROC performance of a radial-kernel support vector machine.
#'
#' @param data Data frame with a `target` column; the model is fitted on it.
#' @param gamma Kernel parameter forwarded to `svm()`.
#' @param newdata Data frame with a `target` column that is scored and
#'   evaluated. Defaults to `data`, which keeps the original behaviour of
#'   fitting and scoring on the same rows; pass a held-out set to obtain an
#'   out-of-sample curve.
#' @return The object returned by `performance()` for true positive rate
#'   against false positive rate.
SVM_ROCplot <- function(data, gamma, newdata = data) {
  # No `degree` here: it only applies to the polynomial kernel.
  svmfit <- svm(
    target ~ .,
    data = data,
    kernel = "radial",
    cost = 1e-1,
    scale = TRUE,
    gamma = gamma
  )

  # The predicted values are used as ranking scores for the ROC curve.
  svm_probs <- predict(svmfit, newdata)
  svm_predict <- prediction(svm_probs, newdata$target)

  performance(svm_predict, measure = "tpr", x.measure = "fpr")
}

绘制图像:

训练集数据

# Training-set data
library(ROCR)
# Support vector classifier (SVC)
svm_per0 <- SVC_ROCplot(train_data)

# LDA
library(MASS)
lda.fit <- lda(target ~ ., data = train_data)

# predict.lda() has no `type` argument, so none is passed.
lda.probs <- predict(lda.fit, train_data)
# Column 2 of the posterior matrix is the score for the second class.
lda.predict <- prediction(lda.probs$posterior[, 2], train_data$target)

lda.per <- performance(lda.predict, measure = "tpr", x.measure = "fpr")

# Lay out two panels side by side; keep the previous settings so they can be
# restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2))

# Left panel: LDA against the SVC.
plot(
  lda.per,
  col = "blue",
  main = "Comparison of LDA and SVC",
  xlab = "False Positive Rate",
  ylab = "True Positive Rate"
)
plot(svm_per0, col = "red", add = TRUE)

# Legend: the red curve is the SVC, so it is labelled as such.
legend(
  "bottomright",
  legend = c("LDA", "SVC"),
  col = c("blue", "red"),
  lty = 1
)

# SVM with gamma = 1e-1
svm_per1 <- SVM_ROCplot(train_data, 1e-1)

# SVM with gamma = 1e-2
svm_per2 <- SVM_ROCplot(train_data, 1e-2)

# SVM with gamma = 1e-3
svm_per3 <- SVM_ROCplot(train_data, 1e-3)

# Right panel: the SVC against the three radial-kernel SVMs.
plot(
  svm_per0,
  col = "blue",
  main = "Comparison of SVC and SVM",
  xlab = "False Positive Rate",
  ylab = "True Positive Rate"
)
plot(svm_per1, col = "red", add = TRUE)
plot(svm_per2, col = "black", add = TRUE)
plot(svm_per3, col = "green", add = TRUE)

# Legend
legend(
  "bottomright",
  legend = c("SVC", "gamma=1e-1", "gamma=1e-2", "gamma=1e-3"),
  col = c("blue", "red", "black", "green"),
  lty = 1
)

# Put the graphics layout back the way it was.
par(old_par)

测试集数据

# Test-set data
# Every model is fitted on train_data and scored on test_data, so the curves
# are genuine held-out evaluations rather than fits to the test rows.
library(e1071)
library(ROCR)
library(MASS)

# Turn held-out scores into a TPR-vs-FPR performance object.
roc_on_test <- function(scores) {
  performance(
    prediction(scores, test_data$target),
    measure = "tpr",
    x.measure = "fpr"
  )
}

# Support vector classifier (SVC): polynomial kernel of degree 1, cost 0.1.
# The model is fitted here directly because a single-data-frame call to
# SVC_ROCplot() fits and scores on the same rows.
svc_fit <- svm(
  target ~ .,
  data = train_data,
  kernel = "polynomial",
  degree = 1,
  cost = 1e-1,
  scale = TRUE
)
svm_per0 <- roc_on_test(predict(svc_fit, test_data))

# LDA
lda.fit <- lda(target ~ ., data = train_data)

lda.probs <- predict(lda.fit, test_data)
# Column 2 of the posterior matrix is the score for the second class.
lda.predict <- prediction(lda.probs$posterior[, 2], test_data$target)

lda.per <- performance(lda.predict, measure = "tpr", x.measure = "fpr")

# Held-out scores of a radial-kernel SVM (cost 0.1) for a given gamma.
radial_test_scores <- function(gamma) {
  fit <- svm(
    target ~ .,
    data = train_data,
    kernel = "radial",
    cost = 1e-1,
    scale = TRUE,
    gamma = gamma
  )
  predict(fit, test_data)
}

# Lay out two panels side by side; keep the previous settings so they can be
# restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2))

# Left panel: LDA against the SVC.
plot(
  lda.per,
  col = "blue",
  main = "Comparison of LDA and SVC",
  xlab = "False Positive Rate",
  ylab = "True Positive Rate"
)
plot(svm_per0, col = "red", add = TRUE)

# Legend: the red curve is the SVC, so it is labelled as such.
legend(
  "bottomright",
  legend = c("LDA", "SVC"),
  col = c("blue", "red"),
  lty = 1
)

# SVM with gamma = 1e-1
svm_per1 <- roc_on_test(radial_test_scores(1e-1))

# SVM with gamma = 1e-2
svm_per2 <- roc_on_test(radial_test_scores(1e-2))

# SVM with gamma = 1e-3
svm_per3 <- roc_on_test(radial_test_scores(1e-3))

# Right panel: the SVC against the three radial-kernel SVMs.
plot(
  svm_per0,
  col = "blue",
  main = "Comparison of SVC and SVM",
  xlab = "False Positive Rate",
  ylab = "True Positive Rate"
)
plot(svm_per1, col = "red", add = TRUE)
plot(svm_per2, col = "black", add = TRUE)
plot(svm_per3, col = "green", add = TRUE)

# Legend
legend(
  "bottomright",
  legend = c("SVC", "gamma=1e-1", "gamma=1e-2", "gamma=1e-3"),
  col = c("blue", "red", "black", "green"),
  lty = 1
)

# Put the graphics layout back the way it was.
par(old_par)

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值