分类模型-auc-roc曲线
真阳性率(TPR,即召回率):TPR = TP / (TP + FN),实际为正类的样本中被正确预测为正类的比例。
假阳性率(FPR):FPR = FP / (FP + TN),实际为负类的样本中被错误预测为正类的比例。
参考博客:
https://blog.csdn.net/u011630575/article/details/80250177
# load libraries 加载库
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split
# Build a synthetic binary-classification dataset: 10,000 samples with
# 10 features, 3 of them informative (fixed seed for repeatability).
features, target = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=3,
    n_classes=2,
    random_state=3,
)
# Hold out 20% of the samples as the test set (fixed seed for repeatability).
(features_train,
 features_test,
 target_train,
 target_test) = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=1,
)
# Create a logistic-regression classifier and fit it on the training split
# (fit() returns the estimator itself, so construction and training chain).
logit = LogisticRegression().fit(features_train, target_train)
# Predicted probability of class 1 (column index 1) for every test sample.
target_probabilities = logit.predict_proba(features_test)[:, 1]
# False-positive rates, true-positive rates and the decision thresholds
# that produce them, as computed by roc_curve on the test split.
false_positive_rate, true_positive_rate, threshold = roc_curve(
    target_test,
    target_probabilities,
)
# Plot the ROC curve: true-positive rate against false-positive rate.
plt.title("Receiver Operating Characteristic")
plt.plot(false_positive_rate, true_positive_rate)
# Dashed diagonal from (0, 0) to (1, 1): the reference line of a classifier
# with no discriminative power.
plt.plot([0, 1], [0, 1], ls="--")
# Light-grey guides along the left edge (x = 0) and the top edge (y = 1),
# i.e. the path a perfect classifier would follow.
plt.plot([0, 0], [1, 0], c=".7")
plt.plot([0, 1], [1, 1], c=".7")
plt.ylabel("True Positive Rate")
plt.xlabel("False Positive Rate")
plt.show()
# Predicted class probabilities for the first test sample only
# (one row, one column per class).
logit.predict_proba(features_test)[:1]
array([[0.87094106, 0.12905894]])
We can see the classes using classes_:
# Inspect the class labels known to the fitted classifier; the columns
# returned by predict_proba follow this order.
logit.classes_
# 对照上面的预测概率:第一个样本属于负类(0)的概率约为 87%,属于正类(1)的概率约为 13%
array([0, 1])
# Report the ROC operating point whose decision threshold is closest to 50%.
# The thresholds returned by roc_curve depend on the data and the library
# version, so the index is looked up instead of being hard-coded (the former
# fixed index 116 produced the figures recorded below, where the threshold
# is about 0.76 rather than 0.5).
closest_to_half = abs(threshold - 0.5).argmin()
print("Threshold: {}".format(threshold[closest_to_half]))
print("True Positive Rate: {}".format(true_positive_rate[closest_to_half]))
print("False Positive Rate: {}".format(false_positive_rate[closest_to_half]))
Threshold: 0.762488595149537
True Positive Rate: 0.629889669007021
False Positive Rate: 0.06679960119641076
# Print the threshold, true-positive rate and false-positive rate stored
# at position 116 of the arrays returned by roc_curve.
# NOTE(review): the index is hard-coded; the output recorded below shows a
# threshold of about 0.76, not 0.5 -- confirm which operating point is meant.
print(f"Threshold: {threshold[116]}")
print(f"True Positive Rate: {true_positive_rate[116]}")
print(f"False Positive Rate: {false_positive_rate[116]}")
Threshold: 0.762488595149537
True Positive Rate: 0.629889669007021
False Positive Rate: 0.06679960119641076
# Area under the ROC curve for the test split, computed from the true
# labels and the predicted class-1 probabilities.
roc_auc_score(y_true=target_test, y_score=target_probabilities)
0.9060171541543875