使用Sklearn进行精确率-召回率曲线的绘制

使用Sklearn进行精确率-召回率曲线的绘制

  • 精确率:模型判定的正例中真正正例所占的比重
  • 召回率:总正例中被模型判定为正例的比重
#coding=utf-8
"""
#演示目的:利用鸢尾花数据集画出P-R曲线
"""
print(__doc__)
#演示目的:利用鸢尾花数据集画出P-R曲线
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm, datasets
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
#from sklearn.cross_validation import train_test_split  #适用于anaconda 3.6及以前版本
from sklearn.model_selection import train_test_split#适用于anaconda 3.7
#以iris数据为例,画出P-R曲线
iris = datasets.load_iris()
X = iris.data
y = iris.target
print(y)
# 标签二值化,将三个类转为001, 010, 100的格式.因为这是个多类分类问题,后面将要采用
#OneVsRestClassifier策略转为二类分类问题
y = label_binarize(y, classes=[0, 1, 2])
n_classes = y.shape[1]
print(y.shape)
print (y)

# 增加了800维的 噪声特征
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
# print(X.shape)  (150, 4)
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]
# print(X.shape)  (150, 804)
# Split into training and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=random_state) #随机数,填0或不填,每次都会不一样

# Run classifier probability : boolean, optional (default=False)Whether to enable probability estimates. This must be enabled prior to calling fit, and will slow down that method.
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
(150, 3)
[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]]
# Compute Precision-Recall and plot curve  
#下面的下划线是返回的阈值。作为一个名称:此时“_”作为临时性的名称使用。
#表示分配了一个特定的名称,但是并不会在后面再次用到该名称。
precision = dict()
recall = dict()
average_precision = dict()

for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],  y_score[:, i]) #The last precision and recall values are 1. and 0. respectively and do not have a corresponding threshold. This ensures that the graph starts on the x axis.
    average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])#切片,第i个类的分类结果性能

# Compute micro-average curve and area. ravel()将多维数组降为一维
precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(),  y_score.ravel())
average_precision["micro"] = average_precision_score(y_test, y_score, average="micro") #This score corresponds to the area under the precision-recall curve.

# Plot Precision-Recall curve for each class
plt.clf()#clf 函数用于清除当前图像窗口
plt.plot(recall["micro"], precision["micro"],
         label='micro-average Precision-recall curve (area = {0:0.2f})'.format(average_precision["micro"]))
for i in range(n_classes):
    plt.plot(recall[i], precision[i],
             label='Precision-recall curve of class {0} (area = {1:0.2f})'.format(i, average_precision[i]))

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05]) #xlim、ylim:分别设置X、Y轴的显示范围。
plt.xlabel('Recall', fontsize=16)
plt.ylabel('Precision',fontsize=16)
plt.title('Extension of Precision-Recall curve to multi-class',fontsize=16)
plt.legend(loc="lower right")#legend 是用于设置图例的函数
plt.show()

png

  • 1
    点赞
  • 14
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
好的,以下是一份基于Pandas和Scikit-learn库的逻辑回归预测模型评价代码示例,包括绘制二分类混淆矩阵、计算精确率召回率、F1-score、ROC曲线与AUC指标: ```python import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score # 1. 数据读取与处理 data = pd.read_csv('data.csv') X = data.drop('target', axis=1) y = data['target'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # 2. 模型训练 model = LogisticRegression() model.fit(X_train, y_train) # 3. 模型预测 y_pred = model.predict(X_test) y_prob = model.predict_proba(X_test)[:, 1] # 4. 绘制二分类混淆矩阵 confusion_mat = confusion_matrix(y_test, y_pred) plt.imshow(confusion_mat, cmap=plt.cm.Blues) plt.title('Confusion Matrix') plt.colorbar() tick_marks = np.arange(2) plt.xticks(tick_marks, ['0', '1']) plt.yticks(tick_marks, ['0', '1']) plt.xlabel('Predicted Label') plt.ylabel('True Label') for i in range(2): for j in range(2): plt.text(j, i, confusion_mat[i, j], ha='center', va='center', color='white' if confusion_mat[i, j] > confusion_mat.max() / 2 else 'black') plt.show() # 5. 计算精确率召回率和F1-score precision = precision_score(y_test, y_pred) recall = recall_score(y_test, y_pred) f1 = f1_score(y_test, y_pred) # 6. 计算AUC指标和绘制ROC曲线 auc = roc_auc_score(y_test, y_prob) fpr, tpr, thresholds = roc_curve(y_test, y_prob) plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auc) plt.plot([0, 1], [0, 1], 'k--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC Curve') plt.legend(loc="lower right") plt.show() # 7. 输出结果 print('Precision:', precision) print('Recall:', recall) print('F1-score:', f1) print('AUC:', auc) ``` 这份代码使用了Pandas库读取了名为"data.csv"的数据集,并将其分成训练集和测试集,使用Scikit-learn库中的LogisticRegression类进行模型训练和预测,最后通过Scikit-learn库中的指标函数计算了预测结果的精确率召回率、F1-score和AUC指标,同时绘制了二分类混淆矩阵和ROC曲线。需要注意的是,上述代码仅作为示例,具体实现需要根据实际情况进行调整。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

没有胡子的猫

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值