Python实现二分类和多分类的ROC曲线教程

本文介绍了Python中实现二分类和多分类问题的ROC曲线及其应用。通过加载预训练模型,计算精度、召回率、F1分数和准确率,并绘制ROC曲线,展示了在二分类和多分类任务中评估模型性能的方法。文章提供了详细的代码示例,包括二分类ROC曲线的绘制以及多分类任务中宏平均和微平均的ROC曲线计算。
摘要由CSDN通过智能技术生成

基本概念

precision:预测为对的当中,原本为对的比例(越大越好,1为理想状态)

recall:原本为对的当中,预测为对的比例(越大越好,1为理想状态)

F-measure:F度量是对精确率(precision)和召回率(recall)做一个权衡(越大越好,1为理想状态,此时precision为1,recall为1)

accuracy:预测对的(包括原本是对预测为对,原本是错的预测为错两种情形)占整个的比例(越大越好,1为理想状态)

fp rate:原本是错的预测为对的比例(越小越好,0为理想状态)

tp rate:原本是对的预测为对的比例(越大越好,1为理想状态)

ROC曲线通常在Y轴上表示真阳性率,在X轴上表示假阳性率。这意味着图的左上角是“理想”点——假阳性率为0,真阳性率为1。虽然实际中很难达到这一点,但它说明曲线下面积(AUC)越大,模型性能通常越好。

二分类问题:ROC曲线

from __future__ import absolute_import

from __future__ import division

from __future__ import print_function

import time

start_time = time.time()

import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve

from sklearn.metrics import auc

import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.metrics import recall_score,accuracy_score

from sklearn.metrics import precision_score,f1_score

from keras.optimizers import Adam,SGD,sgd

from keras.models import load_model

print('读取数据')

X_train = np.load('x_train-rotate_2.npy')

Y_train = np.load('y_train-rotate_2.npy')

print(X_train.shape)

print(Y_train.shape)

print('获取测试数据和验证数据')

X_train, X_valid, Y_train, Y_valid = train_test_split(X_train, Y_train, test_size=0.1, random_state=666)

Y_train = np.asarray(Y_train,np.uint8)

Y_valid = np.asarray(Y_valid,np.uint8)

X_valid = np.array(X_valid, np.float32) / 255.

print('获取模型')

model = load_model('./model/InceptionV3_model.h5')

opt = Adam(lr=1e-4)

model.compile(optimizer=opt, loss='binary_crossentropy')

print("Predicting")

Y_pred = model.predict(X_valid)

Y_pred = [np.argmax(y) for y in Y_pred] # 取出y中元素最大值所对应的索引

Y_valid = [np.argmax(y) for y in Y_valid]

# micro:多分类

# weighted:不均衡数量的类来说,计算二分类metrics的平均

# macro:计算二分类metrics的均值,为每个类给出相同权重的分值。

precision = precision_score(Y_valid, Y_pred, average='weighted')

recall = recall_score(Y_valid, Y_pred, average='weighted')

f1_score = f1_score(Y_valid, Y_pred, average='weighted')

accuracy_score = accuracy_score(Y_valid, Y_pred)

print("Precision_score:",precision)

print("Recall_score:",recall)

print("F1_score:",f1_score)

print("Accuracy_score:",accuracy_score)

# 二分类 ROC曲线

# roc_curve:真正率(True Positive Rate , TPR)或灵敏度(sensitivity)

# 横坐标:假正率(False Positive Rate , FPR)

fpr, tpr, thresholds_keras = roc_curve(Y_valid, Y_pred)

auc = auc(fpr, tpr)

print("AUC : ", auc)

plt.figure()

plt.plot([0, 1], [0, 1], 'k--')

plt.plot(fpr, tpr, label='Keras (area = {:.3f})'.format(auc))

plt.xlabel('False positive rate')

plt.ylabel('True positive rate')

plt.title('ROC curve')

plt.legend(loc='best')

plt.savefig("../images/ROC/ROC_2分类.png")

plt.show()

print("--- %s seconds ---" % (time.time() - start_time))

ROC图如下所示:

多分类问题:ROC曲线

ROC曲线通常用于二分类以研究分类器的输出。为了将ROC曲线和ROC区域扩展到多类或多标签分类,有必要对输出进行二值化。⑴可以每个标签绘制一条ROC曲线。⑵也可以通过将标签指示符矩阵的每个元素视为二元预测(微平均)来绘制ROC曲线。⑶另一种用于多类别分类的评估方法是宏观平均,它对每个标签的分类给予相同的权重。

from __future__ import absolute_import

from __future__ import division

from __future__ import print_function

import time

start_time = time.time()

import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve

from sklearn.metrics import auc

import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.metrics import recall_score,accuracy_score

from sklearn.metrics import precision_score,f1_score

from keras.optimizers import Adam,SGD,sgd

from keras.models import load_model

from itertools import cycle

from scipy import interp

from sklearn.preprocessing import label_binarize

nb_classes = 5

print('读取数据')

X_train = np.load('x_train-resized_5.npy')

Y_train = np.load('y_train-resized_5.npy')

print(X_train.shape)

print(Y_train.shape)

print('获取测试数据和验证数据')

X_train, X_valid, Y_train, Y_valid = train_test_split(X_train, Y_train, test_size=0.1, random_state=666)

Y_train = np.asarray(Y_train,np.uint8)

Y_valid = np.asarray(Y_valid,np.uint8)

X_valid = np.asarray(X_valid, np.float32) / 255.

print('获取模型')

model = load_model('./model/SE-InceptionV3_model.h5')

opt = Adam(lr=1e-4)

model.compile(optimizer=opt, loss='categorical_crossentropy')

print("Predicting")

Y_pred = model.predict(X_valid)

Y_pred = [np.argmax(y) for y in Y_pred] # 取出y中元素最大值所对应的索引

Y_valid = [np.argmax(y) for y in Y_valid]

# Binarize the output

Y_valid = label_binarize(Y_valid, classes=[i for i in range(nb_classes)])

Y_pred = label_binarize(Y_pred, classes=[i for i in range(nb_classes)])

# micro:多分类

# weighted:不均衡数量的类来说,计算二分类metrics的平均

# macro:计算二分类metrics的均值,为每个类给出相同权重的分值。

precision = precision_score(Y_valid, Y_pred, average='micro')

recall = recall_score(Y_valid, Y_pred, average='micro')

f1_score = f1_score(Y_valid, Y_pred, average='micro')

accuracy_score = accuracy_score(Y_valid, Y_pred)

print("Precision_score:",precision)

print("Recall_score:",recall)

print("F1_score:",f1_score)

print("Accuracy_score:",accuracy_score)

# roc_curve:真正率(True Positive Rate , TPR)或灵敏度(sensitivity)

# 横坐标:假正率(False Positive Rate , FPR)

# Compute ROC curve and ROC area for each class

fpr = dict()

tpr = dict()

roc_auc = dict()

for i in range(nb_classes):

fpr[i], tpr[i], _ = roc_curve(Y_valid[:, i], Y_pred[:, i])

roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area

fpr["micro"], tpr["micro"], _ = roc_curve(Y_valid.ravel(), Y_pred.ravel())

roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# Compute macro-average ROC curve and ROC area

# First aggregate all false positive rates

all_fpr = np.unique(np.concatenate([fpr[i] for i in range(nb_classes)]))

# Then interpolate all ROC curves at this points

mean_tpr = np.zeros_like(all_fpr)

for i in range(nb_classes):

mean_tpr += interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC

mean_tpr /= nb_classes

fpr["macro"] = all_fpr

tpr["macro"] = mean_tpr

roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves

lw = 2

plt.figure()

plt.plot(fpr["micro"], tpr["micro"],

label='micro-average ROC curve (area = {0:0.2f})'

''.format(roc_auc["micro"]),

color='deeppink', linestyle=':', linewidth=4)

plt.plot(fpr["macro"], tpr["macro"],

label='macro-average ROC curve (area = {0:0.2f})'

''.format(roc_auc["macro"]),

color='navy', linestyle=':', linewidth=4)

colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])

for i, color in zip(range(nb_classes), colors):

plt.plot(fpr[i], tpr[i], color=color, lw=lw,

label='ROC curve of class {0} (area = {1:0.2f})'

''.format(i, roc_auc[i]))

plt.plot([0, 1], [0, 1], 'k--', lw=lw)

plt.xlim([0.0, 1.0])

plt.ylim([0.0, 1.05])

plt.xlabel('False Positive Rate')

plt.ylabel('True Positive Rate')

plt.title('Some extension of Receiver operating characteristic to multi-class')

plt.legend(loc="lower right")

plt.savefig("../images/ROC/ROC_5分类.png")

plt.show()

print("--- %s seconds ---" % (time.time() - start_time))

ROC图如下所示:

以上这篇python实现二分类和多分类的ROC曲线教程就是小编共享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持乐购源码。

  • 3
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
好的,我来回答你的问题。 首先,ROC曲线是用于评估二分类多分类模型的性能指标之一。它是一条以假正率(False Positive Rate)为横坐标,真正率(True Positive Rate)为纵坐标的曲线。在二分类模型中,真正率等于真阳性率(True Positive Rate),假正率等于假阳性率(False Positive Rate)。 下面是基于Python实现二分类多分类ROC曲线的步骤: 1. 导入必要的库: ```python import numpy as np import matplotlib.pyplot as plt from sklearn.metrics import roc_curve, auc ``` 2. 准备数据: ```python # 二分类模型 y_true = np.array([0, 0, 1, 1, 0, 1]) y_score = np.array([0.3, 0.4, 0.5, 0.6, 0.2, 0.7]) # 多分类模型 y_true = np.array([0, 1, 2, 0, 1, 2]) y_score = np.array([[0.9, 0.1, 0.0], [0.2, 0.6, 0.2], [0.1, 0.2, 0.7], [0.8, 0.1, 0.1], [0.3, 0.4, 0.3], [0.2, 0.3, 0.5]]) ``` 其中,y_true为真实标签,y_score为预测标签的概率值。 3. 计算ROC曲线: ```python # 二分类模型 fpr, tpr, thresholds = roc_curve(y_true, y_score) roc_auc = auc(fpr, tpr) # 多分类模型 n_classes = y_score.shape[1] fpr = dict() tpr = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], _ = roc_curve(y_true, y_score[:, i], pos_label=i) roc_auc[i] = auc(fpr[i], tpr[i]) ``` 4. 绘制ROC曲线: ```python # 二分类模型 plt.figure() plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc) plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic') plt.legend(loc="lower right") plt.show() # 多分类模型 plt.figure() colors = ['blue', 'red', 'green'] for i, color in zip(range(n_classes), colors): plt.plot(fpr[i], tpr[i], color=color, lw=2, label='ROC curve of class %d (area = %0.2f)' % (i, roc_auc[i])) plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic') plt.legend(loc="lower right") plt.show() ``` 这样,就可以得到二分类多分类模型的ROC曲线了。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值