分类模型的评价指标

最新推荐文章于 2024-11-03 21:32:44 发布

诶尔法Alpha

最新推荐文章于 2024-11-03 21:32:44 发布

阅读量457

点赞数 9

分类专栏：神经网络学习、python脚本　文章标签：分类、数据挖掘、人工智能

本文链接：https://blog.csdn.net/weixin_45354497/article/details/134714801

版权

python脚本同时被 2 个专栏收录

14 篇文章 0 订阅

订阅专栏

神经网络学习

10 篇文章 3 订阅

订阅专栏

评价分类模型时，仅靠 loss 和 acc 这两个指标往往过于片面，无法全面地评判训练出来的模型。因此还需要借助分类报告、混淆矩阵、ROC 曲线（AUC 值）等指标进行评判。本文主要用简洁的代码介绍如何得到这些指标。

首先要得到每个样本的真实值及对应的预测值：模型对测试集完成预测后，将真实值和预测值按相同的顺序（位置一一对应）分别放入两个数组中，如下图所示：
在这里插入图片描述
这两组数据将进行下面的指标计算。

分类报告

from sklearn.metrics import classification_report

# Ground-truth labels and the model's predicted labels for the same test
# samples; index i in both lists must refer to the same sample.
y_true = [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]
y_pred = [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]

# Build the text report first, then print it under a heading.
report_text = classification_report(y_true, y_pred)
print("Classification Report:", report_text, sep="\n")

运行结果：
在这里插入图片描述

混淆矩阵

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Ground-truth labels and the model's predicted labels for the same test
# samples; index i in both lists must refer to the same sample.
y_true = [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]
y_pred = [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]

# Raw sample counts: rows are true labels, columns are predicted labels
# (matching the axis titles set below).
cm_counts = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:", cm_counts, sep="\n")

# Render the counts as an annotated heat map on an explicit Figure/Axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_counts, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Predict Label')
ax.set_ylabel('True Label')
ax.set_title('the Confusion_matrix of xxx')
# Call plt.show() here instead to display the figure interactively.
fig.savefig("xxx.png")  # write the figure to disk

运行结果：
在这里插入图片描述

在这里插入图片描述
如果想要显示概率值（即按真实标签逐行归一化后的比例）而不是样本数量（有时候测试集太少，直接把数量放到论文里也不好看），则用以下代码：

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Ground-truth labels and the model's predicted labels for the same test
# samples; index i in both lists must refer to the same sample.
yt = [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]
yp = [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]

conf_matrix = confusion_matrix(yt, yp)

# Normalise every row by its total so each row sums to 1 (fraction of the
# samples of a true label assigned to each predicted label).
# A label that occurs only in the predictions has an all-zero row; the
# `where` guard skips the division there and leaves the pre-filled 0.0
# instead of producing NaN and a divide-by-zero RuntimeWarning.
row_totals = conf_matrix.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    conf_matrix,
    row_totals,
    out=np.zeros(conf_matrix.shape, dtype=float),
    where=row_totals != 0,
)
print("Confusion Matrix:")
print(cm_normalized)

# Draw the normalised matrix as an annotated heat map.
plt.figure(figsize=(8, 6))
sns.heatmap(cm_normalized, annot=True, fmt='.3f', cmap='Blues')  # 3 decimal places
plt.xlabel('Predict Label')
plt.ylabel('True Label')
plt.title('the Confusion_matrix of xxx')
# Call plt.show() here instead to display the figure interactively.
plt.savefig("xxx.png")  # write the figure to disk

运行结果：
在这里插入图片描述

ROC曲线（AUC的值）

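假设有多个模型对同一数据集进行测试，请注意读取数据时不要打乱顺序（shuffle），否则各模型的预测值与真实值位置无法一一对应，计算出的指标就会有误。另外，roc_curve 的第二个参数通常应传入模型输出的概率或得分；若像下面的示例这样直接传入 0/1 预测标签，曲线只有一个拐点，得到的 AUC 只是粗略估计。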

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Ground-truth labels shared by every model; all prediction lists below must
# be in this same sample order.
y_true = [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]

# Predictions of each model, keyed by the name shown in the legend.
# NOTE(review): these are hard 0/1 labels rather than scores, so each curve
# has a single interior point -- pass probabilities/scores if available.
predictions_by_model = {
    'Model 1': [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0],
    'Model 2': [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0],
    'Model 3': [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0],
}

# For every model: false-positive rates, true-positive rates and the area
# under the resulting curve (the thresholds are not needed for plotting).
roc_results = []
for model_name, y_pred in predictions_by_model.items():
    fpr, tpr, _thresholds = roc_curve(y_true, y_pred)
    roc_results.append((model_name, fpr, tpr, auc(fpr, tpr)))

# Plot all ROC curves on one set of axes.
fig, ax = plt.subplots()
for model_name, fpr, tpr, area in roc_results:
    ax.plot(fpr, tpr, label='{} (AUC = {:.2f})'.format(model_name, area))
ax.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
ax.set_xlim(0.0, 1.0)
ax.set_ylim(0.0, 1.05)
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc='lower right')
# Call plt.show() here instead to display the figure interactively.
fig.savefig("roc.png")