在验证分类算法的好坏时,经常需要用到AUC曲线,而在做不同分类模型的对比实验时,需要将不同模型的AUC曲线绘制到一张图里。
计算机视觉——python在一张图中绘制多个模型的对比ROC线
1. 小型分类模型对比,可以直接调用的
用一样的数据集做示例,简单地直接分别得到每个分类模型预测的结果。
from sklearn.datasets import load_breast_cancer
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import train_test_split
import pylab as plt
import warnings; warnings.filterwarnings('ignore')
dataset = load_breast_cancer()
data = dataset.data
target = dataset.target
X_train, X_test, y_train, y_test = train_test_split(data,target,test_size=0.2)
# 模型1调用sklearn中的RandomForestClassifier
rf1 = RandomForestClassifier(n_estimators=5)
rf1.fit(X_train, y_train)
pred1 = rf1.predict_proba(X_test)[:,1]
# 模型2调用sklearn中的ExtraTreesClassifier
rf2 = ExtraTreesClassifier(n_estimators=5)
rf2.fit(X_train, y_train)
pred2 = rf2.predict_proba(X_test)[:,1]
# 画图部分
fpr1, tpr1, threshold1 = metrics.roc_curve(y_test, pred1) # <class 'numpy.ndarray'> <class 'numpy.ndarray'>
roc_auc1 = metrics.auc(fpr1, tpr1)
fpr2, tpr2, threshold2 = metrics.roc_curve(y_test, pred2) # <class 'numpy.ndarray'> <class 'numpy.ndarray'>
roc_auc2 = metrics.auc(fpr2, tpr2)
plt.figure(figsize=(6,6))
plt.title('Validation ROC')
plt.plot(fpr1, tpr1, 'b', label = 'RandomForestClassifier AUC = %0.3f' % roc_auc1)
plt.plot(fpr2, tpr2, 'b', label = 'ExtraTreesClassifier AUC = %0.3f' % roc_auc2)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.savefig("filename.png")
plt.show()
plt.close()
2. 大型的CNN模型,无法直接得到结果。
2.1 先分别运行每个分类模型,将预测的结果存入csv文件中。
from sklearn.datasets import load_breast_cancer
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import train_test_split
import pylab as plt
import warnings; warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression #线性回归
import csv
dataset = load_breast_cancer()
data = dataset.data
target = dataset.target
X_train, X_test, y_train, y_test = train_test_split(data,target,test_size=0.2)
rf1 = RandomForestClassifier(n_estimators=5)
rf1.fit(X_train, y_train)
pred1 = rf1.predict_proba(X_test)[:,1]
dataframe = pd.DataFrame({'label':y_test,'pred':pred1})
dataframe.to_csv("test1.csv",index=False,sep=',')
rf2 = ExtraTreesClassifier(n_estimators=5)
rf2.fit(X_train, y_train)
pred2 = rf2.predict_proba(X_test)[:,1]
dataframe = pd.DataFrame({'label':y_test,'pred':pred2})
dataframe.to_csv("test2.csv",index=False,sep=',')
2.2 从csv文件读取每个模型的预测结果,绘制AUC曲线
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
def Draw_ROC(file1,file2):
'''这里注意读取csv的编码方式,
如果csv里有中文,在windows系统上可以直接用encoding='ANSI',
但是到了Mac或者Linux系统上会报错:`LookupError: unknown encoding: ansi`。
解决方法:
1. 可以改成encoding='gbk';
2. 或者把csv文件里的列名改成英文,就不用选择encoding的方式了。
'''
data1=pd.read_csv(file1, encoding='ANSI')
data1=pd.DataFrame(data1)
data2=pd.read_csv(file2, encoding='ANSI')
data2=pd.DataFrame(data2)
print(list(data1['label']), list(data1['pred']))
print(list(data2['label']), list(data2['pred']))
fpr_CSNN,tpr_CSNN,thresholds=roc_curve(list(data1['label']),
list(data1['pred']))
roc_auc_CSSSNN=auc(fpr_CSNN,tpr_CSNN)
fpr_NN,tpr_NN,thresholds=roc_curve(list(data2['label']),
list(data2['pred']))
roc_auc_DL=auc(fpr_NN,tpr_NN)
font = {'family': 'Times New Roman',
'size': 12,
}
'''这里很多电脑上也许默认是'DejaVu Sans'格式,但是在写论文时,
往往需要'Times New Roman'格式,可以参考[这篇教程](https://blog.csdn.net/weixin_43543177/article/details/109723328)
'''
sns.set(font_scale=1.2)
plt.rc('font',family='Times New Roman')
plt.plot(fpr_NN,tpr_NN,'purple',label='NN_AUC = %0.2f'% roc_auc_DL)
plt.plot(fpr_CSNN,tpr_CSNN,'blue',label='CSNN_AUC = %0.2f'% roc_auc_CSSSNN)
plt.legend(loc='lower right',fontsize = 12)
plt.plot([0,1],[0,1],'r--')
plt.ylabel('True Positive Rate',fontsize = 14)
plt.xlabel('Flase Positive Rate',fontsize = 14)
plt.show()
if __name__=="__main__":
Draw_ROC('./test1.csv',
'./test2.csv')