python可视化——混淆矩阵
最近学习模型评估,分享一下混淆矩阵这个小模块。这里用的是鸢尾花数据集(https://pan.baidu.com/s/1WwY7e4bN4r-Mz8fNKW7VOA)
提取码:jdta
首先是导入库,并设置一下图片格式大小
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
# Global matplotlib defaults applied to every figure created afterwards.
plt.rcParams.update({
    'font.sans-serif': ['YaHei Consolas Hybrid'],  # font used so Chinese labels render correctly
    'font.size': 16,                               # default font size
    'figure.figsize': (20, 16),                    # default figure size
})
其次是导入数据并划分训练集和测试集(这里将各类别标签转换为整数编码,注意这并不是独热编码)
# Load the iris data set from 'iris.csv' in the current working directory.
file = pd.read_csv('iris.csv')
df = file
# Show how many samples each class has. A bare expression displays nothing
# when this runs as a script, so print it explicitly.
print(df['Species'].value_counts())
# Map the species names to integer codes.
species_codes = {'setosa': 1, 'versicolor': 2, 'virginica': 3}
df['Species_num'] = df['Species'].map(species_codes)
# Series.map yields NaN for any name missing from the dict; fail loudly
# instead of silently training on NaN labels.
unmapped = df['Species_num'].isna()
if unmapped.any():
    raise ValueError(
        'unexpected Species values in iris.csv: %s'
        % sorted(df.loc[unmapped, 'Species'].astype(str).unique())
    )
df = df.drop(columns=['Species'])
print(df.head())
# Feature matrix: the four measurement columns.
X = df.loc[:, ["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"]]
# Select the labels as a 1-D Series: LabelEncoder expects a 1-D array, and a
# one-column DataFrame triggers a DataConversionWarning.
y = df["Species_num"]
le = LabelEncoder()
y = le.fit_transform(y)  # re-encode the labels as consecutive integers starting at 0
# Hold out 20% of the samples as the test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)
# Standardize the features, then fit a support vector classifier.
pipe_svc = Pipeline([('scl', StandardScaler()), ('clf', SVC(random_state=1))])
pipe_svc.fit(X_train, y_train)
y_pred = pipe_svc.predict(X_test)
最后就是混淆矩阵的可视化了哈
# Compute the confusion matrix (rows: true class, columns: predicted class)
# and print it.
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)
# Draw the matrix as a lightly shaded grid.
fig, ax = plt.subplots(figsize=(2.5, 2.5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
# Write each cell's count at its centre; x is the column (predicted class)
# and y is the row (true class).
for i in range(confmat.shape[0]):
    for j in range(confmat.shape[1]):
        ax.text(x=j, y=i, s=str(confmat[i, j]), va='center', ha='center')
plt.xlabel('predicted label')
plt.ylabel('true label')
plt.show()
# Macro-averaged precision, recall and F1 on the test set.
print('precision:%.3f' % precision_score(y_true=y_test, y_pred=y_pred, average='macro'))
print('recall:%.3f' % recall_score(y_true=y_test, y_pred=y_pred, average='macro'))
print('F1:%.3f' % f1_score(y_true=y_test, y_pred=y_pred, average='macro'))
友情链接
欢迎拜访(https://github.com/datawhalechina/hands-on-data-analysis/)