多分类问题中的混淆矩阵
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
# Load scikit-learn's bundled digits dataset and split it into the
# feature matrix (X) and the class labels (y).
digits = datasets.load_digits()
X = digits.data
y = digits.target
from sklearn.model_selection import train_test_split
# Split into train/test sets with a fixed seed for reproducibility.
# NOTE(review): test_size=0.8 holds out 80% of the samples for testing and
# trains on only 20% -- confirm this ratio is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=666)
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier using scikit-learn's default settings.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
# Evaluate on the held-out split (per the scikit-learn docs, a classifier's
# score() is its mean accuracy).
log_reg.score(X_test, y_test)
0.93115438108484
# Predicted labels for the held-out samples.
y_predict = log_reg.predict(X_test)
from sklearn.metrics import precision_score
# Called with the default averaging mode. As the traceback that follows
# shows, the default is average='binary', which raises ValueError because
# the target here is multiclass.
precision_score(y_test, y_predict)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-ae107c9e86ef> in <module>()
1 from sklearn.metrics import precision_score
2
----> 3 precision_score(y_test, y_predict)
~\Anaconda3\lib\site-packages\sklearn\metrics\classification.py in precision_score(y_true, y_pred, labels, pos_label, average, sample_weight)
1259 average=average,
1260 warn_for=('precision',),
-> 1261 sample_weight=sample_weight)
1262 return p
1263
~\Anaconda3\lib\site-packages\sklearn\metrics\classification.py in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight)
1038 else:
1039 raise ValueError("Target is %s but average='binary'. Please "
-> 1040 "choose another average setting." % y_type)
1041 elif pos_label not in (None, 1):
1042 warnings.warn("Note that pos_label (set to %r) is ignored when "
ValueError: Target is multiclass but average='binary'. Please choose another average setting.
# Choose an explicit multiclass averaging mode. The 'micro' result printed
# below is identical to the score() value printed earlier in this session.
precision_score(y_test, y_predict, average="micro")
0.93115438108484
from sklearn.metrics import confusion_matrix
# Display the multiclass confusion matrix. Per the scikit-learn docs, entry
# [i, j] counts samples whose true class is i and predicted class is j, so
# the diagonal holds the correct predictions.
confusion_matrix(y_test, y_predict)
array([[147, 0, 1, 0, 0, 1, 0, 0, 0, 0],
[ 0, 123, 1, 2, 0, 0, 0, 3, 4, 10],
[ 0, 0, 134, 1, 0, 0, 0, 0, 1, 0],
[ 0, 0, 0, 138, 0, 5, 0, 1, 5, 0],
[ 2, 5, 0, 0, 139, 0, 0, 3, 0, 1],
[ 1, 3, 1, 0, 0, 146, 0, 0, 1, 0],
[ 0, 2, 0, 0, 0, 1, 131, 0, 2, 0],
[ 0, 0, 0, 1, 0, 0, 0, 132, 1, 2],
[ 1, 9, 2, 3, 2, 4, 0, 0, 115, 4],
[ 0, 1, 0, 5, 0, 3, 0, 2, 2, 134]], dtype=int64)
# Keep the confusion matrix in a variable so it can be plotted and
# post-processed.
cfm = confusion_matrix(y_test, y_predict)
# Render the raw counts as a grayscale image.
plt.matshow(cfm, cmap=plt.cm.gray)
plt.show()
# Build a per-class error-rate matrix: each row of the confusion matrix is
# divided by the number of samples in that row.
row_sums = np.sum(cfm, axis=1)
# row_sums is a flat (n_classes,) vector. Dividing by it directly would
# broadcast along the last axis and divide element [i, j] by the total of
# row j. Reshaping to a column vector makes every element of row i be
# divided by row i's own total.
err_matrix = cfm / row_sums[:, np.newaxis]
# Zero the diagonal (correct predictions) so that only misclassifications
# remain visible in the plot.
np.fill_diagonal(err_matrix, 0)
plt.matshow(err_matrix, cmap=plt.cm.gray)
plt.show()
# Zero the diagonal of the raw-count matrix and plot it, which shows the
# absolute number of misclassifications for each (row, column) pair.
# NOTE: fill_diagonal modifies cfm in place, so the diagonal counts are no
# longer available from cfm after this line.
np.fill_diagonal(cfm, 0)
plt.matshow(cfm, cmap=plt.cm.gray)
plt.show()