分类性能量度(Classification-Performance-Measures)
一、准确度的陷阱和混淆矩阵
1、准确度的陷阱
-
对于回归任务,我们前面介绍了均方误差,均方根误差,平均绝对误差,R²误差。但是对于分类任务,我们仅仅介绍了分类准确度这一种评价标准,实际上该评价标准是存在很大问题的,因此我们本章进一步介绍各种分类性能度量。
-
首先,我们介绍一下分类准确度可能存在的问题即陷阱:
2、混淆矩阵
-
在引入其他各种分类指标前,我们首先介绍一个工具:混淆矩阵。
-
对于一个分类算法来说,作用在一组数据上,如何得到混淆矩阵,进一步通过混淆矩阵,我们就能得到各种比分类准确度还要好的分类指标。如下介绍如何得到混淆矩阵:
二、精准率和召回率及F1指标
2.1、精准率与召回率
-
计算方法:
-
几何图形理解:
-
精准率和召回率的优势(相较于分类准确度):
-
手动实现TN/FP/FN/TP/混淆矩阵/精准率/召回率
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
digits = datasets.load_digits()
X = digits.data
y = digits.target.copy()
y[digits.target==9] = 1 # relabel the rare class (digit 9) as 1 and everything else as 0, producing a highly skewed binary dataset
y[digits.target!=9] = 0
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=666)
log_reg = LogisticRegression(solver='liblinear')
log_reg.fit(X_train,y_train)
y_log_predict = log_reg.predict(X_test)
print(log_reg.score(X_test, y_test))
y_nomodel_predict = np.zeros(len(y_test),dtype='int')
print(sum(y_nomodel_predict == y_test) / len(y_test)) # Note: with no model at all, predicting all zeros still scores ~90% accuracy because of the class skew
0.9755555555555555
0.9
def TN(y_true,y_predict):
    """Count true negatives: samples whose true label is 0 and are predicted 0."""
    assert len(y_true)==len(y_predict),'the size of y_true must be equal to y_predict'
    actual_negative = (y_true == 0)
    predicted_negative = (y_predict == 0)
    # element-wise AND of the two boolean masks; summing counts where both hold
    return np.sum(actual_negative & predicted_negative)
TN(y_test,y_log_predict)
403
def FP(y_true,y_predict):
    """Count false positives: samples whose true label is 0 but are predicted 1."""
    assert len(y_true)==len(y_predict),'the size of y_true must be equal to y_predict'
    actual_negative = (y_true == 0)
    predicted_positive = (y_predict == 1)
    # element-wise AND of the two boolean masks; summing counts where both hold
    return np.sum(actual_negative & predicted_positive)
FP(y_test,y_log_predict)
2
def FN(y_true,y_predict):
    """Count false negatives: samples whose true label is 1 but are predicted 0."""
    assert len(y_true)==len(y_predict),'the size of y_true must be equal to y_predict'
    actual_positive = (y_true == 1)
    predicted_negative = (y_predict == 0)
    # element-wise AND of the two boolean masks; summing counts where both hold
    return np.sum(actual_positive & predicted_negative)
FN(y_test,y_log_predict)
9
def TP(y_true,y_predict):
    """Count true positives: samples whose true label is 1 and are predicted 1."""
    assert len(y_true)==len(y_predict),'the size of y_true must be equal to y_predict'
    actual_positive = (y_true == 1)
    predicted_positive = (y_predict == 1)
    # element-wise AND of the two boolean masks; summing counts where both hold
    return np.sum(actual_positive & predicted_positive)
TP(y_test,y_log_predict)
36
def confusion_matrix(y_true,y_predict):
    """Build the 2x2 confusion matrix: rows are actual (0,1), columns are predicted (0,1)."""
    tn = TN(y_true,y_predict)
    fp = FP(y_true,y_predict)
    fn = FN(y_true,y_predict)
    tp = TP(y_true,y_predict)
    return np.array([[tn, fp], [fn, tp]])
print(confusion_matrix(y_test,y_log_predict))
print(confusion_matrix(y_test,y_nomodel_predict))
[[403 2]
[ 9 36]]
[[405 0]
[ 45 0]]
def precision_score(y_true,y_predict):
    """Precision = TP / (TP + FP).

    Returns 0.0 when there are no positive predictions (TP and FP both 0),
    which would otherwise divide by zero.
    """
    tp = int(TP(y_true,y_predict))  # int() so a zero denominator raises ZeroDivisionError deterministically
    fp = int(FP(y_true,y_predict))
    try:
        return tp / (tp + fp)
    except ZeroDivisionError:  # narrow except: a bare except would also hide real bugs (e.g. NameError)
        return 0.0
print(precision_score(y_test,y_log_predict))
print(precision_score(y_test,y_nomodel_predict))
0.9473684210526315
0.0
def recall_score(y_true,y_predict):
    """Recall = TP / (TP + FN).

    Returns 0.0 when there are no actual positives (TP and FN both 0).
    Note: the counts must be converted to plain int — numpy scalar 0/0
    yields nan with a RuntimeWarning instead of raising, so without int()
    the 0.0 fallback would never trigger.
    """
    tp = int(TP(y_true,y_predict))
    fn = int(FN(y_true,y_predict))
    try:
        return tp/(tp+fn)
    except ZeroDivisionError:  # narrow except, consistent with precision_score
        return 0.0
print(recall_score(y_test,y_log_predict))
print(recall_score(y_test,y_nomodel_predict))
0.8
0.0
- sklearn实现TN/FP/FN/TP/混淆矩阵/精准率/召回率
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_log_predict)
array([[403, 2],
[ 9, 36]], dtype=int64)
from sklearn.metrics import precision_score
precision_score(y_test,y_log_predict)
0.9473684210526315
from sklearn.metrics import recall_score
recall_score(y_test,y_log_predict)
0.8
2.2、F1指标
def f1_score(precision,recall):
    """F1 score: harmonic mean of precision and recall.

    Returns 0.0 when precision and recall are both 0 (the only case where
    the denominator vanishes for non-negative inputs).
    """
    try:
        return 2*precision*recall/(precision+recall)
    except ZeroDivisionError:  # narrow except: a bare except would also hide TypeError etc.
        return 0.0
precision = 0.5
recall = 0.5
f1_score(precision,recall)
0.5
precision = 0.1
recall = 0.9
f1_score(precision,recall)
0.18000000000000002
precision = 0.0
recall = 1.0
f1_score(precision,recall)
0.0
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
print(precision_score(y_test,y_log_predict))
print(recall_score(y_test,y_log_predict))
print(f1_score(y_test,y_log_predict)) # f1-score兼顾了精准率和召回率
0.9473684210526315
0.8
0.8674698795180723
2.3、精准率与召回率的平衡
print(log_reg.decision_function(X_test)[:10]) # decision scores from the fitted logistic regression for the first 10 test samples
print(log_reg.predict(X_test)[:10]) # the corresponding predictions, i.e. scores compared against the default decision boundary 0
[-22.05700117 -33.02940957 -16.21334087 -80.3791447 -48.25125396
-24.54005629 -44.39168773 -25.04292757 -0.97829292 -19.7174399 ]
[0 0 0 0 0 0 0 0 0 0]
decision_scores = log_reg.decision_function(X_test) # decision score for every test sample
y_log_predict1 = np.array(decision_scores>=-5,dtype='int') # threshold lowered to -5; the next three cells show precision and recall trading off dynamically
print(precision_score(y_test,y_log_predict1))
print(recall_score(y_test,y_log_predict1))
print(f1_score(y_test,y_log_predict1))
0.7272727272727273
0.8888888888888888
0.7999999999999999
y_log_predict2 = np.array(decision_scores>=0,dtype='int') # default threshold 0: matches predict()'s output
print(precision_score(y_test,y_log_predict2))
print(recall_score(y_test,y_log_predict2))
print(f1_score(y_test,y_log_predict2))
0.9473684210526315
0.8
0.8674698795180723
y_log_predict3 = np.array(decision_scores>=5,dtype='int') # threshold raised to +5: precision rises, recall drops
print(precision_score(y_test,y_log_predict3))
print(recall_score(y_test,y_log_predict3))
print(f1_score(y_test,y_log_predict3))
0.96
0.5333333333333333
0.6857142857142858
2.4、精准率和召回率曲线的绘制(如何平衡二者选取最好的决策边界)
- 精准率和召回率曲线的代码实现
precisions = [] # precision at each candidate decision threshold
recalls = [] # recall at each candidate decision threshold
thresholds = np.arange(np.min(decision_scores),np.max(decision_scores),0.1)
for threshold in thresholds:
    y_log_predict_current = np.array(decision_scores>=threshold,dtype='int')
    precisions.append(precision_score(y_test,y_log_predict_current))
    recalls.append(recall_score(y_test,y_log_predict_current))
plt.plot(thresholds, precisions) # precision curve: rises as the threshold increases
plt.plot(thresholds, recalls) # recall curve: falls as the threshold increases
plt.show()
plt.plot(precisions,recalls) # precision-recall curve
plt.show()
- sklearn中实现精准率和召回率曲线
from sklearn.metrics import precision_recall_curve
precisions,recalls,thresholds = precision_recall_curve(y_test,decision_scores)
plt.plot(thresholds, precisions[:-1]) # sklearn appends a final precision=1 / recall=0 point with no matching threshold, so drop the last entry when plotting against thresholds
plt.plot(thresholds, recalls[:-1])
plt.show()
plt.plot(precisions,recalls) # precision-recall curve
plt.show()
三、TPR与FPR及ROC曲线
- 调用自己实现的metrics包绘制ROC曲线
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
digits = datasets.load_digits()
X = digits.data
y = digits.target.copy()
y[digits.target==9] = 1 # same skewed binary relabeling as before: digit 9 -> 1, everything else -> 0
y[digits.target!=9] = 0
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=666)
log_reg = LogisticRegression(solver='liblinear')
log_reg.fit(X_train,y_train)
decision_scores = log_reg.decision_function(X_test)
from py_for_ML.ClassificationPerformanceMeasure.metrics import FPR,TPR
fprs = [] # false positive rate at each candidate decision threshold
tprs = [] # true positive rate at each candidate decision threshold
thresholds = np.arange(np.min(decision_scores),np.max(decision_scores),0.1)
for threshold in thresholds:
    y_predict = np.array(decision_scores>=threshold,dtype='int')
    fprs.append(FPR(y_test,y_predict))
    tprs.append(TPR(y_test,y_predict))
plt.plot(fprs,tprs) # ROC curve: TPR against FPR as the threshold varies
plt.show()
- 调用sklearn绘制ROC曲线,及计算ROC曲线面积AUC
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
sklearn_fprs,sklearn_tprs,sklearn_threshold = roc_curve(y_test,decision_scores)
plt.plot(sklearn_fprs,sklearn_tprs) # ROC curve computed by sklearn
plt.show()
roc_auc_score(y_test,decision_scores) # 计算AUC大小(注:此处说明文字必须以 # 注释形式写在代码后,否则会触发 SyntaxError: invalid syntax)
五、多分类问题的评价标准
- 混淆矩阵
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
digits = datasets.load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=666)
log_reg = LogisticRegression(solver='liblinear',multi_class='auto')
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)
y_predict = log_reg.predict(X_test)
confusion_matrix(y_test, y_predict) # 10x10 confusion matrix for this ten-class problem
array([[147, 0, 1, 0, 0, 1, 0, 0, 0, 0],
[ 0, 123, 1, 2, 0, 0, 0, 3, 4, 10],
[ 0, 0, 134, 1, 0, 0, 0, 0, 1, 0],
[ 0, 0, 0, 138, 0, 5, 0, 1, 5, 0],
[ 2, 5, 0, 0, 139, 0, 0, 3, 0, 1],
[ 1, 3, 1, 0, 0, 146, 0, 0, 1, 0],
[ 0, 2, 0, 0, 0, 1, 131, 0, 2, 0],
[ 0, 0, 0, 1, 0, 0, 0, 132, 1, 2],
[ 1, 9, 2, 3, 2, 4, 0, 0, 115, 4],
[ 0, 1, 0, 5, 0, 3, 0, 2, 2, 134]], dtype=int64)
cfm = confusion_matrix(y_test, y_predict) # visualize the confusion matrix
plt.matshow(cfm, cmap=plt.cm.gray) # grayscale: brighter = larger count; the diagonal (correct predictions) dominates
plt.show()
row_sums = np.sum(cfm, axis=1) # sum along columns, i.e. per-row totals: number of samples whose true label is i
err_matrix = cfm / row_sums # per-row error ratios
np.fill_diagonal(err_matrix, 0) # zero the diagonal so the bright correct cells don't drown out the off-diagonal errors
plt.matshow(err_matrix, cmap=plt.cm.gray) # bright cells such as (1,9) and (8,1) show many true 1s predicted as 9 and true 8s predicted as 1
# so one can tune the threshold for those confusable pairs, or inspect the label-1/label-8 samples for data issues, to improve overall accuracy
plt.show()
- 精准率、召回率、F1score
from sklearn.metrics import precision_score
precision_score(y_test, y_predict,average='macro') # 注:默认的average='binary'用于计算二分类问题,故需要修改默认值适用于多分类
0.9317444200458842
from sklearn.metrics import recall_score
recall_score(y_test, y_predict,average='macro')
0.9312195364786084
from sklearn.metrics import f1_score
f1_score(y_test, y_predict,average='macro')
0.9311365480583508