# Precision and recall of a binary classifier at hand-picked decision thresholds.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split

# Reduce the digits task to a binary one: label 1 for digit 9, 0 for every
# other digit.
digits = datasets.load_digits()
X = digits.data
y = digits.target.copy()
y[digits.target == 9] = 1
y[digits.target != 9] = 0

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
# Keep the raw decision scores rather than predicted labels so that the
# classification threshold can be moved manually below.
decision_score = log_reg.decision_function(X_test)

# Candidate thresholds run from the lowest score up to (but excluding) the
# highest one, using np.arange's default step of 1.
thresholds = np.arange(np.min(decision_score), np.max(decision_score))
precisions = []
recalls = []
for threshold in thresholds:
    # A sample is predicted positive when its score reaches the threshold.
    y_predict = np.array(decision_score >= threshold, dtype='int')
    precisions.append(precision_score(y_test, y_predict))
    recalls.append(recall_score(y_test, y_predict))
# Plot precision and recall as functions of the decision threshold.
import matplotlib.pyplot as plt

plt.plot(thresholds, precisions)
plt.plot(thresholds, recalls)
plt.show()

# Precision-recall curve (precision on the x axis, recall on the y axis).
plt.plot(precisions, recalls)
# Show this figure on its own; otherwise, when run as a script, the curve is
# never displayed and later plt.plot calls draw onto the same axes.
plt.show()
# Precision-recall curve computed by scikit-learn.
from sklearn.metrics import precision_recall_curve

precisions, recalls, thresholds = precision_recall_curve(y_test, decision_score)

# precisions and recalls hold one more entry than thresholds: scikit-learn
# appends a final (precision=1, recall=0) point that has no threshold.
# The trailing comments are the outputs recorded from the original session.
print(precisions.shape)   # (149,)
print(recalls.shape)      # (149,)
print(thresholds.shape)   # (148,)

# Drop that last point so both series line up with thresholds.
plt.plot(thresholds, precisions[:-1])
plt.plot(thresholds, recalls[:-1])
plt.show()

# Precision-recall curve (precision on the x axis, recall on the y axis),
# drawn on its own figure.
plt.plot(precisions, recalls)
plt.show()
# 外面的曲线对应的模型优于里面的曲线对应的模型,其 recall 和 precision 都大于里面的。
# 实际上,经常用 ROC 曲线来判断模型的优劣。