There are many ways to evaluate a model. For classification models (binary or multi-class), common metrics include accuracy, precision, recall, the F1 score, the ROC curve, and AUC, while regression problems should use other suitable metrics, most commonly MSE and R².
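As a quick sketch of the regression side (the diabetes dataset and LinearRegression are only illustrative choices, not part of the original example):
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
X_reg, y_reg = datasets.load_diabetes(return_X_y=True)
Xr_train, Xr_test, yr_train, yr_test = train_test_split(X_reg, y_reg, test_size=0.3, random_state=0)
reg = linear_model.LinearRegression().fit(Xr_train, yr_train)
yr_pred = reg.predict(Xr_test)
print(mean_squared_error(yr_test, yr_pred))  # MSE: mean squared error on the test set
print(r2_score(yr_test, yr_pred))            # R^2: coefficient of determination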
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm
### 1. Evaluating classification accuracy on a test set
X, y = datasets.load_iris(return_X_y=True)
# Randomly shuffle the samples, then split them into training and test sets by the given proportion
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
# Train the model on the training set
clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
# Evaluate the model on the test set (for a classifier, score returns the mean accuracy)
print(clf.score(X_test, y_test))
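# Besides accuracy, the other classification metrics mentioned above (precision, recall, F1)
# can be computed from the same test predictions; a minimal sketch using classification_report:
from sklearn.metrics import classification_report
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred, target_names=datasets.load_iris().target_names))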
### 2. Model evaluation with cross-validation
## The samples are split into cv folds; in each round, cv-1 folds form the training set and the remaining fold is the test set
from sklearn.model_selection import cross_val_score
clf = svm.SVC(kernel='linear', C=1, random_state=0)
# No scoring is specified, so the estimator's default score is used (accuracy for a classifier)
scores = cross_val_score(clf, X, y, cv=5)
print(scores) # five scores, one per fold
# Report the mean and standard deviation of the fold scores
print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))
## permutation_test_score: assessing the significance of the classification score
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import permutation_test_score
clf = SVC(kernel="linear", random_state=7)
cv = StratifiedKFold(2, shuffle=True, random_state=0)
score_iris, perm_scores_iris, pvalue_iris = permutation_test_score(
    clf, X, y, scoring="accuracy", cv=cv, n_permutations=1000
)
print(score_iris)
#print(perm_scores_iris)
print(pvalue_iris)
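# The p-value can also be reproduced by hand from the permutation scores: it is the fraction of
# permutation scores at least as good as the original score, with +1 smoothing (a sketch to check
# the reported value):
manual_pvalue = (np.sum(perm_scores_iris >= score_iris) + 1) / (len(perm_scores_iris) + 1)
print(manual_pvalue)  # should match pvalue_iris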
# Plot the distribution of permutation scores and mark the score on the original data
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.hist(perm_scores_iris, bins=20, density=True)
ax.axvline(score_iris, ls="--", color="r")
score_label = f"Score on original\ndata: {score_iris:.2f}\n(p-value: {pvalue_iris:.3f})"
ax.text(0.7, 10, score_label, fontsize=12)
ax.set_xlabel("Accuracy score")
_ = ax.set_ylabel("Probability density")
## ShuffleSplit: shuffle the samples and split again, repeated n_splits times
from sklearn.model_selection import ShuffleSplit
# n_samples = X.shape[0]
cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
scores = cross_val_score(clf, X, y, cv=cv)
print(scores)
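# To see how ShuffleSplit differs from KFold, a small sketch printing part of each test set:
# the same sample can appear in several test sets, and some samples may never be tested.
for i, (train_idx, test_idx) in enumerate(cv.split(X)):
    print(i, test_idx[:10])  # first 10 test indices of each of the 5 splits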
## cross_validate: evaluating with several scoring metrics at once
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score
from sklearn import metrics
scoring = ['precision_macro', 'recall_macro', 'f1_macro']
clf = svm.SVC(kernel='linear', C=1, random_state=0)
scores = cross_validate(clf, X, y, scoring=scoring)
print(sorted(scores.keys()))
print(scores['test_precision_macro'])
print(scores['test_recall_macro'])
print(scores['test_f1_macro'])
## Custom scoring metrics
from sklearn.model_selection import cross_validate
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer
# A sample toy binary classification dataset
X, y = datasets.make_classification(n_classes=2, random_state=0)
clf = svm.LinearSVC(random_state=0)
# Helper functions extracting the four cells of the binary confusion matrix (rows: true class, columns: predicted class)
def tn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 0]
def fp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 1]
def fn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[1, 0]
def tp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[1, 1]
scoring = {'tp': make_scorer(tp), 'tn': make_scorer(tn),
'fp': make_scorer(fp), 'fn': make_scorer(fn),
'accuracy': make_scorer(accuracy_score),
'rec_macro': make_scorer(recall_score, average='macro')}
# return_train_score=True: also return the scores on the training folds
# cross_validate clones and refits the estimator on each fold, so there is no need to fit clf beforehand
cv_results = cross_validate(clf, X, y, scoring=scoring,
                            cv=5, return_train_score=True)
print(sorted(cv_results.keys()))
print(cv_results['test_fn'])
print(cv_results['test_fp'])
print(cv_results['test_rec_macro'])
print(cv_results['test_accuracy'])
print(cv_results['train_accuracy'])
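# The per-fold counts can be summed into an overall confusion matrix for the whole cross-validation
# (a small illustrative aggregation, not shown in the referenced documentation):
total_cm = np.array([[cv_results['test_tn'].sum(), cv_results['test_fp'].sum()],
                     [cv_results['test_fn'].sum(), cv_results['test_tp'].sum()]])
print(total_cm)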
Reference:
https://scikit-learn.org/stable/modules/cross_validation.html#permutation-test-score