Model Evaluation, Selection, and Validation: Performance Metrics

Performance Metrics for Classification Problems

accuracy_score

from sklearn.metrics import accuracy_score
y_true=[1,1,1,1,1,0,0,0,0,0]
y_pred=[0,0,1,1,0,0,1,1,0,0]
print('Accuracy Score(normalize=True):%s\nAccuracy Score(normalize=False):%s'%
      (accuracy_score(y_true,y_pred,normalize=True),accuracy_score(y_true,y_pred,normalize=False)))
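
As a cross-check, the same two numbers can be computed by hand (an added sketch, not part of the original scripts): normalize=True returns the fraction of correct predictions, normalize=False the raw count.

import numpy as np
y_true=np.array([1,1,1,1,1,0,0,0,0,0])
y_pred=np.array([0,0,1,1,0,0,1,1,0,0])
n_correct=np.sum(y_true==y_pred)     # number of positions where prediction matches the label
print(n_correct/len(y_true))         # fraction correct, matches normalize=True
print(n_correct)                     # raw count, matches normalize=False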

precision_score

from sklearn.metrics import accuracy_score,precision_score
y_true=[1,1,1,1,1,0,0,0,0,0]
y_pred=[0,0,1,1,0,0,0,0,0,0]
print('Accuracy Score:%s\nPrecision Score:%s'%
      (accuracy_score(y_true,y_pred,normalize=True),precision_score(y_true,y_pred)))

recall_score

from sklearn.metrics import accuracy_score,precision_score,recall_score
y_true=[1,1,1,1,1,0,0,0,0,0]
y_pred=[0,0,1,1,0,0,0,0,0,0]
print('Accuracy Score:%s\nPrecision Score:%s\nRecall Score:%s'%
      (accuracy_score(y_true,y_pred,normalize=True),precision_score(y_true,y_pred),recall_score(y_true,y_pred)))

f1_score

from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
y_true=[1,1,1,1,1,0,0,0,0,0]
y_pred=[0,0,1,1,0,0,0,0,0,0]
print('Accuracy Score:%s\nPrecision Score:%s\nRecall Score:%s\nF1 Score:%s'%
      (accuracy_score(y_true,y_pred,normalize=True),precision_score(y_true,y_pred),
       recall_score(y_true,y_pred),f1_score(y_true,y_pred)))
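
As a sanity check, the three scores above can be recomputed from the confusion counts (a minimal sketch added here, assuming the usual binary definitions with label 1 as the positive class):

y_true=[1,1,1,1,1,0,0,0,0,0]
y_pred=[0,0,1,1,0,0,0,0,0,0]
tp=sum(1 for t,p in zip(y_true,y_pred) if t==1 and p==1)   # true positives: 2
fp=sum(1 for t,p in zip(y_true,y_pred) if t==0 and p==1)   # false positives: 0
fn=sum(1 for t,p in zip(y_true,y_pred) if t==1 and p==0)   # false negatives: 3
precision=tp/(tp+fp)                        # 2/2 = 1.0
recall=tp/(tp+fn)                           # 2/5 = 0.4
f1=2*precision*recall/(precision+recall)    # harmonic mean = 0.571...
print(precision,recall,f1)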

fbeta_score

from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,fbeta_score
y_true=[1,1,1,1,1,0,0,0,0,0]
y_pred=[0,0,1,1,0,0,0,0,0,0]
print('Accuracy Score:%s\nPrecision Score:%s\nRecall Score:%s\nF1 Score:%s\n'%
      (accuracy_score(y_true,y_pred,normalize=True),precision_score(y_true,y_pred),
       recall_score(y_true,y_pred),f1_score(y_true,y_pred)))
print('Fbeta Score(beta=0.001):%s\nFbeta Score(beta=1):%s\nFbeta Score(beta=10):%s\nFbeta Score(beta=10000):%s'%
      (fbeta_score(y_true,y_pred,beta=0.001),
       fbeta_score(y_true,y_pred,beta=1),
       fbeta_score(y_true,y_pred,beta=10),
       fbeta_score(y_true,y_pred,beta=10000)))
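
The sweep over beta follows directly from the F-beta formula; a small check (added sketch, plugging in the precision=1.0 and recall=0.4 of this example) shows that a beta close to 0 recovers precision and a very large beta approaches recall:

def fbeta(p,r,beta):
    # F_beta = (1+beta^2)*P*R / (beta^2*P + R)
    return (1+beta**2)*p*r/(beta**2*p+r)

p,r=1.0,0.4
for beta in (0.001,1,10,10000):
    print(beta,fbeta(p,r,beta))   # ~1.0 (precision), 0.571..., ~0.402, ~0.4 (recall)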

classification_report

from sklearn.metrics import classification_report
y_true=[1,1,1,1,1,0,0,0,0,0]
y_pred=[0,0,1,1,0,0,0,0,0,0]
print('Classification Report:\n',classification_report(y_true,y_pred,target_names=['class_0','class_1']))
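
If the per-class numbers are needed programmatically rather than as formatted text, recent scikit-learn releases also accept output_dict=True (an added note; it reuses y_true and y_pred from the snippet above):

report=classification_report(y_true,y_pred,target_names=['class_0','class_1'],output_dict=True)
print(report['class_1']['recall'])   # recall of the positive class as a plain float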

confusion_matrix

from sklearn.metrics import confusion_matrix
y_true=[1,1,1,1,1,0,0,0,0,0]
y_pred=[0,0,1,1,0,0,0,0,0,0]
print('Confusion Matrix:\n',confusion_matrix(y_true,y_pred,labels=[0,1]))
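
For a binary problem the four cells can be unpacked directly; with labels=[0,1] the matrix has true classes as rows and predicted classes as columns, so ravel() yields TN, FP, FN, TP (a small sketch reusing y_true and y_pred from above):

tn,fp,fn,tp=confusion_matrix(y_true,y_pred,labels=[0,1]).ravel()
print('TN=%s FP=%s FN=%s TP=%s'%(tn,fp,fn,tp))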

precision_recall_curve

from sklearn.metrics import precision_recall_curve
from sklearn.datasets import load_iris
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import label_binarize
import numpy as np

# Load the iris data
iris=load_iris()
X,y=iris.data,iris.target

# Binarize the labels (one-vs-rest indicator matrix)
y=label_binarize(y,classes=[0,1,2])
n_classes=y.shape[1]

# Add noise features
np.random.seed(0)
n_samples,n_features=X.shape
X=np.c_[X,np.random.randn(n_samples,200*n_features)]

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.5,random_state=0)

# Train a one-vs-rest linear SVM
clf=OneVsRestClassifier(SVC(kernel='linear',probability=True,random_state=0))
clf.fit(X_train,y_train)
y_score=clf.decision_function(X_test)

# Compute the precision-recall curve for each class
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
precision=dict()
recall=dict()
for i in range(n_classes):
    precision[i],recall[i],_=precision_recall_curve(y_test[:,i],y_score[:,i])
    ax.plot(recall[i],precision[i],label='target=%s'%i)
ax.set_xlabel('Recall Score')
ax.set_ylabel('Precision Score')
ax.set_title('P-R')
ax.legend(loc='best')
ax.set_xlim(0,1.1)
ax.set_ylim(0,1.1)
ax.grid()
plt.show()
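
Each P-R curve can also be summarised by a single number with average_precision_score (an added sketch that continues the script above; 'micro' pools every (sample, class) decision into one binary problem):

from sklearn.metrics import average_precision_score
for i in range(n_classes):
    print('target=%s, AP=%.3f'%(i,average_precision_score(y_test[:,i],y_score[:,i])))
print('micro-average AP=%.3f'%average_precision_score(y_test,y_score,average='micro'))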

roc_curve & roc_auc_score

from sklearn.metrics import roc_curve,roc_auc_score
from sklearn.datasets import load_iris
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import label_binarize
import numpy as np

# Load the iris data
iris=load_iris()
X,y=iris.data,iris.target

# Binarize the labels (one-vs-rest indicator matrix)
y=label_binarize(y,classes=[0,1,2])
n_classes=y.shape[1]

# Add noise features
np.random.seed(0)
n_samples,n_features=X.shape
X=np.c_[X,np.random.randn(n_samples,200*n_features)]

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.5,random_state=0)

# Train a one-vs-rest linear SVM
clf=OneVsRestClassifier(SVC(kernel='linear',probability=True,random_state=0))
clf.fit(X_train,y_train)
y_score=clf.decision_function(X_test)

# Compute the ROC curve and AUC for each class
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
for i in range(n_classes):
    fpr,tpr,_=roc_curve(y_test[:,i],y_score[:,i])
    roc_auc=roc_auc_score(y_test[:,i],y_score[:,i])
    ax.plot(fpr,tpr,label='target=%s,auc=%s'%(i,roc_auc))
ax.plot([0,1],[0,1],'k--')
ax.set_xlabel('FPR')
ax.set_ylabel('TPR')
ax.set_title('ROC')
ax.legend(loc='best')
ax.set_xlim(0,1.1)
ax.set_ylim(0,1.1)
ax.grid()
plt.show()
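
A micro-averaged ROC over all classes can be added the same way (a short sketch continuing the script above; every (sample, class) pair is treated as one binary decision):

fpr_micro,tpr_micro,_=roc_curve(y_test.ravel(),y_score.ravel())
print('micro-average AUC=%.3f'%roc_auc_score(y_test,y_score,average='micro'))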

Performance Metrics for Regression Problems

mean_absolute_error

from sklearn.metrics import mean_absolute_error

y_true=[1,1,1,1,1,2,2,2,0,0]
y_pred=[0,0,0,1,1,1,0,0,0,0]

print('Mean Absolute Error:',mean_absolute_error(y_true,y_pred))

mean_squared_error

from sklearn.metrics import mean_squared_error,mean_absolute_error

y_true=[1,1,1,1,1,2,2,2,0,0]
y_pred=[0,0,0,1,1,1,0,0,0,0]

print('Mean Absolute Error:%s\nMean Squared Error:%s'%
    (mean_absolute_error(y_true,y_pred),mean_squared_error(y_true,y_pred)))
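
RMSE is often reported alongside MSE; taking the square root yourself works across scikit-learn versions (an added sketch reusing y_true and y_pred from above):

import numpy as np
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_true,y_pred)))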

Validation Curve

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
from sklearn.svm import LinearSVC
from sklearn.model_selection import validation_curve

# Load the digits data
digits=load_digits()
X,y=digits.data,digits.target

# Compute the validation curve over the regularization parameter C
param_name='C'
param_range=np.logspace(-2,2)
train_scores,test_scores=validation_curve(LinearSVC(),X,y,param_name=param_name,param_range=param_range,cv=10,
                                          scoring='accuracy')

# For each C, compute the mean and standard deviation of the 10-fold cross-validation scores
train_scores_mean=np.mean(train_scores,axis=1)
train_scores_std=np.std(train_scores,axis=1)
test_scores_mean=np.mean(test_scores,axis=1)
test_scores_std=np.std(test_scores,axis=1)

# Plot
fig=plt.figure()
ax=fig.add_subplot(1,1,1)

ax.semilogx(param_range,train_scores_mean,label='Training Accuracy',color='r')
ax.fill_between(param_range,train_scores_mean-train_scores_std,train_scores_mean+train_scores_std,alpha=0.2,color='r')

ax.semilogx(param_range,test_scores_mean,label='Testing Accuracy',color='g')
ax.fill_between(param_range,test_scores_mean-test_scores_std,test_scores_mean+test_scores_std,alpha=0.2,color='g')

ax.set_title('Validation Curve with LinearSVC')
ax.set_xlabel('C')
ax.set_ylabel('Score')
ax.set_ylim(0,1.1)
ax.legend(loc='best')
plt.show()
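
If one value of C has to be chosen from the curve, a simple (added) heuristic is the point with the highest mean cross-validation score:

best_idx=np.argmax(test_scores_mean)
print('best C=%s, CV accuracy=%.3f'%(param_range[best_idx],test_scores_mean[best_idx]))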

Learning Curve

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
from sklearn.svm import LinearSVC
from sklearn.model_selection import learning_curve

# Load the digits data
digits=load_digits()
X,y=digits.data,digits.target

# Compute the learning curve
train_sizes=np.linspace(0.1,1.0,endpoint=True,dtype='float')
abs_trains_sizes,train_scores,test_scores=learning_curve(LinearSVC(),X,y,cv=10,scoring='accuracy',
                                                         train_sizes=train_sizes)

# For each training-set size, compute the mean and standard deviation of the 10-fold cross-validation scores
train_scores_mean=np.mean(train_scores,axis=1)
train_scores_std=np.std(train_scores,axis=1)
test_scores_mean=np.mean(test_scores,axis=1)
test_scores_std=np.std(test_scores,axis=1)

# Plot
fig=plt.figure()
ax=fig.add_subplot(1,1,1)

ax.plot(abs_trains_sizes,train_scores_mean,label='Training Accuracy',color='r')
ax.fill_between(abs_trains_sizes,train_scores_mean-train_scores_std,train_scores_mean+train_scores_std,alpha=0.2,color='r')

ax.plot(abs_trains_sizes,test_scores_mean,label='Testing Accuracy',color='g')
ax.fill_between(abs_trains_sizes,test_scores_mean-test_scores_std,test_scores_mean+test_scores_std,alpha=0.2,color='g')

ax.set_title('Learning Curve with LinearSVC')
ax.set_xlabel('Sample Nums')
ax.set_ylabel('Score')
ax.set_ylim(0,1.1)
ax.legend(loc='best')
plt.show()
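
As a quick numerical read of the curve (added sketch): the gap between the final training and cross-validation scores gives a rough sense of remaining overfitting, and a test curve that is still rising suggests more data would help.

print('final train acc=%.3f, final CV acc=%.3f, gap=%.3f'%
      (train_scores_mean[-1],test_scores_mean[-1],
       train_scores_mean[-1]-test_scores_mean[-1]))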