定义绘制以训练样本数为横坐标的学习曲线的函数
完整代码
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold, cross_val_score, learning_curve, train_test_split
from xgboost import XGBRegressor as xgbr
# Load the Boston housing dataset ONCE — the original called load_boston()
# twice (once for .data, once for .target), parsing the dataset twice.
# NOTE(review): load_boston is deprecated and removed in scikit-learn >= 1.2;
# migrate to fetch_california_housing or fetch_openml(name="boston", version=1)
# — confirm the installed sklearn version supports it.
boston = load_boston()
data = boston.data
target = boston.target
# Hold out 30% as a test set; fixed random_state for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.3, random_state=420
)
def plot_learning_curve(estimator, title, x, y,
                        ax=None,
                        ylim=None,
                        cv=None,
                        n_jobs=None):
    """Plot a learning curve: CV score versus number of training samples.

    Parameters
    ----------
    estimator : sklearn-compatible estimator, passed to ``learning_curve``.
    title : str
        Title for the axes.
    x, y : array-like
        Training features and targets forwarded to ``learning_curve``.
    ax : matplotlib Axes or None
        Axes to draw on; the current axes (``plt.gca()``) are used when None.
    ylim : tuple(float, float) or None
        Optional y-axis limits.
    cv : int, CV splitter, or None
        Cross-validation strategy forwarded to ``learning_curve``.
    n_jobs : int or None
        Parallelism forwarded to ``learning_curve``.

    Returns
    -------
    matplotlib Axes with both curves and a legend drawn.
    """
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, x, y,
        shuffle=True,
        cv=cv,
        n_jobs=n_jobs,
        # random_state=420,
    )
    # BUGFIX: identity comparison with None (was `ax == None`).
    if ax is None:
        ax = plt.gca()
    ax.set_title(title)
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.set_xlabel('Training Example')
    ax.set_ylabel('score')
    ax.grid()
    # axis=1: average across the CV folds for each training-set size.
    ax.plot(train_sizes, np.mean(train_scores, axis=1),
            'o-', c="r", label="Train__score")
    # BUGFIX: the cross-validation curve was mislabeled "Train__score"
    # (copy-paste), producing two identical legend entries.
    ax.plot(train_sizes, np.mean(test_scores, axis=1),
            'o-', c="g", label="Test_score")
    ax.legend(loc="best")
    return ax
# 5-fold cross-validation with shuffling; seed fixed so folds are reproducible.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Draw the learning curve for a 100-tree XGBoost regressor on the train split.
booster = xgbr(n_estimators=100, random_state=420)
plot_learning_curve(booster, "XGB", X_train, y_train, ax=None, cv=cv)
plt.show()
细化学习曲线,找出最佳n_estimators
# Candidate values for n_estimators: 10, 110, 210, ..., 910.
n_est = range(10, 1010, 100)
# Mean cross-validation score for each candidate (filled in by plot_cvs).
rs = []
def plot_cvs(model):
    """Append the mean cross-validated score of ``model(n_estimators=i)``
    to the global list ``rs`` for every ``i`` in the global grid ``n_est``.

    Reads module-level ``X_train`` / ``y_train``; mutates ``rs`` in place.
    NOTE(review): depends on ``cross_val_score`` being in scope — the
    visible imports never bring it in; verify the full file imports it
    from ``sklearn.model_selection``.
    """
    for size in n_est:
        estimator = model(n_estimators=size, random_state=420)
        mean_score = cross_val_score(estimator, X_train, y_train).mean()
        rs.append(mean_score)
# Sweep n_estimators over the grid and collect the mean CV scores in `rs`.
plot_cvs(xgbr)

# Report the grid and the scores, then draw score-vs-n_estimators.
print('n_est:', n_est)
print('rs:', rs)

plt.figure(figsize=(3, 4))
plt.title('XGB')
plt.plot(n_est, rs, c="red", label="XGB")
plt.legend()
plt.show()