学习曲线
学习曲线用来选数据量
验证曲线用来选择超参数
学习曲线用于评估需要多大的训练样本量,模型才能达到最佳效果
sklearn.model_selection.learning_curve()
from sklearn.model_selection import learning_curve# 导入学习曲线
# Training-set sizes to evaluate, given as fractions of the available
# training data: 10 evenly spaced values from 10% to 100%.
size = np.linspace(start=0.1, stop=1, num=10)

# Score Ridge at every requested size using 10-fold cross-validation.
train_sizes, train_scores, test_scores = learning_curve(
    estimator=Ridge(),
    X=x,
    y=y,
    train_sizes=size,
    cv=10,
)

# Show the absolute number of training samples behind each fraction.
print(train_sizes)
[ 45 91 136 182 227 273 318 364 409 455]
# Plot the learning curve: for each training-set size, the score averaged
# over the cross-validation folds (axis=1 of the score arrays).
plt.scatter(train_sizes, np.mean(train_scores, axis=1), label="training score")
plt.scatter(train_sizes, np.mean(test_scores, axis=1), label="cross-validation score")
# Label the series and axes; without them the two point clouds cannot be
# told apart on the figure.
plt.xlabel("training samples")
plt.ylabel("mean score")
plt.legend()
第一个点(训练样本最少,仅 45 个)的训练得分明显高于验证得分,属于过拟合,不应选择该样本量
# Repeat the experiment on a finer grid: 100 evenly spaced fractions
# from 10% to 100% of the available training data.
size = np.linspace(0.1, 1, 100)
train_sizes, train_scores, test_scores = learning_curve(
    Ridge(), x, y, train_sizes=size, cv=10
)

# Plot the learning curve: for each training-set size, the score averaged
# over the cross-validation folds (axis=1 of the score arrays).
plt.scatter(train_sizes, np.mean(train_scores, axis=1), label="training score")
plt.scatter(train_sizes, np.mean(test_scores, axis=1), label="cross-validation score")
# Label the series and axes; without them the two point clouds cannot be
# told apart on the figure.
plt.xlabel("training samples")
plt.ylabel("mean score")
plt.legend()
# Repeat the experiment with 1000 evenly spaced fractions from 10% to 100%.
# NOTE(review): the earlier output shows at most 455 training samples, so
# many of these fractions presumably map to the same absolute size;
# scikit-learn documents that duplicate ticks are removed, so fewer than
# 1000 points may be returned -- confirm against the actual data size.
size = np.linspace(0.1, 1, 1000)
train_sizes, train_scores, test_scores = learning_curve(
    Ridge(), x, y, train_sizes=size, cv=10
)

# Plot the learning curve: for each training-set size, the score averaged
# over the cross-validation folds (axis=1 of the score arrays).
plt.scatter(train_sizes, np.mean(train_scores, axis=1), label="training score")
plt.scatter(train_sizes, np.mean(test_scores, axis=1), label="cross-validation score")
# Label the series and axes; without them the two point clouds cannot be
# told apart on the figure.
plt.xlabel("training samples")
plt.ylabel("mean score")
plt.legend()