验证曲线，学习曲线以及网格搜索

最新推荐文章于 2022-07-22 18:08:53 发布

小柒sy

最新推荐文章于 2022-07-22 18:08:53 发布

阅读量688

点赞数

分类专栏： Python 机器学习

本文链接：https://blog.csdn.net/weixin_41811413/article/details/86561768

版权

Python 同时被 2 个专栏收录

17 篇文章 0 订阅

订阅专栏

机器学习

9 篇文章 0 订阅

订阅专栏

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import validation_curve,learning_curve,GridSearchCV

def PolynomialRegression(degree=2, **kwargs):
    """Build a polynomial-regression pipeline.

    The pipeline first expands the inputs with ``PolynomialFeatures(degree)``
    and then fits a ``LinearRegression`` on the expanded features.

    Args:
        degree: Polynomial degree handed to ``PolynomialFeatures``.
        **kwargs: Forwarded unchanged to the ``LinearRegression`` constructor.

    Returns:
        The pipeline produced by ``make_pipeline`` for the two steps above.
    """
    feature_expander = PolynomialFeatures(degree)
    regressor = LinearRegression(**kwargs)
    return make_pipeline(feature_expander, regressor)

def make_data(N, err=1.0, rseed=1):
    """Draw a reproducible synthetic 1-D regression sample.

    Args:
        N: Number of samples to draw.
        err: Scale of the Gaussian noise added to the targets; noise is only
            added when ``err > 0``.
        rseed: Seed for the ``numpy.random.RandomState`` used for sampling.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape ``(N, 1)`` and holds squared
        uniform samples, and ``y`` has shape ``(N,)`` with
        ``y = 10 - 1 / (x + 0.1)`` plus the optional noise.
    """
    generator = np.random.RandomState(rseed)
    # Squaring the uniform draws concentrates the points near zero.
    features = generator.rand(N, 1) ** 2
    targets = 10 - 1 / (features.ravel() + 0.1)
    if err > 0:
        noise = generator.randn(N)
        targets += err * noise
    return features, targets

# Training sample: 40 points from make_data with its default noise and seed.
x, y = make_data(N=40)

# Dense, evenly spaced column vector of 500 inputs on [-0.1, 1.1], used to
# draw the fitted curves.
x_test = np.linspace(-0.1, 1.1, 500).reshape(-1, 1)
# plt.scatter(x.ravel(),y,color='black')
# axis = plt.axis()
# for degree in [1,3,5]:
#     y_test = PolynomialRegression(degree).fit(x,y).predict(x_test)
#     plt.plot(x_test.ravel(),y_test,label='degree={0}'.format(degree))
#
# plt.xlim(-0.1,1.0)
# plt.ylim(-2,12)
# plt.legend(loc='best')
# plt.show()


#验证曲线的使用情况
# degree = np.arange(0,21)
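# Returns the training and validation scores for each degree (with the default
# scorer this is R^2 for a regressor, not classification accuracy).
# Note: recent scikit-learn releases require param_name / param_range to be
# passed as keyword arguments in the call below.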
# train_score,val_score = validation_curve(PolynomialRegression(),x,y,'polynomialfeatures__degree',degree,cv=7)
# print(train_score.shape)
# print(val_score)

# plt.plot(degree,np.median(train_score,1),color='blue',label='train score')
# plt.plot(degree,np.median(val_score,1),color='red',label='validation score')
# plt.legend(loc='best')
# plt.ylim(0,1)
# plt.xlabel('degree')
# plt.ylabel('score')

# 通过验证曲线得到最好结果是指数为3的情况，画出指数为3的情况
# plt.scatter(x.ravel(),y)
# lim = plt.axis()
# y_test = PolynomialRegression(3).fit(x,y).predict(x_test)
# plt.plot(x_test.ravel(),y_test,color='red')
# plt.axis(lim)


# 学习曲线的使用案例
# fig,ax = plt.subplots(1,2,figsize=(16,6))
# fig.subplots_adjust(left=0.0625,right=0.95,wspace=0.1)
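# Compute learning curves for polynomial degrees 2 and 9. A learning curve shows
# how the model's scores change as the training set grows; train_sizes lists the
# training-set sizes to evaluate (floats, as used here, are fractions of the
# largest available training set).
# learning_curve returns an array with the actual number of training samples
# used at each step, plus the training and validation scores (R^2 for this
# regressor, not accuracy).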
# for i,degree in enumerate([2,9]):
#     N,train_lc,val_lc = learning_curve(PolynomialRegression(degree),x,y,cv=7,train_sizes=np.linspace(0.3,1,25))
#     print(N)
#     ax[i].plot(N,np.mean(train_lc,1),color='blue',label='training score')
#     ax[i].plot(N,np.mean(val_lc,1),color='red',label='validation score')
#     ax[i].hlines(np.mean([train_lc[-1],val_lc[-1]]),N[0],N[-1],color='green',linestyle='dashed')
#
#     ax[i].set_ylim(0,1)
#     ax[i].set_xlim(N[0],N[-1])
#     ax[i].set_xlabel('training size')
#     ax[i].set_ylabel('score')
#     ax[i].set_title('degree={0}'.format(degree),size=14)
#     ax[i].legend(loc='best')
#
# plt.show()


# Grid-search example: cross-validate every combination in ``param_grid``
# (7 folds) and keep the best-scoring pipeline.
# NOTE: ``linearregression__normalize`` is deliberately not searched. The
# ``normalize`` option of LinearRegression was deprecated in scikit-learn 1.0
# and removed in 1.2, so listing it makes GridSearchCV fail with an
# invalid-parameter error on current releases.
param_grid = {
    'polynomialfeatures__degree': np.arange(21),
    'linearregression__fit_intercept': [True, False],
}
grid = GridSearchCV(PolynomialRegression(), param_grid, cv=7)
grid.fit(x, y)
# Best hyper-parameter combination found by the search.
print(grid.best_params_)
# Best model. GridSearchCV refits it on the full (x, y) by default
# (refit=True), so it can be used for prediction without another fit.
model = grid.best_estimator_
plt.scatter(x.ravel(), y)
# Remember the axis limits of the scatter so the fitted curve does not rescale
# the view.
lim = plt.axis()
y_test = model.predict(x_test)
# The ``hold=True`` keyword was removed in Matplotlib 3.0; drawing onto the
# existing axes is the default behaviour, so it is simply omitted.
plt.plot(x_test.ravel(), y_test)
plt.axis(lim)
plt.show()

小柒sy

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
验证曲线，学习曲线以及网格搜索

from sklearn.pipeline import make_pipeline from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LinearRegression import numpy as np import matplotlib.pyplot as plt fro...
复制链接

扫一扫