from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import validation_curve,learning_curve,GridSearchCV
def PolynomialRegression(degree=2, **kwargs):
    """Build a polynomial-regression pipeline.

    The pipeline chains ``PolynomialFeatures(degree)`` with
    ``LinearRegression(**kwargs)`` via ``make_pipeline``.

    Args:
        degree: Degree passed to ``PolynomialFeatures``.
        **kwargs: Forwarded unchanged to ``LinearRegression``.

    Returns:
        The unfitted pipeline produced by ``make_pipeline``.
    """
    return make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs))
def make_data(N, err=1.0, rseed=1):
    """Generate a reproducible random sample for a 1-D regression problem.

    Args:
        N: Number of samples to draw.
        err: Scale of the Gaussian noise added to ``y``; no noise is added
            when ``err`` is not greater than 0.
        rseed: Seed for the ``numpy.random.RandomState`` generator.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape ``(N, 1)`` and ``y`` has
        shape ``(N,)`` with ``y = 10 - 1 / (x + 0.1)`` plus optional noise.
    """
    # Randomly sample the data from a seeded generator so runs are repeatable.
    rng = np.random.RandomState(rseed)
    # Squaring the uniform draws keeps x in [0, 1) but skews it toward 0.
    x = rng.rand(N, 1) ** 2
    y = 10 - 1 / (x.ravel() + 0.1)
    if err > 0:
        y += err * rng.randn(N)
    return x, y
# Draw the 40-sample data set used by every example below.
x, y = make_data(40)
# 500 evenly spaced points on [-0.1, 1.1], shaped as a single-feature column,
# on which fitted models are evaluated for plotting.
x_test = np.linspace(-0.1, 1.1, 500).reshape(-1, 1)
# plt.scatter(x.ravel(),y,color='black')
# axis = plt.axis()
# for degree in [1,3,5]:
# y_test = PolynomialRegression(degree).fit(x,y).predict(x_test)
# plt.plot(x_test.ravel(),y_test,label='degree={0}'.format(degree))
#
# plt.xlim(-0.1,1.0)
# plt.ylim(-2,12)
# plt.legend(loc='best')
# plt.show()
# Validation-curve example
# degree = np.arange(0,21)
# validation_curve returns the training scores and the validation scores
# train_score,val_score = validation_curve(PolynomialRegression(),x,y,'polynomialfeatures__degree',degree,cv=7)
# print(train_score.shape)
# print(val_score)
# plt.plot(degree,np.median(train_score,1),color='blue',label='train score')
# plt.plot(degree,np.median(val_score,1),color='red',label='validation score')
# plt.legend(loc='best')
# plt.ylim(0,1)
# plt.xlabel('degree')
# plt.ylabel('score')
# The validation curve shows the best result at polynomial degree 3, so plot the degree-3 fit
# plt.scatter(x.ravel(),y)
# lim = plt.axis()
# y_test = PolynomialRegression(3).fit(x,y).predict(x_test)
# plt.plot(x_test.ravel(),y_test,color='red')
# plt.axis(lim)
# Learning-curve example
# fig,ax = plt.subplots(1,2,figsize=(16,6))
# fig.subplots_adjust(left=0.0625,right=0.95,wspace=0.1)
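# Compute learning curves for polynomial degrees 2 and 9. A learning curve shows how the model behaves as the training set grows; the train_sizes parameter specifies the training-set sizes to evaluate.
# learning_curve returns the array of training-set sizes used, together with the training and validation scores.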
# for i,degree in enumerate([2,9]):
# N,train_lc,val_lc = learning_curve(PolynomialRegression(degree),x,y,cv=7,train_sizes=np.linspace(0.3,1,25))
# print(N)
# ax[i].plot(N,np.mean(train_lc,1),color='blue',label='training score')
# ax[i].plot(N,np.mean(val_lc,1),color='red',label='validation score')
# ax[i].hlines(np.mean([train_lc[-1],val_lc[-1]]),N[0],N[-1],color='green',linestyle='dashed')
#
# ax[i].set_ylim(0,1)
# ax[i].set_xlim(N[0],N[-1])
# ax[i].set_xlabel('training size')
# ax[i].set_ylabel('score')
# ax[i].set_title('degree={0}'.format(degree),size=14)
# ax[i].legend(loc='best')
#
# plt.show()
# Grid-search example: cross-validate over the polynomial degree and the
# linear-regression options, then plot the best model's fit.
param_grid = {
    'polynomialfeatures__degree': np.arange(21),
    'linearregression__fit_intercept': [True, False],
}
# `normalize` no longer exists on newer scikit-learn releases; searching over
# a non-existent parameter makes GridSearchCV fail, so only include it when
# the estimator still exposes it.
if 'normalize' in LinearRegression().get_params():
    param_grid['linearregression__normalize'] = [True, False]

grid = GridSearchCV(PolynomialRegression(), param_grid, cv=7)
grid.fit(x, y)

# Best parameter combination found by the search.
print(grid.best_params_)

# Best model. GridSearchCV refits it on all of (x, y) by default, so no
# additional fit call is needed before predicting.
model = grid.best_estimator_

plt.scatter(x.ravel(), y)
# Remember the axis limits of the scatter plot so the fitted curve, which is
# evaluated slightly outside the data range, does not rescale the view.
lim = plt.axis()
y_test = model.predict(x_test)
# The former `hold=True` keyword is gone from current matplotlib (drawing on
# the existing axes is the default behaviour), so it is not passed.
plt.plot(x_test.ravel(), y_test)
plt.axis(lim)
plt.show()
# Validation curves, learning curves, and grid search
# (Source-page note: latest recommended article published on 2022-07-22 18:08:53)