import sklearn
import numpy as np
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Print the installed scikit-learn version so the results can be tied to it.
print(sklearn.__version__)
0.20.3
定义训练和测试函数,训练函数输出拟合误差,测试函数返回预测结果与预测得分
# 模型回归结果
def trainLinearRegression(X, y):
    """Fit an ordinary least-squares model and print its training error.

    Args:
        X: Training features, passed to ``LinearRegression.fit``.
        y: Training targets, passed to ``LinearRegression.fit``.

    Returns:
        The fitted ``linear_model.LinearRegression`` instance.
    """
    model = linear_model.LinearRegression()
    model.fit(X, y)
    # Mean squared error measured on the training data itself, i.e. the fit
    # error rather than an estimate of generalization error.
    train_mse = np.mean((model.predict(X) - y) ** 2)
    # Label uses the same spelling as the Ridge and Lasso trainers.
    print('E(|y-wTx|^2):' + str(train_mse))
    return model
# 岭回归
def trainRidgeRegression(X, y, coeff):
    """Fit a ridge regression model and print its training error.

    Args:
        X: Training features, passed to ``Ridge.fit``.
        y: Training targets, passed to ``Ridge.fit``.
        coeff: Regularization strength, forwarded as ``Ridge(alpha=coeff)``.

    Returns:
        The fitted ``linear_model.Ridge`` instance.
    """
    model = linear_model.Ridge(alpha=coeff)
    model.fit(X, y)
    # Mean squared error measured on the training data itself.
    train_mse = np.mean((model.predict(X) - y) ** 2)
    print('E(|y-wTx|^2):' + str(train_mse))
    return model
# lasso回归
def trainLassoRegression(X, y, coeff):
    """Fit a lasso regression model and print its training error.

    Args:
        X: Training features, passed to ``Lasso.fit``.
        y: Training targets, passed to ``Lasso.fit``.
        coeff: Regularization strength, forwarded as ``Lasso(alpha=coeff)``.

    Returns:
        The fitted ``linear_model.Lasso`` instance.
    """
    model = linear_model.Lasso(alpha=coeff)
    model.fit(X, y)
    # Mean squared error measured on the training data itself.
    train_mse = np.mean((model.predict(X) - y) ** 2)
    print('E(|y-wTx|^2):' + str(train_mse))
    return model
# 模型测试得分
def testModel(X_test, y_test, model):
score = model.score(X_test, y_test)
y_pred = model.predict(X_test)
print('model test score is '+str(score))
return y_pred, score
利用波士顿房价数据集进行房价预测
# Load the Boston housing data as a (features, targets) pair and show shapes.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this call needs an older release such as the 0.20.3 printed above;
# confirm the target environment before upgrading.
data, label = datasets.load_boston(return_X_y = True)
print(data.shape, label.shape)
(506, 13) (506,)
#数据切分
# Hold out 20% of the samples for testing. The seed is fixed so that the
# split, and every error and score computed from it, is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data, label, test_size=0.2, random_state=42
)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
(404, 13) (404,) (102, 13) (102,)
# Fit the three regressors on the same training split; each call prints its
# own training error. The third argument is the regularization strength.
linearModel = trainLinearRegression(X=X_train, y=y_train)
ridgeModel = trainRidgeRegression(X=X_train, y=y_train, coeff=0.5)
lassoModel = trainLassoRegression(X=X_train, y=y_train, coeff=0.01)
E(|y-wTx)|^2):23.035621901695357
E(|y-wTx|^2):23.106381821638468
E(|y-wTx|^2):23.072139176720263
结果可知:在训练集上,线性回归的拟合误差最小,lasso次之,岭回归最大。这符合预期:普通最小二乘直接最小化训练集均方误差,而岭回归和lasso加入了正则化项,以牺牲少量训练精度为代价约束权重;三者的差距很小。
# Evaluate each fitted model on the held-out split; every call prints the
# score and hands back the predictions together with that score.
y_pred_linear, score_linear = testModel(X_test=X_test, y_test=y_test, model=linearModel)
y_pred_ridge, score_ridge = testModel(X_test=X_test, y_test=y_test, model=ridgeModel)
y_pred_lasso, score_lasso = testModel(X_test=X_test, y_test=y_test, model=lassoModel)
model test score is 0.711048675536282
model test score is 0.7073389448825249
model test score is 0.7092400405765255
结果可知:从测试集上看,线性回归得分最高(0.7110),lasso次之(0.7092),岭回归最低(0.7073),排序与训练集上的结果一致,三者差距很小
# Plot the first test samples of each model's predictions against the true
# targets, one figure per model.
# Clamp the window so a test set smaller than 30 samples still plots.
n_shown = min(30, len(y_test))
sample_idx = range(n_shown)
for plot_title, predictions, line_fmt in (
    ('linear model prediction', y_pred_linear, 'bo-'),
    ('ridge model prediction', y_pred_ridge, 'r>-'),
    ('lasso model prediction', y_pred_lasso, 'ks-'),
):
    plt.figure()
    plt.plot(sample_idx, predictions[:n_shown], line_fmt, label='prediction')
    plt.plot(sample_idx, y_test[:n_shown], 'y.--', label='ground truth')
    # Each figure is titled after the model it shows; previously all three
    # were labelled as the linear model.
    plt.title(plot_title, fontsize='large')
    plt.legend()
plt.show()