数据介绍
回归性能评估
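均方误差(Mean Squared Error,MSE)评价机制:
$$MSE = \frac{1}{m}\sum_{i=1}^{m}\left(y_i - \bar{y}_i\right)^2$$
- 注:$y_i$ 为第 i 个样本的预测值,$\bar{y}_i$ 为第 i 个样本的真实值,m 为样本数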
sklearn.metrics.mean_squared_error(y_true, y_pred)
- 均方误差回归损失
- y_true : 真实值
- y_pred : 预测值
- return : 浮点数结果
代码实现
# 导入模块
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor, RidgeCV, Ridge
from sklearn.metrics import mean_squared_error
# 消除警告
import warnings
warnings.filterwarnings('ignore')
正规方程法
def linear_model1():
    """Linear regression on the Boston housing data (normal-equation approach).

    Loads the dataset, splits it into train and test sets, standardizes the
    features, fits a ``LinearRegression`` estimator, and prints the test-set
    predictions, the learned coefficients, the intercept and the mean
    squared error on the test set.

    Returns:
        None
    """
    # 1. Load the data.
    # NOTE(review): load_boston was removed in scikit-learn 1.2, so this
    # function needs an older scikit-learn release to run.
    data = load_boston()

    # 2. Split into train and test sets (fixed seed for a repeatable split).
    x_train, x_test, y_train, y_test = train_test_split(
        data.data, data.target, random_state=22
    )

    # 3. Feature engineering: standardization.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    # Scale the test set with the mean/std learned from the training set.
    # Calling fit_transform here would re-fit the scaler on the test data,
    # so the model would be evaluated on features scaled differently from
    # the ones it was trained on.
    x_test = transfer.transform(x_test)

    # 4. Machine learning: linear regression.
    estimator = LinearRegression()
    estimator.fit(x_train, y_train)

    # 5. Model evaluation.
    # 5.1 Predictions and fitted parameters.
    y_predict = estimator.predict(x_test)
    print("预测值为:\n", y_predict)
    print("模型中的系数为:\n", estimator.coef_)
    print("模型中的偏置为:\n", estimator.intercept_)

    # 5.2 Mean squared error on the test set.
    error = mean_squared_error(y_test, y_predict)
    print("误差为:\n", error)
    return None
# Run the normal-equation linear regression demo; it prints its results.
linear_model1()
预测值为: [28.14790667 31.30481159 20.5173895 31.4803076 19.01576648 18.26058425 20.57439825 18.45232382 18.46065155 32.93661269 20.3603692 27.24886071 14.81691426 19.20872297 37.01503458 18.32036009 7.71389628 17.56196944 30.18543811 23.60655873 18.14917545 33.84385342 28.48976083 16.9967041 34.76065063 26.22246312 34.83857168 26.62310118 18.64402278 13.21154037 30.37364532 14.70785748 37.18173708 8.88049446 15.06699441 16.14502168 7.19990762 19.17049423 39.56848262 28.23663 24.62411509 16.75182833 37.84465582 5.71770376 21.21547924 24.63882018 18.8561516 19.93416672 15.19839712 26.29892968 7.4274177 27.14300763 29.18745146 16.27895854 7.99799673 35.46394958 32.38905222 20.83161049 16.41464618 20.87141783 22.92150844 23.60828508 19.32245804 38.33751529 23.87463642 18.98494066 12.63480997 6.12915396 41.44675745 21.08894595 16.27561572 21.48546861 40.74502107 20.4839158 36.82098808 27.0452329 19.79437176 19.64484428 24.58763105 21.08454269 30.91968983 19.3326693 22.30088735 31.0904808 26.36418084 20.25648139 28.81879823 20.82632806 26.01779216 19.37871837 24.9599814 22.31091614 18.94468902 18.77414161 14.07143768 17.44450331 24.19727889 15.86077811 20.09007025 26.51946463 20.1336741 17.02456077 23.86647679 22.84428441 21.00754322 36.17169898 14.67959839 20.5656347 32.46704858 33.24183156 19.81162376 26.55899048 20.90676734 16.42301853 20.76605527 20.54658755 26.86304808 24.14176193 23.23824644 13.81640493 15.37727091 2.79513898 28.89744167 19.80407672 21.50002831 27.5410586 28.54270527] 模型中的系数为: [-0.64817766 1.14673408 -0.05949444 0.74216553 -1.95515269 2.70902585 -0.07737374 -3.29889391 2.50267196 -1.85679269 -1.75044624 0.87341624 -3.91336869] 模型中的偏置为: 22.62137203166228 误差为: 20.06219399035981
梯度下降法
def linear_model2():
    """Linear regression on the Boston housing data (gradient descent).

    Loads the dataset, holds out 20% of it as a test set, standardizes the
    features, fits an ``SGDRegressor`` with a constant learning rate, and
    prints the intercept, the coefficients, the test-set predictions and the
    mean squared error on the test set.

    Returns:
        None
    """
    # 1. Load the data.
    # NOTE(review): load_boston was removed in scikit-learn 1.2, so this
    # function needs an older scikit-learn release to run.
    boston = load_boston()

    # 2. Basic data handling.
    # 2.1 Split the data; no random_state is passed, so the split (and the
    # printed numbers) vary from run to run.
    x_train, x_test, y_train, y_test = train_test_split(
        boston.data, boston.target, test_size=0.2
    )

    # 3. Feature engineering: standardization.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    # Scale the test set with the mean/std learned from the training set.
    # Calling fit_transform here would re-fit the scaler on the test data,
    # so the model would be evaluated on features scaled differently from
    # the ones it was trained on.
    x_test = transfer.transform(x_test)

    # 4. Machine learning: linear regression trained by SGD with a fixed
    # step size of 0.001.
    estimator = SGDRegressor(learning_rate="constant", eta0=0.001)
    estimator.fit(x_train, y_train)
    print("这个模型的偏置是:\n", estimator.intercept_)
    print("这个模型的系数是:\n", estimator.coef_)

    # 5. Model evaluation.
    # 5.1 Predictions on the test set.
    y_pre = estimator.predict(x_test)
    print("预测值是:\n", y_pre)

    # 5.2 Mean squared error on the test set.
    ret = mean_squared_error(y_test, y_pre)
    print("均方误差:\n", ret)
# Run the gradient-descent linear regression demo; it prints its results.
linear_model2()
这个模型的偏置是: [22.53785633] 这个模型的系数是: [-0.5657386 0.70682636 -0.44752191 0.49161809 -1.13923085 3.18284855 0.09292622 -2.51057748 1.46790822 -0.9150509 -1.7153456 1.07308367 -4.03308649] 预测值是: [20.2307036 17.34030784 28.84970725 15.12964942 20.73230342 16.05157047 27.35050076 -0.24123335 32.22109828 18.40122228 19.07441934 31.23622593 18.43376067 13.36870565 41.20227521 27.60613585 20.24730639 27.18075059 26.56568514 -0.06488369 19.35199274 17.61394056 22.2197533 20.55657737 30.84683542 20.11906671 16.06055045 9.28365422 23.29706745 20.70186697 32.86806208 19.51574316 8.12096886 30.87191179 23.30604495 12.78951548 23.52194533 24.18405454 18.31745177 15.50051473 19.11139457 15.41904326 13.52745702 19.08337335 26.76429344 26.91321877 42.26340761 20.92918497 28.94475205 18.96444444 37.32450693 18.35963272 10.28188576 15.08387525 14.53300701 19.12089663 20.67637451 40.10012593 29.36142013 20.52850964 15.2555233 26.04895848 25.20131219 17.60953512 29.93436899 20.727217 34.20539254 16.29439065 22.15055158 18.39997557 28.43965601 9.23233515 19.35695087 22.02136146 21.93292495 38.57661965 17.51551117 17.35349302 32.7796047 21.2880705 21.24059392 31.45720825 15.47318603 25.28692474 23.84024913 34.10699912 19.96598714 25.05772885 31.0689929 20.84722388 26.6405891 25.2961062 23.65868742 23.8624314 28.79786161 30.4138941 2.73242432 30.36074484 43.68867889 24.34402337 21.95117042 27.1913537 ] 均方误差: 32.53165018998423