内容目录
一、岭回归建模;二、线性回归建模;三、模型使用(joblib)。原文见公众号:python宝
一、岭回归
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib
def linear3():
    """Fit a ridge regression on the Boston housing data and report its test error.

    The function loads the dataset, splits it with ``random_state=22``,
    standardises the features, fits ``Ridge(alpha=0.5, max_iter=10000)``,
    saves the fitted estimator with joblib, and prints the feature-matrix
    shape, the coefficients, the intercept, the test-set predictions and
    the test-set mean squared error.

    Side effects:
        Writes the fitted estimator to ``.\\my_ridge.pkl``.

    Returns:
        None.
    """
    # 1. Load the data.
    boston = load_boston()
    print("特征数量:\n", boston.data.shape)

    # 2. Split into training and test sets (fixed seed for reproducibility).
    x_train, x_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=22
    )

    # 3. Standardise: the scaler is fitted on the training set only and the
    #    same fitted statistics are then applied to the test set.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4. Build and fit the estimator.
    estimator = Ridge(alpha=0.5, max_iter=10000)
    estimator.fit(x_train, y_train)

    # Persist the fitted model. A raw string is used because "\m" is not a
    # valid escape sequence; the resulting path is unchanged.
    # The saved model can later be restored with joblib.load on the same path.
    joblib.dump(estimator, r".\my_ridge.pkl")

    # 5. Inspect the fitted model.
    print("岭回归-权重系数为:\n", estimator.coef_)
    print("岭回归-偏置为:\n", estimator.intercept_)

    # 6. Evaluate on the held-out test set.
    y_predict = estimator.predict(x_test)
    print("预测房价:\n", y_predict)
    error = mean_squared_error(y_test, y_predict)
    print("岭回归-均方误差为:\n", error)


if __name__ == "__main__":
    linear3()
特征数量:
(506, 13)
岭回归-权重系数为:
[-0.64193209 1.13369189 -0.07675643 0.74427624 -1.93681163 2.71424838
-0.08171268 -3.27871121 2.45697934 -1.81200596 -1.74659067 0.87272606
-3.90544403]
岭回归-偏置为:
22.62137203166228
预测房价:
[28.22536271 31.50554479 21.13191715 32.65799504 20.02127243 19.07245621
21.10832868 19.61646071 19.63294981 32.85629282 20.99521805 27.5039205
15.55295503 19.79534148 36.87534254 18.80312973 9.39151837 18.50769876
30.66823994 24.3042416 19.08011554 34.10075629 29.79356171 17.51074566
34.89376386 26.53739131 34.68266415 27.42811508 19.08866098 14.98888119
30.85920064 15.82430706 37.18223651 7.77072879 16.25978968 17.17327251
7.44393003 19.99708381 40.57013125 28.94670553 25.25487557 17.75476957
38.77349313 6.87948646 21.78603146 25.27475292 20.4507104 20.47911411
17.25121804 26.12109499 8.54773286 27.48936704 30.58050833 16.56570322
9.40627771 35.52573005 32.2505845 21.8734037 17.61137983 22.08222631
23.49713296 24.09419259 20.15174912 38.49803353 24.63926151 19.77214318
13.95001219 6.7578343 42.03931243 21.92262496 16.89673286 22.59476215
40.75560357 21.42352637 36.88420001 27.18201696 21.03801678 20.39349944
25.35646095 22.27374662 31.142768 20.39361408 23.99587493 31.54490413
26.76213545 20.8977756 29.0705695 21.99584672 26.30581808 20.10938421
25.47834262 24.08620166 19.90788343 16.41215513 15.26575844 18.40106165
24.82285704 16.61995784 20.87907604 26.70640134 20.75218143 17.88976552
24.27287641 23.36686439 21.57861455 36.78815164 15.88447635 21.47747831
32.80013402 33.71367379 20.61690009 26.83175792 22.69265611 17.38149366
21.67395385 21.67101719 27.6669245 25.06785897 23.73251233 14.65355067
15.19441045 3.81755887 29.1743764 20.68219692 22.33163756 28.01411044
28.55668351]
岭回归-均方误差为:
20.64177160618091
二、线性回归
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge,LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib
from sklearn.metrics import confusion_matrix,roc_curve, auc, recall_score, classification_report
def linear3():
    """Fit an ordinary linear regression on the Boston housing data.

    The function loads the dataset, splits it with ``random_state=22``,
    standardises the features, fits ``LinearRegression()``, saves the
    fitted estimator with joblib, and prints the feature-matrix shape and
    the test-set mean squared error.

    Side effects:
        Writes the fitted estimator to ``.\\my_Line.pkl``.

    Returns:
        None.
    """
    # 1. Load the data.
    boston = load_boston()
    print("特征数量:\n", boston.data.shape)

    # 2. Split into training and test sets (fixed seed for reproducibility).
    x_train, x_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=22
    )

    # 3. Standardise: the scaler is fitted on the training set only and the
    #    same fitted statistics are then applied to the test set.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4. Build and fit the estimator.
    estimator = LinearRegression()
    estimator.fit(x_train, y_train)

    # Persist the fitted model. A raw string is used because "\m" is not a
    # valid escape sequence; the resulting path is unchanged.
    # The saved model can later be restored with joblib.load on the same path.
    joblib.dump(estimator, r".\my_Line.pkl")

    # 5. Evaluate: mean squared error of the predictions on the test set.
    y_predict = estimator.predict(x_test)
    error = mean_squared_error(y_test, y_predict)
    print("线性-均方误差为:\n", error)


if __name__ == "__main__":
    linear3()
特征数量:
(506, 13)
线性-均方误差为:
20.6275137630954
三、模型使用
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge,LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib
import numpy as np
# Load a previously saved linear-regression model and predict one sample.
np.set_printoptions(suppress=True)

# Recreate the same train/test split that was used when the model was trained
# (same dataset, same random_state), so the training statistics can be rebuilt.
boston = load_boston()
x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

# Load the persisted model. A raw string is used because "\m" is not a valid
# escape sequence; the resulting path is unchanged.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
estimator = joblib.load(r".\my_Line.pkl")

# The scaler must carry the TRAINING-set mean and variance. Fitting it on the
# single query row instead turns every feature into 0, so the model would
# return only its intercept regardless of the input.
transfer = StandardScaler()
transfer.fit(x_train)

# One raw (unscaled) sample with 13 features.
# NOTE(review): presumably this is x_test[1], since it is compared with
# y_test[1] below; confirm.
a = np.array([[6.6420e-02, 0.0000e+00, 4.0500e+00, 0.0000e+00, 5.1000e-01,
               6.8600e+00, 7.4400e+01, 2.9153e+00, 5.0000e+00, 2.9600e+02,
               1.6600e+01, 3.9127e+02, 6.9200e+00]])
a = transfer.transform(a)

pre1 = estimator.predict(a)
true1 = y_test[1]
print(pre1, true1)
[22.62137203] 29.9
About Me:小婷儿
● 本文作者:小婷儿,专注于python、数据分析、数据挖掘、机器学习相关技术,也注重技术的运用
● 作者博客地址:https://blog.csdn.net/u010986753
● 本系列题目来源于作者的学习笔记,部分整理自网络,若有侵权或不当之处还请谅解
● 版权所有,欢迎分享本文,转载请保留出处
● 微信:tinghai87605025 联系我加微信群
● QQ:87605025
● QQ交流群py_data :483766429
● 公众号:python宝 或 DB宝
● 提供OCP、OCM和高可用最实用的技能培训
● 题目解答若有不当之处,还望各位朋友批评指正,共同进步
如果您觉得这篇文章对您有帮助,欢迎赞赏哦!有您的支持,小婷儿一定会越来越好!