1. 数据集 —— 波士顿房价
给定的这些特征,是专家们总结出的影响房价的属性。在此阶段,我们不需要自己去探究这些特征是否有用,只需要直接使用它们;到了后面,很多特征需要我们自己去寻找并量化。
2. 实现
2.1 代码
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,SGDRegressor
from sklearn.metrics import mean_squared_error
def linear():
    """Fit a LinearRegression model on the Boston housing data and report it.

    The original author describes this as the "normal equation" approach.
    The function loads the dataset, holds out 20% of the rows as a test set
    (fixed seed 22), standardizes the features, fits the model, and prints
    the learned coefficients, the intercept, the test-set predictions and
    the test-set mean squared error.

    :return: None
    """
    # NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm the
    # installed version still provides it before running this script.
    dataset = load_boston()

    train_x, test_x, train_y, test_y = train_test_split(
        dataset.data,
        dataset.target,
        test_size=0.2,
        random_state=22,
    )

    # Scaling statistics come from the training split only; the test split is
    # transformed with those same statistics.
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    test_x = scaler.transform(test_x)

    # fit() returns the estimator itself, so construction and fitting chain.
    model = LinearRegression().fit(train_x, train_y)
    print('权重系数:\n', model.coef_)
    print('偏置为:\n', model.intercept_)

    # Evaluate on the held-out rows.
    predicted = model.predict(test_x)
    print('预测房价:\n', predicted)
    print('正规方程-均方误差为:\n', mean_squared_error(test_y, predicted))
    return None
def SGD_linear():
    """Fit an SGDRegressor on the Boston housing data and report it.

    This is the gradient-descent counterpart of the closed-form fit: the
    function loads the dataset, holds out 20% of the rows as a test set
    (fixed seed 22), standardizes the features, trains an SGDRegressor with
    ``eta0=0.005`` and ``max_iter=10000``, and prints the learned
    coefficients, the intercept, the test-set predictions and the test-set
    mean squared error.

    :return: None
    """
    # NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm the
    # installed version still provides it before running this script.
    dataset = load_boston()
    features, prices = dataset.data, dataset.target

    train_x, test_x, train_y, test_y = train_test_split(
        features, prices, test_size=0.2, random_state=22
    )

    # Scaling statistics come from the training split only; the test split is
    # transformed with those same statistics.
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    test_x = scaler.transform(test_x)

    # NOTE(review): no random_state is passed to SGDRegressor, so results
    # presumably vary between runs -- confirm whether that is intended.
    model = SGDRegressor(eta0=0.005, max_iter=10000)
    model.fit(train_x, train_y)
    print('权重系数:\n', model.coef_)
    print('偏置为:\n', model.intercept_)

    # Evaluate on the held-out rows.
    predicted = model.predict(test_x)
    print('预测房价:\n', predicted)

    error = mean_squared_error(test_y, predicted)
    print('梯度下降-均方误差为:\n', error)
if __name__ == "__main__":
    # Run both demos in order: the closed-form fit first, then the SGD fit.
    for run_demo in (linear, SGD_linear):
        run_demo()