线性回归:
寻找一种能预测的趋势(用特征的线性组合去拟合目标值)。
算法,策略,优化:
代码:(两种方式预测房价)
from sklearn.linear_model import LinearRegression,SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
def boston_regression():
    """Predict Boston house prices with LinearRegression and plot the result.

    Loads the Boston housing data, holds out 20% as a test split,
    standardizes both the features and the target, fits a
    ``LinearRegression`` model, then prints and plots the test-set
    predictions converted back to the original price scale.

    NOTE(review): ``load_boston`` was removed from recent scikit-learn
    releases; this function needs a release that still ships it.
    """
    # Load the data.
    lb = load_boston()

    # Split into train / test sets (fixed seed for reproducibility).
    x_train, x_test, y_train, y_test = train_test_split(
        lb.data, lb.target, test_size=0.2, random_state=22
    )

    # Feature engineering: standardize both the features and the target.
    # The test split is transformed with the statistics learned on the
    # training split only.
    scaler_x = StandardScaler()
    x_train = scaler_x.fit_transform(x_train)
    x_test = scaler_x.transform(x_test)

    # StandardScaler only accepts 2-D input, so the 1-D target vector is
    # reshaped into a single column before scaling.
    scaler_y = StandardScaler()
    y_train = scaler_y.fit_transform(y_train.reshape(-1, 1))
    y_test = scaler_y.transform(y_test.reshape(-1, 1))  # not used below

    # Model: ordinary least squares (closed-form / normal-equation solution).
    estimator = LinearRegression()
    estimator.fit(x_train, y_train)

    # Map the standardized predictions back to the original price scale.
    # The reshape keeps the input 2-D regardless of the shape predict returns.
    y_pre = scaler_y.inverse_transform(
        estimator.predict(x_test).reshape(-1, 1)
    )
    print('预测值为:\n', y_pre)

    plt.plot(y_pre)
    plt.show()
def boston_regressor():
    """Predict Boston house prices with SGDRegressor and plot the result.

    Same pipeline as the LinearRegression variant, but the model is fitted
    by stochastic gradient descent. Prints the learned coefficients and the
    test-set predictions (converted back to the original price scale), then
    plots the predictions.

    NOTE(review): ``load_boston`` was removed from recent scikit-learn
    releases; this function needs a release that still ships it.
    """
    # Load the data.
    lb = load_boston()

    # Split into train / test sets (fixed seed for reproducibility).
    x_train, x_test, y_train, y_test = train_test_split(
        lb.data, lb.target, test_size=0.2, random_state=22
    )

    # Feature engineering: standardize both the features and the target.
    # The test split is transformed with the statistics learned on the
    # training split only.
    scaler_x = StandardScaler()
    x_train = scaler_x.fit_transform(x_train)
    x_test = scaler_x.transform(x_test)

    # StandardScaler only accepts 2-D input, so the target is scaled as a
    # single column.
    scaler_y = StandardScaler()
    y_train = scaler_y.fit_transform(y_train.reshape(-1, 1))
    y_test = scaler_y.transform(y_test.reshape(-1, 1))  # not used below

    # Model: linear regression fitted by stochastic gradient descent.
    # SGDRegressor expects a 1-D target, so the scaled column is flattened
    # to avoid a DataConversionWarning.
    estimator = SGDRegressor()
    estimator.fit(x_train, y_train.ravel())
    print(estimator.coef_)

    # predict() returns a 1-D array, but inverse_transform needs 2-D input:
    # reshape to a column for the scaler, then flatten back to 1-D.
    y_pre = scaler_y.inverse_transform(
        estimator.predict(x_test).reshape(-1, 1)
    ).ravel()
    print('预测值为:\n', y_pre)

    plt.plot(y_pre)
    plt.show()
if __name__ == "__main__":
    # Script entry point: runs the SGD-based demo.
    # Call boston_regression() here instead to run the LinearRegression demo.
    boston_regressor()
回归性能评估:
均方误差
对比:
过拟合和欠拟合:
岭回归:
· 岭回归:回归得到的回归系数更符合实际,更可靠。另外,能让估计参数的波动范围变小,变得更稳定。在病态数据偏多的研究中有较大的实用价值。