1. The linear regression model
from sklearn import datasets
from sklearn.linear_model import LinearRegression
# Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# the classic Boston housing data is used here as in the original example.
boston = datasets.load_boston()
# LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
lr = LinearRegression()
lr.fit(boston.data, boston.target)
predictions = lr.predict(boston.data)
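The fitted model exposes its learned parameters through standard scikit-learn attributes; a quick way to inspect them:
# One weight per feature (13 for the Boston data) plus the bias term
print(lr.coef_)
print(lr.intercept_)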
# Mean of the residuals (errors)
import numpy as np
np.mean(predictions - boston.target)
>>>-4.7743978292180645e-16
The mean residual is essentially zero: ordinary least squares fitted with an intercept forces the residuals to sum to zero, so only floating-point noise remains.
# Mean squared error
def MSE(target, predictions):
    squared_deviation = np.power(target - predictions, 2)
    return np.mean(squared_deviation)

# Mean absolute error
def MAE(target, predictions):
    absolute_deviation = np.abs(target - predictions)
    return np.mean(absolute_deviation)

print(MSE(boston.target, predictions))
print(MAE(boston.target, predictions))
>>>21.8977792177
>>>3.272944638
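The hand-rolled metrics can be cross-checked against scikit-learn's built-in implementations in sklearn.metrics; the values should match the outputs above:
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Same MSE and MAE as the custom functions defined above
print(mean_squared_error(boston.target, predictions))
print(mean_absolute_error(boston.target, predictions))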
2. Using ridge regression to make up for the shortcomings of linear regression
from sklearn.datasets import make_regression
import numpy as np
# Build a dataset with 3 features whose effective rank is 2, so two of the three features are correlated.
reg_data, reg_target = make_regression(n_samples=2000, n_features=3, effective_rank=2, noise=10)
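To see the near-collinearity this produces, one can inspect the singular values of the generated feature matrix (a minimal check on the data created above):
# With effective_rank=2 the singular values decay sharply, so the third one is
# much smaller than the first two; this ill-conditioning is what ridge regression addresses
u, s, vt = np.linalg.svd(reg_data, full_matrices=False)
print(s)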
# Estimate the parameters with ordinary regression
def fit_2_regression(lr):
    n_bootstraps = 1000
    coefs = np.ones((n_bootstraps, 3))
    len_data = len(reg_data)
    subsample_size = int(0.75 * len_data)  # np.int was removed in NumPy 1.24; use the builtin int
    subsample = lambda: np.random.choice(np.arange(0, len_data), size=subsample_size)
    for i in range(n_bootstraps):
        subsample_idx = subsample()
        subsample_X = reg_data[subsample_idx]