- 优点:具有很强的解释性
## 一元线性回归
- 一个自变量和一个因变量，两个变量之间的关系用一条直线来模拟
- $h_\theta(x) = \theta_0 + \theta_1 x$，其中 $\theta_1 = \dfrac{\sum_{i=1}^{m}(x^{(i)} - \overline{x})(y^{(i)} - \overline{y})}{\sum_{i=1}^{m}(x^{(i)} - \overline{x})^2}$，$\theta_0 = \overline{y} - \theta_1\overline{x}$
代码实现
- 一般操作
# coding=utf-8
import numpy as np
class SimpleLinearRegression1:
    """Simple (single-feature) linear regression, y = a_*x + b_.

    The slope and intercept are computed with the closed-form
    least-squares formulas using an explicit Python loop.
    """

    def __init__(self):
        # Slope (theta_1) and intercept (theta_0); populated by fit().
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Fit the model to 1-D training data.

        x_train, y_train: 1-D arrays of equal length.
        Returns self so calls can be chained.
        """
        assert x_train.ndim == 1, '必须是一维的'
        # zip() would silently truncate mismatched inputs, so check lengths.
        assert len(x_train) == len(y_train), \
            'x_train and y_train must have the same length'
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        # Accumulate numerator and denominator of the slope formula:
        # a_ = sum((x_i - x̄)(y_i - ȳ)) / sum((x_i - x̄)^2)
        num = 0.
        d = 0.
        for x_i, y_i in zip(x_train, y_train):
            num += (x_i - x_mean) * (y_i - y_mean)
            d += (x_i - x_mean) ** 2
        self.a_ = num / d
        self.b_ = y_mean - self.a_ * x_mean
        return self

    def predict(self, x_predict):
        """Return predictions for each value in the 1-D array x_predict."""
        assert x_predict.ndim == 1, '必须一维数组'
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict"
        return np.array([self._predict(x) for x in x_predict])

    def _predict(self, x):
        # Single-sample prediction.
        return self.a_ * x + self.b_

    def __repr__(self):
        # Fixed: previously returned "SimpleLinearRegression()", which did
        # not match the class name and collided with the vectorized class.
        return "SimpleLinearRegression1()"
- 向量化计算
# coding=utf-8
import numpy as np
# 向量化计算
class SimpleLinearRegression2:
    """Simple (single-feature) linear regression, y = a_*x + b_.

    Same closed-form least-squares solution as SimpleLinearRegression1,
    but the slope is computed with vectorized dot products.
    """

    def __init__(self):
        # Slope (theta_1) and intercept (theta_0); populated by fit().
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Fit the model to 1-D training data.

        x_train, y_train: 1-D arrays of equal length.
        Returns self so calls can be chained.
        """
        assert x_train.ndim == 1, '必须是一维的'
        # Vectorized arithmetic would broadcast or raise obscurely on
        # mismatched lengths, so validate explicitly.
        assert len(x_train) == len(y_train), \
            'x_train and y_train must have the same length'
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        # a_ = sum((x - x̄)(y - ȳ)) / sum((x - x̄)^2) via dot products.
        num = (x_train - x_mean).dot(y_train - y_mean)
        d = (x_train - x_mean).dot(x_train - x_mean)
        self.a_ = num / d
        self.b_ = y_mean - self.a_ * x_mean
        return self

    def predict(self, x_predict):
        """Return predictions for each value in the 1-D array x_predict."""
        # Added for consistency with SimpleLinearRegression1's validation.
        assert x_predict.ndim == 1, '必须一维数组'
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict"
        return np.array([self._predict(x) for x in x_predict])

    def _predict(self, x):
        # Single-sample prediction.
        return self.a_ * x + self.b_

    def __repr__(self):
        # Fixed: previously returned "SimpleLinearRegression()", which did
        # not match the class name and collided with the loop-based class.
        return "SimpleLinearRegression2()"