实现Simple Linear Regression
原始数据:
import numpy as np
import matplotlib.pyplot as plt
# Raw sample data: five (x, y) points for the regression demo.
x = np.array([1, 2, 3, 4, 5], dtype=float)
y = np.array([1, 3, 2, 3, 5], dtype=float)
# Plot the points inside a fixed 0-6 viewing window.
plt.scatter(x, y)
plt.axis([0, 6, 0, 6])
plt.show()
计算a和b,拟合直线:
# Closed-form least-squares fit of y = a*x + b:
#   a = sum((x_i - x_mean)(y_i - y_mean)) / sum((x_i - x_mean)^2)
#   b = y_mean - a * x_mean
x_mean, y_mean = np.mean(x), np.mean(y)
num = 0.0
d = 0.0
for dx, dy in zip(x - x_mean, y - y_mean):
    num += dx * dy
    d += dx ** 2
a = num / d
b = y_mean - a * x_mean
# Draw the fitted line over the scatter of the original points.
y_hat = a * x + b
plt.scatter(x, y)
plt.plot(x, y_hat, color='r')
plt.axis([0, 6, 0, 6])
plt.show()
封装
import numpy as np
class SimpleLinearRegression:
    """Simple linear regression y = a*x + b, fit by ordinary least squares.

    Attributes
    ----------
    a_ : slope learned by fit(); None before fitting.
    b_ : intercept learned by fit(); None before fitting.
    """

    def __init__(self):
        # Parameters are None until fit() has been called.
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Train the model on 1-D arrays x_train and y_train; return self."""
        # NOTE: these asserts are stripped under `python -O`; kept for
        # interface compatibility with the original tutorial code.
        assert x_train.ndim == 1, \
            "Simple Linear Regression can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        # Closed-form least squares:
        #   a = sum((x-mean)(y-mean)) / sum((x-mean)^2),  b = y_mean - a*x_mean
        num = 0.0
        d = 0.0
        for x, y in zip(x_train, y_train):
            num += (x - x_mean) * (y - y_mean)
            d += (x - x_mean) ** 2
        self.a_ = num / d
        # Fixed bug: original used `self.a`, which does not exist (the
        # attribute is `self.a_`) and raised AttributeError here.
        self.b_ = y_mean - self.a_ * x_mean
        return self

    def predict(self, x_predict):
        """Return the vector of predictions for 1-D array x_predict."""
        assert x_predict.ndim == 1, \
            "Simple Linear Regression can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"
        # Fixed bug: `np.array(gen)` on a bare generator produces a 0-d
        # object array; a list comprehension yields a proper 1-D array.
        return np.array([self._predict(x) for x in x_predict])

    def _predict(self, x_single):
        """Predict the value for a single input x_single."""
        return self.a_ * x_single + self.b_
若想提升计算性能,可以利用向量化计算:
线性回归之向量化