import numpy as np
import matplotlib.pyplot as plt
def linearRegressionModule(x, a, b):
    """
    Linear model: return the prediction a * x + b.

    Works elementwise when x is a NumPy array, so a whole
    vector of predictions comes back from a single call.
    """
    return a * x + b
def differentialFunction(x, y, y_hat, a, b, alpha):
    """
    Perform one gradient-descent update for the model y_hat = a*x + b
    under the loss L = sum((y_hat - y)^2) / (2n).

    Parameters:
        x, y: training inputs and targets (sequences of equal length)
        y_hat: current predictions for x
        a, b: current slope and intercept
        alpha: learning rate

    Returns:
        (a, b): the updated slope and intercept.

    Bug fix: the original swapped the two gradients — it updated the
    slope `a` with dL/db and the intercept `b` with dL/da.  Correctly,
    dL/da = mean(x * (y_hat - y)) and dL/db = mean(y_hat - y).
    """
    n = len(y)
    residual = np.asarray(y_hat) - np.asarray(y)
    # Gradient w.r.t. the slope: mean of x-weighted residuals.
    grad_a = np.sum(x * residual) / n
    # Gradient w.r.t. the intercept: mean residual.
    grad_b = np.sum(residual) / n
    a = a - alpha * grad_a
    b = b - alpha * grad_b
    return a, b
def lossFunction(y, y_hat):
    """
    Half mean-squared-error loss: L = sum((y - y_hat)^2) / (2n).

    The factor of 2 in the denominator makes the gradient of L
    exactly mean(y_hat - y) terms, with no stray factor of 2.

    Parameters:
        y: target values
        y_hat: predicted values, same length as y

    Returns:
        The scalar loss value.
    """
    n = len(y)
    # zip-based comprehension replaces the index loop; also avoids
    # the original's shadowing of the builtin `sum`.
    squared_error = sum((yi - yhi) ** 2 for yi, yhi in zip(y, y_hat))
    return squared_error / (2 * n)
def evaluateModule(y_set, y_hat):
    """
    Coefficient of determination R^2 = SSR / SST.

    SST: total sum of squares,      sum((y - y_mean)^2)
    SSE: residual sum of squares,   sum((y - y_hat)^2)
    SSR: regression sum of squares, sum((y_hat - y_mean)^2)
    For least-squares fits, SST = SSR + SSE.

    Bug fix: the original summed the *unsquared* deviations, so
    SST = sum(y - y_mean) was identically zero (deviations from the
    mean cancel) and the score was meaningless.  Both terms must be
    sums of squares.

    Parameters:
        y_set: true target values
        y_hat: model predictions for the same inputs

    Returns:
        The R^2 score (1.0 for a perfect fit).
    """
    y_set = np.asarray(y_set, dtype=float)
    y_hat = np.asarray(y_hat, dtype=float)
    y_aver = np.mean(y_set)
    ssr = np.sum((y_hat - y_aver) ** 2)
    sst = np.sum((y_set - y_aver) ** 2)
    return ssr / sst
if __name__ == "__main__":
    # Initialize the model parameters: a is the slope, b the intercept.
    a = 0
    b = 0
    # Learning rate; too large a value makes the loss diverge to inf.
    alpha = 0.00001
    # Independent variable (raw data).
    x_set = np.array([12, 33, 41 ,43, 54 , 55, 76, 73, 108, 18, 297, 312, 514, 242, 279, 264, 257])
    y_set = np.array([131, 141, 342, 255, 653, 797, 1153, 989, 540, 1287, 1477, 1876, 2318, 1344, 1586, 875, 903])
    # Normalize both axes into [0, 1] by dividing by the maximum,
    # which keeps the gradient magnitudes small and stable.
    x_set_max = x_set.max()
    x_set = x_set / x_set_max
    # print(x_set)
    y_set_max = y_set.max()
    y_set = y_set / y_set.max()
    # print(y_set)
    # Initial predictions from the (zero-initialized) linear model.
    y_hat = linearRegressionModule(x_set, a, b)
    # Initial loss before any training.
    loss = lossFunction(y_set, y_hat)
    print(loss)
    # Iterate gradient descent until the loss drops below the threshold.
    # NOTE(review): there is no iteration cap, so this loops forever if
    # the threshold 0.015 is never reached — TODO confirm convergence.
    step = 0
    while(loss >= 0.015):
        # One gradient step updating a and b.
        a, b = differentialFunction(x_set, y_set, y_hat, a, b, alpha)
        y_hat = linearRegressionModule(x_set, a, b)
        loss = lossFunction(y_set, y_hat)
        step += 1
        if step % 1000 == 0:
            print("loss: ", loss)
    # Scatter plot of the normalized data plus the fitted regression line.
    fig_x = np.linspace(0.0, 1.0, 10)  # 10 evenly spaced points over the closed interval [0, 1]
    fig_y = a * fig_x + b
    plt.scatter(x_set, y_set)
    plt.plot(fig_x, fig_y)
    plt.show()
    # Model evaluation: R^2 score of the final fit.
    score = evaluateModule(y_set, y_hat)
    print("score: ", score)
# Simple one-variable linear regression implemented from scratch in Python.