手写笔记
线性回归算法实现
""" @File : linear
@Author : BabyMuu
@Time : 2022/3/23 13:40
"""
import numpy as np
class LinearRegression:
    """Linear regression trained with batch gradient descent.

    The design matrix is the feature array with a leading column of ones,
    so ``theta[0]`` is the intercept and ``theta[1:]`` are the per-feature
    weights.
    """

    def __init__(self, feature, labels):
        """Store the training set and start with all parameters at zero.

        :param feature: training features; a 1-D array is read as a single
            feature per sample, a 2-D array as (n_samples, n_features)
        :param labels: training targets, one value per sample
        """
        # Flatten the targets: an (n, 1) column would broadcast against the
        # 1-D prediction vector and yield an (n, n) residual.
        self.labels = np.asarray(labels, dtype=float).ravel()
        self.data, col, _ = self.standardized_data(feature)
        # One weight per feature plus the intercept, float from the start.
        self.theta = np.zeros(col + 1)
        self.loss_history = []   # loss recorded after every step
        self.theta_history = []  # theta recorded after every step

    @staticmethod
    def standardized_data(data):
        """Build the design matrix by prepending a column of ones.

        :param data: raw features, 1-D (single feature) or 2-D
            (n_samples, n_features)
        :return: tuple ``(matrix, col, row)``; ``matrix`` has shape
            (row, col + 1), ``col`` is the feature count and ``row`` the
            sample count
        """
        features = np.asarray(data)
        row = features.shape[0]
        col = 1 if features.ndim == 1 else features.shape[1]
        matrix = np.hstack((np.ones((row, 1)), features.reshape(row, col)))
        return matrix, col, row

    def fit(self, alpha=0.01, n_iterations=500):
        """Run a fixed number of batch gradient descent steps.

        :param alpha: learning rate
        :param n_iterations: number of steps to perform
        :return: tuple ``(theta, loss_history)``
        """
        for _ in range(n_iterations):
            self.step(alpha)
        return self.theta, self.loss_history

    def step(self, alpha):
        """Perform one gradient descent update on the training set.

        Objective: J(theta) = 1 / (2 * m) * sum((h_theta(x_i) - y_i) ** 2)
        Update:    theta <- theta - alpha / m * X^T (X theta - y)

        The new theta and the loss it produces are appended to
        ``theta_history`` and ``loss_history``.

        :param alpha: learning rate / step size
        :return: the training loss after the update
        """
        n_example = self.data.shape[0]
        residual = self.hypothesis(self.data, self.theta) - self.labels
        gradients = self.data.T.dot(residual)
        # Rebinding (not in-place update) keeps earlier history entries intact.
        self.theta = self.theta - alpha * gradients / n_example
        self.theta_history.append(self.theta)
        self.loss_history.append(self.cost_function(self.data, self.labels))
        return self.loss_history[-1]

    def fit_loss(self, alpha, loss_target, max_iterations=None):
        """Step until the training loss drops below ``loss_target``.

        The loop also stops when the loss becomes NaN or infinite (the
        descent has diverged and can never reach the target) or, if given,
        after ``max_iterations`` steps. With ``max_iterations=None`` and an
        unreachable target the loop does not terminate on its own.

        :param alpha: learning rate
        :param loss_target: stop as soon as the loss is strictly below this
        :param max_iterations: optional upper bound on the number of steps
            taken by this call; ``None`` means unbounded
        :return: tuple ``(theta, loss_history, len(loss_history))``; the
            count includes steps recorded by earlier calls on this instance
        """
        steps = 0
        while max_iterations is None or steps < max_iterations:
            loss = self.step(alpha)
            steps += 1
            if loss < loss_target or not np.isfinite(loss):
                break
        return self.theta, self.loss_history, len(self.loss_history)

    def cost_function(self, data, labels):
        """Halved mean squared error of the current theta on ``data``.

        :param data: design matrix that already contains the leading
            column of ones (see ``standardized_data``)
        :param labels: targets, one value per row of ``data``
        :return: scalar cost ``sum(residual ** 2) / (2 * n)``
        """
        n_example = data.shape[0]
        targets = np.asarray(labels, dtype=float).ravel()
        # Evaluate on the data that was passed in, not on the training set.
        delta = self.hypothesis(data, self.theta) - targets
        return (1 / 2) * np.dot(delta, delta) / n_example

    def predict(self, data):
        """Predict targets for raw features (without the ones column).

        :param data: raw features, 1-D or 2-D as in ``standardized_data``
        :return: 1-D array with one prediction per sample
        """
        matrix = self.standardized_data(data)[0]
        return self.hypothesis(matrix, self.theta)

    @staticmethod
    def hypothesis(data, theta):
        """Return the linear model output ``data @ theta``.

        :param data: design matrix including the leading ones column
        :param theta: parameter vector
        """
        return data.dot(theta)

    def get_cost(self, data, labels):
        """Compute the cost of the current theta on raw features.

        :param data: raw features (the ones column is added here)
        :param labels: targets, one value per sample
        :return: scalar cost, see ``cost_function``
        """
        matrix = self.standardized_data(data)[0]
        return self.cost_function(matrix, labels)
简单测试及可视化
""" @File : ok
@Author : BabyMuu
@Time : 2022/3/23 15:21
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from linear import LinearRegression as LR
# Use a font that can render the Chinese labels used in the plots below.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei']

# Experiment constants
BEGIN = 0            # lower bound of the synthetic feature range
END = 8              # upper bound of the synthetic feature range
N_ITERATION = 1000   # gradient descent steps
ALPHA = 0.08         # learning rate
DATA_LENGTH = 100    # number of synthetic samples

# Synthetic data: y = 3x + 1.25 plus uniform noise in [0, 2)
data = np.linspace(BEGIN, END, DATA_LENGTH)
target = 3 * data + 1.25 + np.random.rand(DATA_LENGTH) * 2

# Train / test split
x_train, x_test, y_train, y_test = train_test_split(
    data, target, test_size=0.4, random_state=2020)

# Train the model
lr = LR(x_train, y_train)
theta, loss_history = lr.fit(ALPHA, N_ITERATION)
theta_history = np.array(lr.theta_history)
print(f'函数方程 y = {theta[1]:.6f} * x + {theta[0]:.6f}')
print("优化前的损失函数: ", loss_history[0])
print("优化后的损失函数: ", loss_history[-1])

# Points along the training range used to draw the fitted line
x_axis = np.linspace(x_train.min(), x_train.max(), 200)
y_axis = lr.predict(x_axis)

# Horizontal axis for the convergence plots: step number in thousands, as
# stated by the axis labels (previously the data range BEGIN..END was used).
iteration_axis = np.arange(1, N_ITERATION + 1) / 1000

# Figure 1: data and fitted line
plt.figure(1, figsize=(6, 6))
ax1 = plt.subplot(111)
plt.title('MyLR')
plt.xlabel("data")
plt.ylabel("target")
# Scatter of the train and test samples
plt.scatter(x_train, y_train, s=40, marker='o', color='dodgerblue', label='训练集')
plt.scatter(x_test, y_test, s=40, marker='*', color='orange', label='测试集')
# Fitted regression line
plt.plot(x_axis, y_axis, color="black", linewidth=3,
         label=f'y = {theta[1]:.2f} * x + {theta[0]:.2f}, ')
plt.ylim(y_train.min() - 3, y_train.max() + 5)
plt.legend()

# Figure 2: parameter values over the iterations
plt.figure(2, figsize=(9, 4))
plt.plot(iteration_axis, theta_history[:, 0], label='误差项参数')
plt.plot(iteration_axis, theta_history[:, 1], label='一次项参数')
plt.legend()
plt.ylim(0, 5)
plt.xlim(0, 0.5)
plt.xlabel('迭代次数/10^3')
plt.ylabel('参数值')

# Figure 3: loss over the iterations
plt.figure(3, figsize=(9, 4))
plt.plot(iteration_axis, loss_history, color='r', linewidth=2, label="损失函数曲线")
plt.ylim(0, 10)
plt.xlim(0, 2)
plt.xlabel('迭代次数/10^3')
plt.ylabel('损失值')
plt.legend()

# Save before showing: with interactive backends show() blocks until the
# windows are closed, after which there is no drawn figure left to save.
# savefig writes the currently active figure, i.e. figure 3 (the loss curve).
plt.savefig("myLR.png")
plt.show()
可视化结果