机器学习-回归

最新推荐文章于 2024-07-24 11:37:51 发布

BabyMuu

最新推荐文章于 2024-07-24 11:37:51 发布

阅读量199

点赞数

分类专栏： # 手写算法模型

-------本文为博主Muu原创,未经博主允许禁止转载, 如有问题,欢迎指正-------

本文链接：https://blog.csdn.net/tenyearsWait/article/details/124648374

版权

线性回归 · 批量梯度下降 · 数据可视化 · 损失函数 · Python实现

关键词由CSDN通过智能技术生成

手写算法模型专栏收录该内容

4 篇文章 0 订阅

订阅专栏

手写笔记

在这里插入图片描述

线性回归算法实现

""" @File   : linear
    
    @Author : BabyMuu
    @Time   : 2022/3/23 13:40
"""

import numpy as np


class LinearRegression:
    """Linear regression fitted with batch gradient descent.

    The design matrix is the feature array with a leading column of ones,
    so ``theta[0]`` is the intercept and ``theta[1:]`` are the feature
    weights. Every call to :meth:`step` appends to ``loss_history`` and
    ``theta_history``.
    """

    def __init__(self, feature, labels):
        """
        :param feature: training features; 1-D (single feature) or 2-D with
            one row per sample
        :param labels: training targets, one per sample
        """
        self.labels = labels
        self.data, col, row = self.standardized_data(feature)
        # One weight per feature plus the intercept, all starting at zero.
        self.theta = np.zeros(col + 1)
        self.loss_history = []
        self.theta_history = []

    @staticmethod
    def standardized_data(data):
        """
        Build the design matrix by prepending a column of ones.

        :param data: raw features, 1-D or 2-D
        :return: ``(matrix, col, row)`` where ``matrix`` has shape
            ``(row, col + 1)``, ``col`` is the feature count and ``row`` is
            the sample count
        """
        data = np.asarray(data)
        # A 1-D input is treated as ``row`` samples of a single feature.
        col = 1 if data.ndim == 1 else data.shape[1]
        row = data.shape[0]
        bias = np.ones((row, 1))
        data = np.concatenate((bias, data.reshape(row, col)), axis=1)
        return data, col, row

    def fit(self, alpha=0.01, n_iterations=500):
        """
        Run batch gradient descent for a fixed number of iterations.

        :param alpha: learning rate
        :param n_iterations: number of update steps to perform
        :return: ``(theta, loss_history)``
        """
        for _ in range(n_iterations):
            self.step(alpha)
        return self.theta, self.loss_history

    def step(self, alpha):
        """
        Perform one gradient-descent update of ``theta``.

        Objective: ``J(theta) = 1 / (2m) * sum_i (h_theta(x_i) - y_i) ** 2``
        Update:    ``theta = theta - alpha / m * X.T @ (X @ theta - y)``

        The new theta and the loss measured after the update are appended
        to ``theta_history`` and ``loss_history``.

        :param alpha: learning rate / step size
        :return: the training loss after the update
        """
        n_example = self.data.shape[0]
        residual = self.hypothesis(self.data, self.theta) - self.labels
        gradients = residual.T.dot(self.data)
        # Rebinding (not in-place update) keeps each history entry distinct.
        self.theta = self.theta - alpha * gradients / n_example
        self.theta_history.append(self.theta)
        self.loss_history.append(self.cost_function(self.data, self.labels))
        return self.loss_history[-1]

    def fit_loss(self, alpha, loss_target, max_iterations=None):
        """
        Run gradient descent until the training loss drops below a target.

        :param alpha: learning rate
        :param loss_target: stop as soon as the loss is below this value
        :param max_iterations: optional cap on the number of steps taken by
            this call; ``None`` (default) means no cap
        :return: ``(theta, loss_history, len(loss_history))``
        :raises FloatingPointError: if the loss becomes inf/nan, which means
            the descent diverged and the target can never be reached
        """
        steps = 0
        while True:
            loss = self.step(alpha)
            steps += 1
            if loss < loss_target:
                break
            if not np.isfinite(loss):
                raise FloatingPointError(
                    "gradient descent diverged (non-finite loss); "
                    "try a smaller alpha"
                )
            if max_iterations is not None and steps >= max_iterations:
                break
        return self.theta, self.loss_history, len(self.loss_history)

    def cost_function(self, data, labels):
        """
        Halved mean squared error of the current ``theta`` on a data set.

        :param data: design matrix that already contains the leading column
            of ones (see :meth:`standardized_data`)
        :param labels: targets matching the rows of ``data``
        :return: ``1 / (2m) * sum((data @ theta - labels) ** 2)``
        """
        n_example = data.shape[0]
        # Evaluate the data passed in, not the training set, so that
        # get_cost() reports the loss of the requested samples.
        delta = self.hypothesis(data, self.theta) - labels
        return 0.5 * np.dot(delta.T, delta) / n_example

    def predict(self, data):
        """
        Predict targets for raw (un-augmented) features.

        :param data: raw features, 1-D or 2-D
        :return: predicted values, one per sample
        """
        data = self.standardized_data(data)[0]
        return self.hypothesis(data, self.theta)

    @staticmethod
    def hypothesis(data, theta):
        """
        Linear model ``h_theta(X) = X @ theta``.

        :param data: design matrix including the column of ones
        :param theta: parameter vector
        :return: model output for every row of ``data``
        """
        return data.dot(theta)

    def get_cost(self, data, labels):
        """
        Loss of the current ``theta`` on raw (un-augmented) features.

        :param data: raw features, 1-D or 2-D
        :param labels: targets matching the samples in ``data``
        :return: same value as :meth:`cost_function` on the augmented data
        """
        data = self.standardized_data(data)[0]
        return self.cost_function(data, labels)

简单测试及可视化

""" @File   : ok
    
    @Author : BabyMuu
    @Time   : 2022/3/23 15:21
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from linear import LinearRegression as LR

# Use a font with CJK glyphs so the Chinese labels below render correctly.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei']

# Experiment constants
BEGIN = 0
END = 8
N_ITERATION = 1000
ALPHA = 0.08
DATA_LENGTH = 100

# Synthetic data: a line with slope 3 plus uniform noise in [0, 2)
data = np.linspace(BEGIN, END, DATA_LENGTH)
target = 3 * data + 1.25 + np.random.rand(DATA_LENGTH) * 2
# Train / test split
x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.4, random_state=2020)

# Train the model
lr = LR(x_train, y_train)
theta, loss_history = lr.fit(ALPHA, N_ITERATION)
theta_history = np.array(lr.theta_history)
print(f'函数方程 y = {theta[1]:.6f} * x + {theta[0]:.6f}')
# NOTE: loss_history[0] is the loss recorded after the first update step.
print("优化前的损失函数: ", loss_history[0])
print("优化后的损失函数: ", loss_history[-1])
x_axis = np.linspace(x_train.min(), x_train.max(), 200)
y_axis = lr.predict(x_axis)

# x coordinates for the history plots: iteration index in thousands, so the
# values agree with the axis label (iterations / 10^3).
iteration_axis = np.arange(1, N_ITERATION + 1) / 1000

# Figure 1: data and fitted line
fit_figure = plt.figure(1, figsize=(6, 6))
ax1 = plt.subplot(111)
plt.title('MyLR')
plt.xlabel("data")
plt.ylabel("target")
# Scatter of both splits
plt.scatter(x_train, y_train, s=40, marker='o', color='dodgerblue', label='训练集')
plt.scatter(x_test, y_test, s=40, marker='*', color='orange', label='测试集')
# Fitted line
plt.plot(x_axis, y_axis, color="black", linewidth=3, label=f'y = {theta[1]:.2f} * x + {theta[0]:.2f}, ')
plt.ylim(y_train.min() - 3, y_train.max() + 5)
plt.legend()

# Figure 2: parameter values per iteration
plt.figure(2, figsize=(9, 4))
plt.plot(iteration_axis, theta_history[:, 0], label='误差项参数')
plt.plot(iteration_axis, theta_history[:, 1], label='一次项参数')
plt.legend()
plt.ylim(0, 5)
plt.xlim(0, 0.0625)  # zoom in on the early iterations
plt.xlabel('迭代次数/10^3')
plt.ylabel('参数值')

# Figure 3: loss per iteration
plt.figure(3, figsize=(9, 4))
plt.plot(iteration_axis, loss_history, color='r', linewidth=2, label="损失函数曲线")
plt.ylim(0, 10)
plt.xlim(0, 0.25)  # zoom in on the first quarter of the run
plt.xlabel('迭代次数/10^3')
plt.ylabel('损失值')
plt.legend()

# Save before show(): once show() returns, interactive backends have closed
# the figures and savefig would write an empty image.
fit_figure.savefig("myLR.png")
plt.show()