【线性回归学习记录】

最新推荐文章于 2024-07-31 14:30:10 发布

雪寻花

最新推荐文章于 2024-07-31 14:30:10 发布

阅读量90

点赞数

文章标签： python 开发语言 线性回归

原文链接：https://blog.csdn.net/qq_41487299/article/details/90574683?ops_request_misc=&request_id=&biz_id=102&utm_term=class%20LinearRegression:%20%20?ops_request_misc=&request_id=&biz_id=102&utm_term=class%20LinearRegression:%20%20&utm_medium=distribute.pc_search_

版权

线性回归学习代码

转载于：https://blog.csdn.net/qq_41487299/article/details/90574683

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

class LinearRegression:
    """Least-squares linear regression of the form ``y = x @ weights + bias``.

    Two training strategies are offered: iterative gradient descent on the
    mean squared error, and a closed-form least-squares solve.  After either
    one has run, ``weights`` and ``bias`` hold the fitted parameters and
    ``predict`` can be called.
    """

    def __init__(self):
        """Create an untrained model (``weights`` is ``None`` until training)."""
        self.weights = None
        self.bias = 0

    def train_gradient_descent(self, x, y, learning_rate=0.01, n_iters=100):
        """Fit the model by batch gradient descent on the mean squared error.

        Args:
            x: Feature matrix of shape ``(n_samples, n_features)``.
            y: Targets of shape ``(n_samples, 1)``; a 1-D array of length
                ``n_samples`` is accepted and treated as a column.
            learning_rate: Step size applied to each gradient update.
            n_iters: Number of update steps to run.

        Returns:
            A tuple ``(weights, bias, costs)`` where ``costs`` is the list of
            training losses, one entry per iteration, each computed before
            that iteration's parameter update.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        n_samples, n_features = x.shape
        if y.ndim == 1:
            # A flat y would broadcast against the (n_samples, 1) prediction
            # into an (n_samples, n_samples) matrix and corrupt the gradients.
            y = y.reshape(n_samples, 1)

        # Step 0: start from all-zero parameters.
        self.weights = np.zeros(shape=(n_features, 1))
        self.bias = 0
        costs = []

        for i in range(n_iters):
            # Step 1: residual of the current prediction x @ w + b.
            residual = np.dot(x, self.weights) + self.bias - y

            # Step 2: mean squared error on the training data.
            cost = (1 / n_samples) * np.sum(residual ** 2)
            costs.append(cost)
            if i % 100 == 0:  # progress report every 100 iterations
                print(f"Cost at iteration{i}:{cost}")

            # Step 3: partial derivatives of the cost w.r.t. weights and bias.
            dJ_dw = (2 / n_samples) * np.dot(x.T, residual)
            dJ_db = (2 / n_samples) * np.sum(residual)

            # Step 4: move against the gradient.
            self.weights = self.weights - learning_rate * dJ_dw
            self.bias = self.bias - learning_rate * dJ_db

        return self.weights, self.bias, costs

    def train_normal_equation(self, x, y):
        """Fit the weights in closed form by solving the least-squares problem.

        The bias is fixed to 0 by this method, so to fit an intercept the
        caller has to include a column of ones in ``x``.

        A least-squares solver is used instead of explicitly inverting
        ``x.T @ x``: for a full-rank ``x`` it yields the normal-equation
        solution, and for a rank-deficient ``x`` it returns the minimum-norm
        solution rather than failing on a singular matrix.

        Args:
            x: Design matrix of shape ``(n_samples, n_features)``.
            y: Targets with ``n_samples`` rows.

        Returns:
            A tuple ``(weights, bias)`` with ``bias`` equal to 0.
        """
        self.weights = np.linalg.lstsq(np.asarray(x), np.asarray(y), rcond=None)[0]
        self.bias = 0
        return self.weights, self.bias

    def predict(self, x):
        """Return ``x @ weights + bias`` for the fitted parameters.

        Raises:
            AttributeError: If the model has not been trained yet.
        """
        if getattr(self, "weights", None) is None:
            raise AttributeError(
                "model has not been trained; call a train_* method first"
            )
        return np.dot(x, self.weights) + self.bias

# Synthetic data set: 500 samples, x drawn uniformly from [0, 2) and
# y = 5x + 3 plus standard-normal noise.
np.random.seed(100)  # fixed seed so every run produces the same data
x = np.random.rand(500, 1) * 2
y = x * 5 + 3 + np.random.randn(500, 1)

# Scatter plot of the raw samples on a 10x10 inch canvas at 100 dpi.
plt.figure(dpi=100, figsize=(10, 10))
plt.scatter(x, y)
plt.title('My Linear Data')

# Randomly hold out 20% of the samples as a test set (seeded split).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Fit y = w*x + b by gradient descent. w_trained and b_trained are the fitted
# parameters; costs is the training loss recorded at every iteration.
regressor = LinearRegression()
w_trained, b_trained, costs = regressor.train_gradient_descent(x_train, y_train, learning_rate=0.005, n_iters=600)

# Only the row counts are needed; the column count is discarded into "_".
n_samples, _ = x_train.shape
n_samples_test, _ = x_test.shape

# Mean squared error of the fitted model on both splits.
y_p_train = regressor.predict(x_train)
y_p_test = regressor.predict(x_test)
error_train = (1/n_samples)*np.sum((y_p_train-y_train)**2)
error_test = (1/n_samples_test)*np.sum((y_p_test-y_test)**2)
print(f"error on training set:{np.round(error_train,4)}")
# Round the test error to four decimals like the training error; rounding to
# an integer discarded almost all of the information.
print(f"error on testing set:{np.round(error_test,4)}")

# Draw the fitted line over the full data set on the current figure.
y_linear = w_trained*x + b_trained
plt.plot(x, y_linear)

# Normal-equation training.
regressor_normal = LinearRegression()
# train_normal_equation leaves the bias at 0, so the intercept is fitted by
# prepending a column of ones to the features of both splits.
x_train_normal = np.c_[np.ones(n_samples), x_train]
x_test_normal = np.c_[np.ones(n_samples_test), x_test]
normal_weights, normal_bias = regressor_normal.train_normal_equation(x_train_normal, y_train)

print(x_train_normal.shape, normal_weights.shape)
# Mean squared error of the closed-form fit on both splits.
y_p_train_normal = regressor_normal.predict(x_train_normal)
y_p_test_normal = regressor_normal.predict(x_test_normal)
error_train_normal = (1/n_samples)*np.sum((y_p_train_normal-y_train)**2)
error_test_normal = (1/n_samples_test)*np.sum((y_p_test_normal-y_test)**2)
print("正规方程的误差：")
print(f"error on training set:{np.round(error_train_normal,4)}")
# Four decimals for the test error as well, matching the training error.
print(f"error on testing set:{np.round(error_test_normal,4)}")

# Plot the training loss against the iteration index on a new 8x6 inch figure.
fig = plt.figure(figsize=(8, 6))
# Take the x-axis length from the recorded costs rather than a hard-coded
# iteration count, so the plot stays valid if n_iters is changed.
plt.plot(np.arange(len(costs)), costs)
plt.title("Development of cost during training")
plt.xlabel("Numbers of iterations: ")
plt.ylabel("Cost")
plt.show()

在这里插入图片描述

雪寻花

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
【线性回归学习记录】

线性回归学习代码 转载于：https://blog.csdn.net/qq_41487299/article/details/90574683 import numpy as np import matplotlib.pyplot as plt from sklearn.model_selection import train_test_split class LinearRegression: # 类名 def _init_(self): # 初始化 pass # 什么也不
复制链接

扫一扫