线性回归算法-从零开始

最新推荐文章于 2024-05-22 10:31:55 发布

Philo`

最新推荐文章于 2024-05-22 10:31:55 发布

阅读量374

点赞数

分类专栏：PyTorch　文章标签：算法、线性回归、机器学习

本文链接：https://blog.csdn.net/qq_44864833/article/details/126690312

版权

Pytorch 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

线性回归算法

1.线性回归

在这里插入图片描述
不进行梯度清除

进行梯度清除

2.随机梯度下降

在这里插入图片描述

3.线性回归

在这里插入图片描述

import torch
def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following y = Xw + b + noise.

    Args:
        w: Tensor of true weights; ``len(w)`` sets the number of feature
            columns.
        b: True bias added to every label.
        num_examples: Number of rows to sample.

    Returns:
        A ``(features, labels)`` pair. ``features`` has shape
        ``(num_examples, len(w))`` and ``labels`` is reshaped to a single
        column.
    """
    feature_shape = (num_examples, len(w))
    # Features are independent draws from a normal distribution (mean 0, std 1).
    X = torch.normal(0, 1, feature_shape)
    # Noise-free labels computed from the true parameters.
    clean = torch.matmul(X, w) + b
    # Small Gaussian term (std 0.01) that plays the role of observation error.
    noise = torch.normal(0, 0.01, clean.shape)
    return X, (clean + noise).reshape((-1, 1))

# Ground-truth weight vector and bias used to generate the dataset.
true_w, true_b = torch.tensor([2, -3.4]), 4.2
# Sample a 1000-example synthetic dataset from those parameters.
features, labels = synthetic_data(true_w, true_b, 1000)
# One shape per line: features first, then labels.
print(features.shape, labels.shape, sep='\n')

在这里插入图片描述

import random
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    The example order is reshuffled with ``random.shuffle`` on every call, so
    batches are read in random order. This is a generator: batches are
    produced lazily and must be consumed by iteration.

    Args:
        batch_size: Number of examples per batch (the ``range`` step).
        features: Indexable collection of examples; ``len(features)`` gives
            the number of examples.
        labels: Labels indexed with the same positions as ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for each consecutive slice of the
        shuffled positions. The final batch is shorter when ``batch_size``
        does not divide the number of examples.
    """
    order = list(range(len(features)))
    random.shuffle(order)
    # Convert the permutation once; each batch is a slice of this index tensor.
    shuffled = torch.tensor(order)
    for start in range(0, len(order), batch_size):
        # Slicing clips at the end, so no explicit upper bound is needed.
        picked = shuffled[start:start + batch_size]
        yield features[picked], labels[picked]

batch_size = 10

# Pull a single mini-batch from the loader and print it as a quick check;
# nothing is printed if the loader yields no batches.
first_batch = next(data_iter(batch_size, features, labels), None)
if first_batch is not None:
    X, y = first_batch
    print(X, '\n', y)

下面是对返回的 features[batch_indices] 和 labels[batch_indices] 的辅助理解：
在这里插入图片描述

# Weights: a (2, 1) tensor drawn from a normal distribution with mean 0 and
# std 0.01, flagged so autograd tracks its gradient.
w = torch.normal(0.0, 0.01, (2, 1)).requires_grad_()
# Bias: a single zero, also tracked by autograd. Only floating-point and
# complex tensors can require gradients.
b = torch.zeros(1).requires_grad_()

def linreg(X, w, b):
    """Linear regression model: return the matrix product of X and w, plus b.

    Args:
        X: Input features.
        w: Weights multiplied with ``X`` via ``torch.matmul``.
        b: Bias added to the product.

    Returns:
        ``torch.matmul(X, w) + b``.
    """
    projection = torch.matmul(X, w)
    return projection + b

在这里插入图片描述

# NOTE(review): `squared_loss` and `sgd` are not defined in the visible part
# of this file; they must already be in scope when this section runs.
lr = 0.03        # learning rate handed to sgd
num_epochs = 3   # number of full passes over the dataset
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        # Losses of the model output on the current mini-batch.
        batch_losses = loss(net(X, w, b), y)
        # Sum the losses to a scalar, then backpropagate to fill in the
        # gradients of w and b.
        torch.sum(batch_losses).backward()
        # Presumably applies the parameter update; sgd is not visible here.
        sgd([w, b], lr, batch_size)

    # Report the mean loss over the whole dataset, without tracking gradients.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f"epoch{epoch + 1},loss{float(train_l.mean()):f}")

print(w, b)