从零实现线性回归（自用）

cat_lovely

已于 2023-09-19 10:57:48 修改

阅读量45

点赞数

分类专栏：深度学习李沐 文章标签：线性回归 算法 回归

于 2023-09-19 10:49:06 首次发布

本文链接：https://blog.csdn.net/cat_lovely/article/details/133017736

版权

深度学习李沐专栏收录该内容

1 篇文章 0 订阅

订阅专栏

代码来源于李沐深度学习中不使用框架、从零实现线性回归的那个视频。
因为自己太菜了，很多不太理解，所以写了很多注释和自己的思考方便以后查阅和回顾。

from matplotlib import pyplot as plt
import torch
import random
from d2l import torch as d2l

'''构造人造数据集'''
def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following y = Xw + b + noise.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns
            (the script in this file passes a 1-D tensor).
        b: Bias added to every label via broadcasting.
        num_examples: Number of samples (rows) to draw.

    Returns:
        A tuple ``(X, y)``: ``X`` has shape ``(num_examples, len(w))`` and
        ``y`` is reshaped to a single column, ``(num_examples, 1)``.
    """
    # Features are drawn from a normal distribution with mean 0 and
    # standard deviation 1.
    num_features = len(w)
    X = torch.normal(0, 1, (num_examples, num_features))
    # torch.matmul picks the product by operand rank; a 2-D X times a 1-D w
    # is a matrix-vector product. The scalar bias is broadcast.
    clean = torch.matmul(X, w) + b
    # Perturb the labels with small Gaussian noise (standard deviation 0.01).
    noise = torch.normal(0, 0.01, clean.shape)
    y = clean + noise
    # In reshape(-1, 1) the -1 lets PyTorch infer the row count, producing a
    # 2-D single-column tensor.
    return X, y.reshape(-1, 1)


'''自己设置参数'''
# Ground-truth parameters used to generate the data set.
true_w = torch.tensor([2,-3.4])
true_b = 4.2
features, labels = synthetic_data(true_w,true_b,1000)
# labels holds the target values; each row of features holds the two inputs
# used to predict the corresponding label.
print('features:',features[0],'\nlabels',labels[0])
# detach() yields the values without gradient tracking before the
# conversion to numpy.
# plt.scatter(features[:,1].detach().numpy(),
#             labels.detach().numpy(),1)
# plt.show()
'''第二列数据呈现负相关，因为w的第二个参数是负'''
'''用d2l'''
# The second feature column is negatively correlated with the labels because
# the second entry of true_w is negative. The same plot using d2l:
# d2l.set_figsize()
# d2l.plt.scatter(features[:,1].detach().numpy(),
#             labels.detach().numpy(),1)
# d2l.plt.show()

'''生成小批量'''
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    A list of sample indices is shuffled once, then consumed in consecutive
    slices of ``batch_size``. Only the index list is shuffled (not the data
    itself), and each batch is gathered by indexing with an index tensor.

    Being a generator, nothing runs until the result is iterated.

    Args:
        batch_size: Number of samples per batch; the last batch may be shorter.
        features: Indexable collection of samples; ``len(features)`` gives the
            sample count.
        labels: Targets aligned with ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for each batch of shuffled indices.
    """
    total = len(features)
    order = list(range(total))
    # random.shuffle works in place and returns nothing.
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # List slicing clamps at the end of the list, so the final batch
        # never runs past the last sample.
        chunk = order[start:start + batch_size]
        picked = torch.tensor(chunk)
        yield features[picked], labels[picked]

# Mini-batch size shared by the preview below and the training loop.
batch_size = 10
# Print only the first mini-batch as a sanity check, then stop iterating.
for X,y in data_iter(batch_size,features,labels):
    print(X,'\n',y)
    break

'''定义初始化模型参数'''
# w and b are updated during training, which is why both are created with
# requires_grad=True.
# Weights start as small random values (mean 0, std 0.01), shape (2, 1).
w = torch.normal(0,0.01,size=(2,1),requires_grad=True)
b = torch.zeros(1,requires_grad=True)  # The bias starts at 0.

'''定义线性回归模型'''
def linreg(X, w, b):
    """Linear regression model: return ``X @ w + b``.

    Args:
        X: Input features.
        w: Weights multiplied with ``X`` via ``torch.matmul``.
        b: Bias, broadcast onto the product.
    """
    projection = torch.matmul(X, w)
    return projection + b

'''定义损失函数——均方误差'''
def squared_loss(y_hat, y):
    """Element-wise squared error, halved: ``(y_hat - y) ** 2 / 2``.

    Args:
        y_hat: Predicted values.
        y: True values; reshaped to ``y_hat``'s shape first so a row/column
            layout mismatch cannot trigger unintended broadcasting.

    Returns:
        A tensor with the same shape as ``y_hat`` (one loss per element).
        No averaging over samples is done here; in this file the division
        by the batch size happens inside ``sgd``.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

'''定义优化算法'''
#params是一个列表，包括w和b
#lr是学习率
def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic gradient descent step in place.

    Args:
        params: Iterable of tensors to update (e.g. ``[w, b]``); each must
            already hold a gradient in ``.grad``.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient, so a gradient of a
            summed batch loss becomes a per-sample average.
    """
    # The update itself must not be recorded by autograd. Without no_grad,
    # an in-place change to a leaf tensor that requires grad is rejected
    # ("a leaf Variable that requires grad is being used in an in-place
    # operation").
    with torch.no_grad():
        for param in params:
            step = lr * param.grad / batch_size
            # In-place subtraction keeps the same tensor object, unlike
            # `param = param - step`, which would only rebind the local name.
            param.sub_(step)
            # Gradients accumulate across backward() calls, so reset them
            # before the next batch.
            param.grad.zero_()

'''main'''

# Hyperparameter: learning rate.
# NOTE(review): too large a value may make the loss blow up to nan — confirm by experiment.
lr = 0.03
num_epochs = 3  # Number of full passes over the data set.
net = linreg
loss = squared_loss 
# Aliasing the model and the loss makes it easy to swap in others later.

for epoch in range(num_epochs):
    for X,y in data_iter(batch_size,features,labels):
       l = loss(net(X,w,b),y)
       # l holds one loss value per sample in the batch; it is summed here
       # to get the scalar that backward() differentiates with respect to
       # w and b (both created with requires_grad=True).
       l.sum().backward()
       sgd([w,b],lr,batch_size)
    with torch.no_grad():
        # Report progress: loss over the whole data set after this epoch.
        train_l = loss(net(features,w,b),labels)
        # In an f-string each {...} expression is replaced by its value.
        print(f'epoch {epoch+1},loss {float(train_l.mean()):f}')
    # Show how far the learned parameters are from the true ones.
    print(f'w的估计误差：{true_w-w.reshape(true_w.shape)}')
    print(f'b的估计误差：{true_b-b}')

无注释完整版

import torch
import matplotlib as plt
from d2l import torch as d2l
import random

'''构造人造数据集'''
def sythetic_data(w, b, num_examples):
    """Build a noisy linear data set: ``y = Xw + b + noise``.

    Args:
        w: Weight tensor; ``len(w)`` is the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of rows to generate.

    Returns:
        ``(X, y)`` with ``X`` of shape ``(num_examples, len(w))`` and ``y``
        reshaped to one column.
    """
    feature_shape = (num_examples, len(w))
    # Standard-normal features.
    X = torch.normal(0, 1, feature_shape)
    targets = torch.matmul(X, w) + b
    # Add Gaussian noise with standard deviation 0.01 to the labels.
    targets = targets + torch.normal(0, 0.01, targets.shape)
    return X, targets.reshape(-1, 1)

# Ground-truth parameters and the 1000-sample data set generated from them.
true_w = torch.tensor([2,-3.4])
true_b = 4.2
features,labels = sythetic_data(true_w,true_b,1000)
# Optional scatter plot of the second feature against the labels.
# d2l.set_figsize()
# Remember to detach the tensors before converting them to numpy.
# d2l.plt.scatter(features[:,1].detach().numpy(),
#                 labels.detach().numpy(),1)
# d2l.plt.show()

def data_iter(batch_size,features,labels):
    """Yield shuffled mini-batches that together cover every sample once.

    Args:
        batch_size: Number of samples per batch; the final batch is shorter
            when ``batch_size`` does not divide the sample count.
        features: Indexable collection of samples; ``len(features)`` gives the
            sample count.
        labels: Targets aligned with ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for each batch of shuffled indices.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0,num_examples,batch_size):
        # A slice end is exclusive and is clamped to the list length, so no
        # explicit bound is needed. Capping it at num_examples-1 would drop
        # the last shuffled sample and could produce an empty batch whose
        # index tensor cannot be used for indexing.
        batch_indices = torch.tensor(indices[i:i+batch_size])
        yield features[batch_indices],labels[batch_indices]

# Mini-batch size shared by the preview below and the training loop.
batch_size = 10
# Print only the first mini-batch as a sanity check, then stop iterating.
for X,y in data_iter(batch_size,features,labels):
    print(X,'\n',y)
    break

'''定义初始化模型参数'''
# Trainable parameters: small random weights of shape (2, 1) and a zero bias.
# requires_grad=True makes autograd track them so they can be updated.
w = torch.normal(0,0.01,size=(2,1),requires_grad=True)
b = torch.zeros(1,requires_grad=True)

'''定义线性回归模型'''
def linreg(X, w, b):
    """Return the linear model output ``matmul(X, w) + b``."""
    weighted = torch.matmul(X, w)
    prediction = weighted + b
    return prediction

'''定义均方误差'''
def squared_loss(y_hat, y):
    """Return the halved squared error per element, shaped like ``y_hat``.

    ``y`` is reshaped to ``y_hat``'s shape before subtracting so that
    differing layouts do not broadcast into a larger tensor.
    """
    target = y.reshape(y_hat.shape)
    error = y_hat - target
    return error ** 2 / 2

'''小批量随机梯度下降'''
def sgd(params, lr, batch_size):
    """Run one in-place mini-batch SGD update on every tensor in ``params``.

    Each parameter is decreased by ``lr * grad / batch_size`` and its
    gradient is then zeroed so the next backward pass starts fresh.
    """
    # Parameter updates must not be tracked by autograd.
    with torch.no_grad():
        for param in params:
            update = lr * param.grad / batch_size
            param -= update
            param.grad.zero_()

# Hyperparameters: learning rate and number of passes over the data set.
lr = 0.03
num_epochs = 3
# Aliases so the model and the loss can be swapped without touching the loop.
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X,y in data_iter(batch_size,features,labels):
       # Per-sample losses for the batch; summed to a scalar for backward().
       l = loss(net(X,w,b),y)
       l.sum().backward()
       # sgd divides the summed gradient by batch_size and zeroes it afterwards.
       sgd([w, b], lr, batch_size)
    with torch.no_grad():
        # Progress report: mean loss over the full data set after this epoch.
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1},loss {float(train_l.mean()):f}')
    # Difference between the true and the learned parameters.
    print(f'w的估计误差：{true_w - w.reshape(true_w.shape)}')
    print(f'b的估计误差：{true_b - b}')