深度学习与神经网络（PyTorch 版）3.2 线性回归从零开始实现：1. 生成数据集 2. 读取数据集 3. 初始化模型参数 4. 定义模型 5. 定义损失函数 6. 定义优化算法 7. 训练 - CSDN博客

# 1. 生成数据集

# 导入必要的库
import matplotlib.pyplot as plt
import random
import torch
from d2l import torch as d2l

# 定义一个生成合成数据的函数
def synthetic_data(w, b, num_examples, noise_std=0.01):
    """Generate a synthetic dataset following y = Xw + b + Gaussian noise.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of features per example.
        b: Bias added to every label.
        num_examples: Number of examples (rows of ``X``) to generate.
        noise_std: Standard deviation of the zero-mean Gaussian noise added to
            the labels. Defaults to 0.01, the value that used to be hard-coded.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(num_examples, len(w))``; ``y``
        is reshaped to a single column (``(num_examples, 1)`` for a 1-D ``w``).
    """
    # Features are drawn from a standard normal distribution N(0, 1).
    X = torch.normal(0, 1, (num_examples, len(w)))
    # Noise-free labels from the linear model.
    y = torch.matmul(X, w) + b
    # Perturb the labels with zero-mean Gaussian noise of the requested scale.
    y += torch.normal(0, noise_std, y.shape)
    # Return the labels as a column so each row pairs with one row of X.
    return X, y.reshape((-1, 1))


# Ground-truth weights and bias used to generate the data
true_w = torch.tensor([2, -3.4])  # true weight vector: [2, -3.4]
true_b = 4.2  # true bias: the scalar 4.2


# Build the dataset with the generator function defined above
features, labels = synthetic_data(true_w, true_b, 1000)  # 1000 examples; features is X, labels is y

# Show the first example and its label
print('features:', features[0],'\nlabel:', labels[0])
#d2l.set_figsize()
#d2l.plt.scatter(features[:, (1)].detach().numpy(), labels.detach().numpy(), 1)
# The disabled lines above would draw a scatter plot through the d2l helpers:
# features[:, (1)] selects the second feature (column index 1) of every example,
# .detach() is called on each tensor before .numpy() converts it to a NumPy array,
# and the trailing 1 is the third positional argument of scatter (the marker size).
#plt.show()

2. 读取数据集

# 2. 读取数据集
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    The example indices are shuffled once with ``random.shuffle`` and then
    walked in strides of ``batch_size``. The final batch is shorter when the
    number of examples is not a multiple of ``batch_size``.

    Args:
        batch_size: Number of examples per batch.
        features: Indexable collection of examples; ``len(features)`` sets
            how many examples are iterated.
        labels: Labels indexed in lockstep with ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for each batch of indices ``idx``.
    """
    total = len(features)
    order = list(range(total))
    # Visit the examples in random order.
    random.shuffle(order)
    # Convert the permutation once; each batch is then a slice of it.
    permutation = torch.tensor(order)
    for start in range(0, total, batch_size):
        # Slicing stops at the end of the tensor, so the last batch may be short.
        picked = permutation[start:start + batch_size]
        yield features[picked], labels[picked]
batch_size = 10  # examples per minibatch
total_batch_size = (len(features) + batch_size - 1) // batch_size  # number of batches (ceiling division)
print(total_batch_size)

# Print one sample minibatch, then stop iterating
for i, (X, y) in enumerate(data_iter(batch_size, features, labels)):
    if i == 4:  # batch index 4, i.e. the fifth batch
        print(X, '\n', y)
        break  # leave the loop

3. 初始化模型参数

# 3. Initialize model parameters
# w: shape (2, 1), sampled from a normal distribution with mean 0 and std 0.01; gradients are tracked
w = torch.normal(0, 0.01, size=(2,1), requires_grad=True)
# b: a single zero-initialized bias; gradients are tracked
b = torch.zeros(1, requires_grad=True)
print('123')  # NOTE(review): looks like a leftover debug marker -- confirm it is still wanted
print(w.shape)
print(w)

3.1 可视化w 和 b

 # 将张量w可视化出来
import matplotlib.pyplot as plt
import torch

# Create the tensor w (shape (2, 1), normal with mean 0 and std 0.01)
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)

# Detach from autograd and convert to a NumPy array for matplotlib
w_np = w.detach().numpy()

# Create the matplotlib figure
plt.figure(figsize=(4, 2))

# Draw w as a heat map with a color scale
plt.imshow(w_np, cmap='viridis', aspect='auto', origin='lower')
plt.colorbar()
plt.title('Visualization of Tensor w')
# imshow lays array columns along the x axis and rows along the y axis,
# so the labels follow that layout (they were previously swapped).
plt.xlabel('Column')
plt.ylabel('Row')

# Show the figure
plt.show()

# 将张量b可视化出来
import matplotlib.pyplot as plt
import numpy as np
import torch

# Create the tensor b (a single zero-initialized bias)
b = torch.zeros(1, requires_grad=True)

# Detach from autograd and convert to a NumPy array
b_np = b.detach().numpy()

# Visualize b
plt.figure(figsize=(4, 2))
# b holds a single value, so there is no line segment to draw; an explicit
# marker is needed for the point to be visible at all.
plt.plot(b_np, marker='o')
plt.title('Visualization of Tensor b')
plt.xlabel('Index')
plt.ylabel('Value')
plt.show()

4. 定义模型

# 4. 定义模型
def linreg(X, w, b):  #@save
    """Linear regression model: the matrix product of ``X`` and ``w`` plus ``b``.

    Args:
        X: Input features.
        w: Weights multiplied with ``X`` via ``torch.matmul``.
        b: Bias added to the product.

    Returns:
        ``torch.matmul(X, w) + b``.
    """
    projection = torch.matmul(X, w)
    return projection + b

5. 定义损失函数

# 5. 定义损失函数
def squared_loss(y_hat, y):
    """Element-wise halved squared error between predictions and targets.

    No reduction is applied: the result has the shape of ``y_hat``.

    Args:
        y_hat: Predicted values.
        y: Target values; reshaped to ``y_hat.shape`` before subtracting.

    Returns:
        ``(y_hat - y) ** 2 / 2`` computed element by element.
    """
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual ** 2 / 2

6. 定义优化算法

# 6. 定义优化算法
def sgd(params, lr, batch_size):
    """Apply one minibatch stochastic gradient descent step in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size`` and
    its gradient is then reset to zero. Dividing by ``batch_size`` averages a
    gradient that was obtained from a summed minibatch loss.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            # A parameter that took no part in the backward pass has no
            # gradient yet; skip it instead of failing on ``None``.
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
            # Clear the gradient so the next backward pass starts from zero.
            param.grad.zero_()

7. 训练

# 7. Training
lr = 0.03  # learning rate passed to sgd
num_epochs = 3  # number of passes over the dataset
net = linreg  # model function
loss = squared_loss  # loss function

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # minibatch loss for X and y
        # l holds one entry per example rather than a scalar, so its elements
        # are summed and the gradients with respect to [w, b] come from that sum
        l.sum().backward()
        sgd([w, b], lr, batch_size)  # update the parameters using their gradients
    # Report the mean loss over the full dataset after each epoch, without tracking gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

# Difference between the true parameters and the learned ones
print(f'w的估计误差: {true_w - w.reshape(true_w.shape)}')
print(f'b的估计误差: {true_b - b}')

2. 完整代码

# 线性回归从零开始实现
# 1. 生成数据集

# 导入必要的库
import matplotlib.pyplot as plt
import random
import torch
from d2l import torch as d2l

# 定义一个生成合成数据的函数
def synthetic_data(w, b, num_examples, noise_std=0.01):
    """Generate a synthetic dataset following y = Xw + b + Gaussian noise.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of features per example.
        b: Bias added to every label.
        num_examples: Number of examples (rows of ``X``) to generate.
        noise_std: Standard deviation of the zero-mean Gaussian noise added to
            the labels. Defaults to 0.01, the value that used to be hard-coded.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(num_examples, len(w))``; ``y``
        is reshaped to a single column (``(num_examples, 1)`` for a 1-D ``w``).
    """
    # Features are drawn from a standard normal distribution N(0, 1).
    X = torch.normal(0, 1, (num_examples, len(w)))
    # Noise-free labels from the linear model.
    y = torch.matmul(X, w) + b
    # Perturb the labels with zero-mean Gaussian noise of the requested scale.
    y += torch.normal(0, noise_std, y.shape)
    # Return the labels as a column so each row pairs with one row of X.
    return X, y.reshape((-1, 1))

# Ground-truth weights and bias used to generate the data
true_w = torch.tensor([2, -3.4])  # true weight vector: [2, -3.4]
true_b = 4.2  # true bias: the scalar 4.2

# Build the dataset with the generator function defined above
features, labels = synthetic_data(true_w, true_b, 1000)  # 1000 examples; features is X, labels is y

# Show the first example and its label
print('features:', features[0],'\nlabel:', labels[0])
#d2l.set_figsize()
#d2l.plt.scatter(features[:, (1)].detach().numpy(), labels.detach().numpy(), 1)
# The disabled lines above would draw a scatter plot through the d2l helpers:
# features[:, (1)] selects the second feature (column index 1) of every example,
# .detach() is called on each tensor before .numpy() converts it to a NumPy array,
# and the trailing 1 is the third positional argument of scatter (the marker size).
#plt.show()

# 2. 读取数据集
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    The example indices are shuffled once with ``random.shuffle`` and then
    walked in strides of ``batch_size``. The final batch is shorter when the
    number of examples is not a multiple of ``batch_size``.

    Args:
        batch_size: Number of examples per batch.
        features: Indexable collection of examples; ``len(features)`` sets
            how many examples are iterated.
        labels: Labels indexed in lockstep with ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for each batch of indices ``idx``.
    """
    total = len(features)
    order = list(range(total))
    # Visit the examples in random order.
    random.shuffle(order)
    # Convert the permutation once; each batch is then a slice of it.
    permutation = torch.tensor(order)
    for start in range(0, total, batch_size):
        # Slicing stops at the end of the tensor, so the last batch may be short.
        picked = permutation[start:start + batch_size]
        yield features[picked], labels[picked]
batch_size = 10  # examples per minibatch
total_batch_size = (len(features) + batch_size - 1) // batch_size  # number of batches (ceiling division)
print(total_batch_size)

# Print one sample minibatch, then stop iterating
for i, (X, y) in enumerate(data_iter(batch_size, features, labels)):
    if i == 4:  # batch index 4, i.e. the fifth batch
        print(X, '\n', y)
        break  # leave the loop

# 3. Initialize model parameters
# w: shape (2, 1), sampled from a normal distribution with mean 0 and std 0.01; gradients are tracked
w = torch.normal(0, 0.01, size=(2,1), requires_grad=True)
# b: a single zero-initialized bias; gradients are tracked
b = torch.zeros(1, requires_grad=True)
print('123')  # NOTE(review): looks like a leftover debug marker -- confirm it is still wanted
print(w.shape)
print(w)

# 4. 定义模型
def linreg(X, w, b):  #@save
    """Linear regression model: the matrix product of ``X`` and ``w`` plus ``b``.

    Args:
        X: Input features.
        w: Weights multiplied with ``X`` via ``torch.matmul``.
        b: Bias added to the product.

    Returns:
        ``torch.matmul(X, w) + b``.
    """
    projection = torch.matmul(X, w)
    return projection + b

# 5. 定义损失函数
def squared_loss(y_hat, y):
    """Element-wise halved squared error between predictions and targets.

    No reduction is applied: the result has the shape of ``y_hat``.

    Args:
        y_hat: Predicted values.
        y: Target values; reshaped to ``y_hat.shape`` before subtracting.

    Returns:
        ``(y_hat - y) ** 2 / 2`` computed element by element.
    """
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual ** 2 / 2

# 6. 定义优化算法
def sgd(params, lr, batch_size):
    """Apply one minibatch stochastic gradient descent step in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size`` and
    its gradient is then reset to zero. Dividing by ``batch_size`` averages a
    gradient that was obtained from a summed minibatch loss.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            # A parameter that took no part in the backward pass has no
            # gradient yet; skip it instead of failing on ``None``.
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
            # Clear the gradient so the next backward pass starts from zero.
            param.grad.zero_()

# 7. Training
lr = 0.03  # learning rate passed to sgd
num_epochs = 3  # number of passes over the dataset
net = linreg  # model function
loss = squared_loss  # loss function

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # minibatch loss for X and y
        # l holds one entry per example rather than a scalar, so its elements
        # are summed and the gradients with respect to [w, b] come from that sum
        l.sum().backward()
        sgd([w, b], lr, batch_size)  # update the parameters using their gradients
    # Report the mean loss over the full dataset after each epoch, without tracking gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

# Difference between the true parameters and the learned ones
print(f'w的估计误差: {true_w - w.reshape(true_w.shape)}')
print(f'b的估计误差: {true_b - b}')

3. 小结

我正在学习《动手学深度学习》，文中的代码参照书中代码，也可以认为这是一篇笔记。我将代码运行了一遍，添加了一些注释，感觉对代码的了解还不是很深入，只能说在心里大致有一个轮廓。之后我会不断加强学习，争取推陈出新，写出更加有深度的文章分享给大家。我心中一直有一个问题：新手如何学习编写代码？以我的水平最多能看懂一点代码，但是让我写，一点也写不出来，总是感觉自己学习的方式方法有问题。对于本文中的线性回归，我的问题与不足是：损失函数需要更加深入的了解；定义优化算法中的小批量随机梯度下降还不理解、不能掌握，我要在之后继续深入学习。