Author's IDE: VSCode
Python version: python3.8.8
Intended audience: complete beginners
First, import the libraries needed for this lesson:
%matplotlib inline
import random  # used to shuffle the sample indices for stochastic gradient descent
import torch
from d2l import torch as d2l  # the d2l package collects the algorithms and functions implemented throughout the book
We construct an artificial dataset from a linear model with additive noise. Using the true parameters w = [2, -3.4] and b = 4.2 together with a noise term ε, we generate the dataset and its labels:
y = Xw + b + ε
def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise."""
    X = torch.normal(0, 1, (num_examples, len(w)))  # features drawn from N(0, 1): mean 0, standard deviation 1
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)  # add Gaussian noise with mean 0 and standard deviation 0.01, same shape as y
    return X, y.reshape((-1, 1))
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features,labels = synthetic_data(true_w,true_b,1000)
Each row of features holds one two-dimensional data sample, and each row of labels holds one one-dimensional label value.
print('features:',features[0],'\nlabels:',labels[0])
features: tensor([-1.1803, -0.9632])
labels: tensor([5.1059])
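As a quick sanity check, we can confirm the shapes synthetic_data produced (a one-line sketch; the expected sizes follow from num_examples=1000, len(w)=2, and the reshape to a column):
print(features.shape, labels.shape)  # torch.Size([1000, 2]) torch.Size([1000, 1])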
Let's visualize the data:
d2l.set_figsize()
d2l.plt.scatter(features[:,1].detach().numpy(),labels.detach().numpy(),1)
[Figure: scatter plot of the second feature features[:, 1] against labels; the negative linear relationship from the true weight -3.4 is clearly visible.]
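Since y depends on both features, plotting each one can be instructive. Below is a minimal sketch assuming d2l.plt is matplotlib.pyplot (which is how the d2l package exposes it); the axis labels are illustrative annotations:
fig, axes = d2l.plt.subplots(1, 2, figsize=(8, 3))
axes[0].scatter(features[:, 0].detach().numpy(), labels.detach().numpy(), 1)
axes[0].set_xlabel('feature 0 (true weight 2)')
axes[1].scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1)
axes[1].set_xlabel('feature 1 (true weight -3.4)')
d2l.plt.show()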
Define a data_iter function that takes a batch size, a feature matrix, and a label vector as input, and yields minibatches of size batch_size:
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # shuffle the sample order so batches are random
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i:min(i + batch_size, num_examples)])  # indices of the current batch (min handles a short final batch)
        yield features[batch_indices], labels[batch_indices]  # features and labels of the current batch
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)  # print the features and labels of the first batch only
    break
tensor([[ 0.4318, 0.5893],
[ 0.2956, -0.1592],
[ 0.5685, -0.0713],
[-1.0636, -0.8737],
...
[-0.9419, 0.6235],
[-0.0412, 0.4643]])
tensor([[ 3.0551],
[ 5.3334],
[ 5.5834],
...
[ 0.2026],
[ 2.5343]])
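For reference, PyTorch's own data utilities can stand in for the hand-written iterator; here is a sketch using torch.utils.data (the rest of this note keeps using data_iter):
from torch.utils.data import TensorDataset, DataLoader
dataset = TensorDataset(features, labels)
loader = DataLoader(dataset, batch_size=10, shuffle=True)  # shuffle=True plays the role of random.shuffle(indices)
X, y = next(iter(loader))  # one minibatch of 10 samples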
Initialize the model parameters:
# w: a 2x1 tensor drawn from a normal distribution with mean 0 and standard deviation 0.01, with requires_grad=True so gradients are tracked
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
# b: a single zero-initialized bias, also tracking gradients
b = torch.zeros(1, requires_grad=True)
Define the model:
def linreg(X, w, b):
    """The linear regression model."""
    return torch.matmul(X, w) + b
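Note that torch.matmul of an (n, 2) batch with the (2, 1) weight yields an (n, 1) column, and the scalar b is broadcast across all rows. A quick shape check (X_demo is a made-up input just for illustration):
X_demo = torch.ones(10, 2)
print(linreg(X_demo, w, b).shape)  # torch.Size([10, 1])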
Define the loss function:
def squared_loss(y_hat, y):
    """Squared loss."""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
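A tiny worked example with made-up values: a prediction of 2.5 against a true label of 3.0 gives (2.5 - 3.0)^2 / 2 = 0.125. The reshape handles the case where y arrives as a flat vector while y_hat is a column:
y_hat_demo = torch.tensor([[2.5]])
y_demo = torch.tensor([3.0])  # a flat vector; squared_loss reshapes it to match y_hat
print(squared_loss(y_hat_demo, y_demo))  # tensor([[0.1250]])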
Define the optimization algorithm:
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():  # parameter updates should not be tracked by autograd
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()  # reset the gradient so it does not accumulate across batches
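For comparison, torch.optim.SGD implements the same update rule; a sketch of the built-in equivalent (not used below, and note it does not divide by batch_size, so you would average the loss before calling backward()):
optimizer = torch.optim.SGD([w, b], lr=0.05)  # 0.05 matches the lr chosen below
# after l.backward():
# optimizer.step()       # applies param -= lr * param.grad for each parameter
# optimizer.zero_grad()  # resets the gradients, like param.grad.zero_() above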
The training loop:
lr = 0.05
num_epochs = 100
net = linreg  # the model
loss = squared_loss  # the loss function
for epoch in range(num_epochs):  # loop over epochs
    for X, y in data_iter(batch_size, features, labels):  # loop over minibatches
        l = loss(net(X, w, b), y).mean()  # mean loss over the minibatch
        l.backward()  # backpropagate to get the gradients of the loss w.r.t. w and b
        # since l is already a mean, sgd's extra division by batch_size makes the
        # effective step size lr / batch_size, which is why so many epochs are used here
        sgd([w, b], lr, batch_size)  # update w and b
    with torch.no_grad():  # no gradient tracking needed for evaluation
        train_l = loss(net(features, w, b), labels)  # loss over the full training set
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
epoch 1, loss 6.087923
epoch 2, loss 2.364102
epoch 3, loss 0.921060
epoch 4, loss 0.359953
epoch 5, loss 0.141125
...
epoch 98, loss 0.000054
epoch 99, loss 0.000054
epoch 100, loss 0.000054
To evaluate how well training worked, compare the true parameters with the learned ones:
print(f'error in estimating w: {true_w - w.reshape(true_w.shape)}')
print(f'error in estimating b: {true_b - b}')
error in estimating w: tensor([0.0004, 0.0003], grad_fn=<SubBackward0>)
error in estimating b: tensor([-3.8147e-05], grad_fn=<RsubBackward1>)
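Finally, a sanity check with a made-up test point: for x = [1, 1] the true model gives 2 - 3.4 + 4.2 = 2.8, so the learned parameters should predict something very close:
with torch.no_grad():
    x_new = torch.tensor([[1.0, 1.0]])
    print(net(x_new, w, b))  # expect a value close to 2.8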