I. Preface
From-scratch implementation
- Build the dataset and a batch iterator
- Define the regression model
- Define the loss function (squared loss)
- Define the optimization algorithm (SGD)
- Train
Concise implementation
- Build the dataset and a batch iterator
- Implement the regression model with PyTorch's modules
- Use PyTorch's loss function (MSE)
- Use PyTorch's optimization algorithm (SGD)
- Train
P.S. Thanks to Mu Li, even though he doesn't know me (doge)
II. Implementation
1. From-scratch implementation
import random
import torch
from d2l import torch as d2l
# Simulate a dataset --------------------------------------------
def synthetic_data(w, b, num_examples):
    """
    Generate y = Xw + b + noise
    :return: features x and labels y
    """
    # Simulate the features x
    x = torch.normal(0, 1, (num_examples, len(w)))  # mean 0, std 1; num_examples rows, len(w) columns (rows = number of examples, columns = number of features per example)
    # Simulate the labels y
    y = torch.matmul(x, w) + b  # a linear function gives the ground-truth values
    y += torch.normal(0, 0.01, y.shape)  # add noise with mean 0 and std 0.01
    y = y.reshape((-1, 1))  # reshape into a column vector
    return x, y
# Build the dataset
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
print(features)
# tensor([[-0.6286, 1.0225],
# [ 1.0482, -0.6137],
# [-0.3066, 0.6919],
# ...,
# [ 0.3020, -0.2817],
# [ 1.3083, 1.4064],
# [ 0.1264, 0.9362]])
print(labels)
# tensor([[-5.2968e-01],
# [ 8.3766e+00],
# [ 1.2278e+00],
# ...,
# [ 5.7573e+00],
# [ 2.0380e+00],
# [ 1.2725e+00]])
# # Visualize the data distribution ------------------------------------------------------------------
# d2l.set_figsize()
# d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1)
# d2l.plt.show()
# Batch iterator ----------------------------------
def data_iter(batch_size, features, labels):
    # Collect all indices and shuffle them
    num_examples = len(features)  # len(labels) works too; they have the same length
    indices = list(range(num_examples))  # all indices
    random.shuffle(indices)  # shuffle in place
    # Walk through features and labels by the shuffled indices, batch_size examples at a time
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i:min(i + batch_size, num_examples)])  # min(i + batch_size, num_examples) handles the last batch: if fewer than batch_size examples remain, just return whatever is left
        yield features[batch_indices], labels[batch_indices]
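Because of the min(i + batch_size, num_examples) clamp, the final batch can be smaller than batch_size. A quick sanity check (a minimal sketch; the [:7] slice is only for illustration):

for xb, yb in data_iter(3, features[:7], labels[:7]):
    print(xb.shape)
# torch.Size([3, 2])
# torch.Size([3, 2])
# torch.Size([1, 2])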
batch_size = 10
# Print one batch to take a look
for x, y in data_iter(batch_size, features, labels):
    print("{}\n{}".format(x, y))
# tensor([[ 1.8326, 0.7707],
# [ 0.2930, 1.7733],
# [-1.6511, 1.1953],
# [ 0.3137, -0.1215],
# [ 0.2067, 0.4356],
# [ 0.2309, 1.3944],
# [-1.7035, 1.0994],
# [ 0.2823, 0.0139],
# [-1.1442, 1.4188],
# [ 0.1316, 1.3700]])
# tensor([[ 5.2469],
# [-1.2503],
# [-3.1862],
# [ 5.2271],
# [ 3.1401],
# [-0.0835],
# [-2.9385],
# [ 4.7317],
# [-2.9028],
# [-0.1943]])
    break
# Define the regression model -----------------------
def lin_reg(X, w, b):
    return torch.matmul(X, w) + b
# Initialize the model parameters w, b -------------------------------------------------
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # mean 0, std 0.01, shape (2, 1), track gradients
b = torch.zeros(1, requires_grad=True)  # initialized to 0, track gradients
print(w)
# tensor([[ 0.0084],
# [-0.0166]], requires_grad=True)
print(b)
# tensor([0.], requires_grad=True)
# Loss function ------------------------------------------
def square_loss(y_hat, y, batch_size):
    """ Squared loss """
    return (y_hat - y.reshape(y_hat.shape))**2 / 2 / batch_size  # 1. dividing by 2 makes the derivative cleaner; 2. dividing by batch_size averages over the batch
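Why the two divisions, in formula form: for a single example,

\ell(\hat{y}, y) = \frac{(\hat{y} - y)^2}{2}, \qquad \frac{\partial \ell}{\partial \hat{y}} = \hat{y} - y

The factor 2 produced by differentiating the square cancels the 1/2, giving a clean gradient; dividing by batch_size turns the summed batch loss into an average, so the gradient scale does not depend on the batch size.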
# Define the optimization algorithm: SGD -------------------------------------
def sgd(params, lr):
    """ Minibatch stochastic gradient descent """
    with torch.no_grad():  # do not track gradients while updating the parameters
        for param in params:
            param -= lr * param.grad  # w1 = w0 - learning_rate * gradient of the loss w.r.t. w0
            param.grad.zero_()  # reset the gradient to zero
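The update rule sgd implements, for each parameter \theta with learning rate \eta and batch-averaged loss \ell:

\theta \leftarrow \theta - \eta \, \frac{\partial \ell}{\partial \theta}

Because square_loss already divides by batch_size, the l.sum().backward() call in the training loop below produces exactly the gradient of the batch-averaged loss.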
# Training loop ----------
lr = 0.03
num_epochs = 3
net = lin_reg
loss = square_loss
for epoch in range(num_epochs):
    for x, y in data_iter(batch_size, features, labels):
        l = loss(net(x, w, b), y, batch_size)  # forward pass: compute the loss
        l.sum().backward()  # sum the per-example losses, then backpropagate to compute the gradients
        sgd([w, b], lr)  # update the parameters
    # After each epoch, print the loss under the updated parameters
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels, batch_size)  # note: square_loss still divides by batch_size here, so this is a scaled loss, used only to track the trend
        print("epoch: {}, loss: {}".format(epoch+1, train_l.mean()))
# epoch: 1, loss: 0.0036771001759916544
# epoch: 2, loss: 1.2870842510892544e-05
# epoch: 3, loss: 4.923712822346715e-06
# Compare the true and the learned parameters ---------------
print("estimation error of w: {}".format(true_w - w.reshape(true_w.shape)))
print("estimation error of b: {}".format(true_b - b))
# estimation error of w: tensor([5.1260e-06, 3.6597e-04], grad_fn=<SubBackward0>)
# estimation error of b: tensor([0.0010], grad_fn=<RsubBackward1>)
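With the learned w and b you can predict on new inputs; a minimal sketch (the input point here is made up for illustration):

with torch.no_grad():
    x_new = torch.tensor([[1.0, 2.0]])  # a hypothetical new example
    print(lin_reg(x_new, w, b))  # should be close to 2*1.0 - 3.4*2.0 + 4.2 = -0.6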
2. Concise implementation (using PyTorch's built-in APIs)
import random
import torch
from d2l import torch as d2l
# Simulate a dataset --------------------------------------------
def synthetic_data(w, b, num_examples):
    """
    Generate y = Xw + b + noise
    :return: features x and labels y
    """
    # Simulate the features x
    x = torch.normal(0, 1, (num_examples, len(w)))  # mean 0, std 1; num_examples rows, len(w) columns (rows = number of examples, columns = number of features per example)
    # Simulate the labels y
    y = torch.matmul(x, w) + b  # a linear function gives the ground-truth values
    y += torch.normal(0, 0.01, y.shape)  # add noise with mean 0 and std 0.01
    y = y.reshape((-1, 1))  # reshape into a column vector
    return x, y
# Build the dataset
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
print(features)
# tensor([[-0.6286, 1.0225],
# [ 1.0482, -0.6137],
# [-0.3066, 0.6919],
# ...,
# [ 0.3020, -0.2817],
# [ 1.3083, 1.4064],
# [ 0.1264, 0.9362]])
print(labels)
# tensor([[-5.2968e-01],
# [ 8.3766e+00],
# [ 1.2278e+00],
# ...,
# [ 5.7573e+00],
# [ 2.0380e+00],
# [ 1.2725e+00]])
# # Visualize the data distribution ------------------------------------------------------------------
# d2l.set_figsize()
# d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1)
# d2l.plt.show()
# Batch iterator ----------------------------------
from torch.utils import data
batch_size = 10
data_iter = data.DataLoader(
    data.TensorDataset(*(features, labels)),  # wrap the tensors into a Dataset
    batch_size,  # batch size
    shuffle=True  # reshuffle the data every epoch
)
# Print one batch to take a look. iter(data_iter) builds a fresh iterator each call, so this peek does not consume any batches from the training loop below
print(next(iter(data_iter)))
# [
# tensor([[-0.1174, 0.9516],
# [-0.6164, 0.3813],
# [-0.9736, 1.9661],
# [-1.2993, 0.5336],
# [-1.0567, 1.6242],
# [ 0.5484, 0.3681],
# [ 1.3446, 0.2446],
# [-1.4124, -0.5372],
# [-1.3326, -0.3083],
# [ 1.8234, -0.0221]]),
# tensor([[ 0.7119],
# [ 1.6712],
# [-4.4386],
# [-0.2142],
# [-3.4445],
# [ 4.0427],
# [ 6.0445],
# [ 3.2062],
# [ 2.5751],
# [ 7.9381]])
# ]
# Define the regression model -----------------------
from torch import nn
net = nn.Sequential(nn.Linear(2, 1))  # Linear is a linear layer (a.k.a. fully connected layer): 2 inputs, 1 output
# Initialize the model parameters w, b -------------------
# net[0] accesses the first module inside the Sequential, i.e. the Linear layer
net[0].weight.data.normal_(0, 0.01)  # w: mean 0, std 0.01
net[0].bias.data.fill_(0)  # b: initialized to 0
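To mirror the parameter printout of the from-scratch version, you can peek at the freshly initialized parameters (the weight values are random and differ from run to run):

print(net[0].weight.data)  # e.g. tensor([[ 0.0037, -0.0061]]) -- random, run-dependent
print(net[0].bias.data)    # tensor([0.])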
# Loss function ------------------------------------------
loss = nn.MSELoss()
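Note: nn.MSELoss defaults to reduction='mean', so it already returns a scalar averaged over the batch; that is why the training loop below calls l.backward() without a .sum(). Switching to the summed loss is possible, but then the learning rate is typically divided by the batch size. Shown commented out so it doesn't affect this run:

# loss = nn.MSELoss(reduction='sum')  # alternative: sum instead of mean; usually paired with lr / batch_size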
# 定义优化算法SGD -------------------------------------
trainer = torch.optim.SGD(
net.parameters(), # net的所有可训练参数【w, b】
lr=0.03 # 学习率
)
# Training loop -----------------------
num_epochs = 3
for epoch in range(num_epochs):
    for x, y in data_iter:
        l = loss(net(x), y)  # forward pass: compute the loss
        trainer.zero_grad()  # reset the gradients to zero
        l.backward()  # backpropagate; MSELoss already reduces the batch to a scalar (mean by default), so no sum() is needed
        trainer.step()  # update the parameters
    # After each epoch, print the loss under the updated parameters
    l = loss(net(features), labels)
    print("epoch: {}, loss: {}".format(epoch+1, l))
# epoch: 1, loss: 0.00029666078626178205
# epoch: 2, loss: 0.00010394241689937189
# epoch: 3, loss: 0.00010382977779954672
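To close the loop like the from-scratch version, the learned parameters can be compared with the ground truth here as well; a minimal sketch (the exact errors vary from run to run):

w = net[0].weight.data
b = net[0].bias.data
print("estimation error of w: {}".format(true_w - w.reshape(true_w.shape)))
print("estimation error of b: {}".format(true_b - b))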