Reference:
"08 线性回归 + 基础优化算法【动手学深度学习v2】" on bilibili (Dive into Deep Learning v2, lecture 08: linear regression + basic optimization algorithms)
1. Generate a noisy dataset from a given linear model
import random
import torch

def synthetic_data(w, b, num_examples):
    """Takes the model parameters and the desired dataset size."""
    # Feature matrix X: one row per example, one column per feature
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    # Add Gaussian noise to the labels
    y += torch.normal(0, 0.01, y.shape)
    return X, y
With the model parameters and the dataset size defined, call the generating function:
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# The bias is a scalar; broadcasting handles it in later computations
num_examples = 1000
features, labels = synthetic_data(true_w, true_b, num_examples)
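A quick sanity check on the generated data (a minimal sketch; the exact values vary with each random draw):
print(features.shape)          # torch.Size([1000, 2])
print(labels.shape)            # torch.Size([1000])
print(features[0], labels[0])  # one (feature, label) pair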
2. Mini-batch data iterator
def data_iter(batch_size, features, labels):
    """Takes the feature matrix, the labels, and the hyperparameter batch_size."""
    num_samples = len(features)
    indices = list(range(num_samples))
    # Shuffle so that examples are visited in random order
    random.shuffle(indices)
    for i in range(0, num_samples, batch_size):
        # min(...) handles the last batch, which may be smaller than batch_size
        batch_indices = torch.tensor(indices[i : min(i + batch_size, num_samples)])
        yield features[batch_indices], labels[batch_indices]
With batch_size fixed, call the iterator and print just the first batch:
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break  # remove the break to iterate over all batches
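Each printed batch is a (10, 2) feature tensor plus its 10 matching labels; the contents differ on every run because the indices are shuffled.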
3. Define the three core pieces: the model, the loss function, and the optimization algorithm
Later chapters use library implementations, but here everything is written from scratch to reinforce understanding.
"""训练模型之前需要初始化待训练的模型参数"""
w = torch.normal(0, 0.01, size = (2, 1), requires_grad = True)
b = torch.zeros(1, requires_grad = True)
"""构建线性回归模型"""
def lingre(X, w, b):
return torch.matmul(X, w) + b
"""定义损失函数 均方误差损失"""
def squared_loss(y_hat, y):
"""输出仍是y_hat.shape的向量"""
return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
"""小批量随机梯度下降"""
def sgd(params, lr, batch_size):
"""梯度下降涉及到的超参数包括学习率和批量大小"""
with torch.no_grad():
for param in params:
param -= lr * param.grad / batch_size
param.grad.zero_()
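A minimal sketch of a single sgd step on a toy parameter (the tensor values here are made up purely for illustration):
p = torch.tensor([1.0, 2.0], requires_grad=True)
p.grad = torch.tensor([0.5, 0.5])  # pretend backward() produced this gradient
sgd([p], lr=0.1, batch_size=1)
print(p)  # tensor([0.9500, 1.9500], requires_grad=True)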
4. Training loop
"""定义部分超参数"""
lr, num_epochs, batch_size = 0.03, 3, 10
"""定义模型、损失函数和优化算法"""
# 优化算法被直接写入模型中,不在这里定义赋值
net = linreg
loss = squared_loss
"""训练过程(多epochs)"""
for epoch in range(epochs):
for X, y in data_iter(batch_size, features, labels):
l = loss(net(X, w, b), b)
l.sum().backward()
sgd([w, b], lr, batch_size)
"""完成一轮epoch需要输出当前损失"""
with torch.no_gard():
train_l = loss(net(feature, w, b), labels)
print(f'epoch: {epoch+1}, loss: {float(train_l.mean()):f}')
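Since the data was generated by us, the learned parameters can be compared against the true ones, as the original course does:
print(f'error in estimating w: {true_w - w.reshape(true_w.shape)}')
print(f'error in estimating b: {true_b - b}')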
5. Set the number of data-loading worker processes
def get_dataloader_workers():
    """Usually set to 0 on CPU-only machines"""
    return 0
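This helper is typically passed to torch.utils.data.DataLoader. A hedged usage sketch, assuming `dataset` is some existing torch Dataset (e.g. FashionMNIST as in the course):
from torch.utils.data import DataLoader
# `dataset` is assumed to be defined elsewhere
train_iter = DataLoader(dataset, batch_size=256, shuffle=True,
                        num_workers=get_dataloader_workers())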
6. Loss functions from scratch
def squared_loss(y_hat, y):
    """Squared loss (same as in section 3)"""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
def cross_entropy(y_hat, y):
    """Cross-entropy loss: pick out the predicted probability of the true class in each row"""
    return -torch.log(y_hat[range(len(y_hat)), y])
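The indexing trick y_hat[range(len(y_hat)), y] selects, for each row, the probability assigned to the true class. A small example (values taken from the original course):
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
print(y_hat[[0, 1], y])        # tensor([0.1000, 0.5000])
print(cross_entropy(y_hat, y)) # tensor([2.3026, 0.6931])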
7. Evaluate model performance
def accuracy(y_hat, y):
    """Compare predictions with the ground truth and count the correct ones"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        """y_hat holds one row of class scores per example and this is a
        multi-class problem (more than one column)"""
        # Take the argmax of each row as the predicted class label
        y_hat = y_hat.argmax(axis=1)
    # Compare predictions with the ground truth
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
    # Converting cmp's dtype is optional:
    # return float(cmp.sum())
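Using y_hat and y from the cross-entropy example above, the fraction of correct predictions is:
print(accuracy(y_hat, y) / len(y))  # 0.5: row 0 predicts class 2 (wrong), row 1 predicts class 2 (right)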
def evaluate_accuracy(net, data_iter):
    """Compute the model's accuracy on a dataset (# correct predictions / # samples)"""
    if isinstance(net, torch.nn.Module):
        """If net is a torch.nn.Module instance, switch it to evaluation
        mode by calling net.eval()"""
        # Evaluation mode disables layers such as Dropout and BatchNorm updates
        net.eval()
    """Initialize the accumulator: (# correct predictions, # predictions)"""
    # Accumulator is the helper class defined in section 8 below
    metric = Accumulator(2)
    for X, y in data_iter:
        metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
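Typical usage, assuming a model net and a held-out iterator test_iter exist as in the course (e.g. a softmax classifier on Fashion-MNIST):
# `net` and `test_iter` are assumed to be defined elsewhere
print(evaluate_accuracy(net, test_iter))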
8. Helper functions / classes
class Accumulator:
    """Accumulate running sums over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n
    def add(self, *args):
        # Add each argument to the corresponding running sum
        self.data = [a + float(b) for a, b in zip(self.data, args)]
    def reset(self):
        self.data = [0.0] * len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]
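A quick illustration of how evaluate_accuracy uses it (the batch numbers are made up):
metric = Accumulator(2)
metric.add(3, 10)  # batch 1: 3 correct out of 10
metric.add(5, 10)  # batch 2: 5 correct out of 10
print(metric[0] / metric[1])  # 0.4 overall accuracy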
9. Wrapping the optimizer
def sgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
def updater(batch_size):
    # W, b, and lr are taken from the enclosing scope, as in the course code
    return d2l.sgd([W, b], lr, batch_size)
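A sketch of the call site inside a training step, as in the course's training loop (X, y, and l come from the surrounding loop):
l = loss(net(X), y)
l.sum().backward()
updater(X.shape[0])  # pass the actual size of the current minibatch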