本代码基于《动手学深度学习》PyTorch版第三章“线性神经网络”第七节“softmax回归的简洁实现”,对代码进行了修改并增加了注释,供学习使用。
导入相关库
import matplotlib_inline
import matplotlib.pyplot as plt
import IPython
import torch
import torchvision
def use_svg_display():
    """Ask the Matplotlib inline backend to render figures as SVG."""
    matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
设置Matplotlib图形的轴属性,并在轴上启用网格线
def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Configure labels, scales, limits, legend and grid lines of an axes.

    Args:
        axes: Axes-like object exposing ``set_xlabel``, ``set_ylabel``,
            ``set_xscale``, ``set_yscale``, ``set_xlim``, ``set_ylim``,
            ``legend`` and ``grid``.
        xlabel: Value forwarded to ``axes.set_xlabel``.
        ylabel: Value forwarded to ``axes.set_ylabel``.
        xlim: Value forwarded to ``axes.set_xlim``.
        ylim: Value forwarded to ``axes.set_ylim``.
        xscale: Value forwarded to ``axes.set_xscale``.
        yscale: Value forwarded to ``axes.set_yscale``.
        legend: Legend entries; the legend is only drawn when this is truthy.
    """
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    if legend:
        axes.legend(legend)
    # Grid lines are enabled unconditionally, with or without a legend.
    axes.grid()
在动画中绘制图表:在训练过程中实时显示数据,即在交互式图形环境中动态展示数据随训练进行的变化过程
class Animator:
    """Plot one or more data series incrementally, redrawing on every update."""

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        """Create the figure and remember how its first axes is configured.

        Args:
            xlabel, ylabel, xlim, ylim, xscale, yscale, legend: Forwarded to
                ``set_axes`` each time the plot is redrawn. ``legend``
                defaults to an empty list.
            fmts: Format strings, one per plotted series.
            nrows, ncols, figsize: Forwarded to ``plt.subplots``.
        """
        if legend is None:
            legend = []
        use_svg_display()
        self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # Wrap the single axes so that self.axes[0] works below.
            self.axes = [self.axes, ]
        # Deferred so the same configuration can be re-applied after each cla().
        self.config_axes = lambda: set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append one point per series and redraw the figure.

        Args:
            x: A single x value shared by all series, or one value per series.
            y: A single y value, or a sequence with one value per series.
        """
        if not hasattr(y, '__len__'):
            y = [y]
        n = len(y)
        if not hasattr(x, '__len__'):
            x = [x] * n
        # Create the per-series point lists on first use.
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            # Pairs with a missing coordinate are skipped.
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        # Clear and redraw every series from scratch.
        self.axes[0].cla()
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(xs, ys, fmt)
        self.config_axes()
        IPython.display.display(self.fig)
        IPython.display.clear_output(wait=True)
累加数值
class Accumulator:
    """Keep running float sums in ``n`` independent slots."""

    def __init__(self, n):
        """Create ``n`` slots, each starting at 0.0."""
        self.data = [0.0] * n

    def add(self, *args):
        """Add each positional value to the slot at the same position.

        Values are converted with ``float()``. Surplus values beyond the
        number of slots are ignored. When fewer values than slots are given,
        the remaining slots keep their current totals.
        """
        updated = [total + float(value)
                   for total, value in zip(self.data, args)]
        # zip() stops at the shorter input; re-attach the untouched trailing
        # slots so a short call cannot shrink the accumulator.
        self.data = updated + self.data[len(updated):]

    def reset(self):
        """Set every slot back to 0.0, keeping the number of slots."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        """Return the running total stored in slot ``idx``."""
        return self.data[idx]
计算预测正确的数量
def accuracy(y_hat, y):
    """Return the number of predictions that match the labels, as a float.

    Args:
        y_hat: Either predicted labels, or a tensor with more than one
            dimension and more than one column, in which case the index of
            the largest entry along axis 1 is taken as the prediction.
        y: Ground-truth labels.

    Returns:
        The count of positions where prediction and label are equal.
    """
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    # Cast predictions to the label dtype so the comparison is like-for-like.
    correct = y_hat.type(y.dtype) == y
    return float(correct.type(y.dtype).sum())
评估在任意模型上对任意数据迭代器可访问的数据集的精度
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of ``net`` over all batches of ``data_iter``.

    Args:
        net: A callable model. If it is a ``torch.nn.Module`` it is switched
            to evaluation mode first (and is left in that mode).
        data_iter: Iterable yielding ``(x, y)`` batches.

    Returns:
        Number of correct predictions divided by the number of label elements.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    # Slot 0: correct predictions, slot 1: total number of labels seen.
    metric = Accumulator(2)
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for x, y in data_iter:
            metric.add(accuracy(net(x), y), y.numel())
    return metric[0] / metric[1]
加载并处理Fashion-MNIST数据集并构建迭代器
def dataset(batch_size, resize=None,
            root='C:\\Users\\kongbai\\study\\数据集\\fashionMNIST'):
    """Load Fashion-MNIST and return training and test data loaders.

    Args:
        batch_size: Batch size used for both loaders.
        resize: Optional size; when truthy, a ``Resize(resize)`` transform is
            applied before the images are converted to tensors.
        root: Directory the dataset is stored in (and downloaded to, since
            ``download=True`` is passed). The default is the original
            author's local path; pass your own directory on other machines.

    Returns:
        A ``(train_loader, test_loader)`` tuple. The training loader shuffles
        its data; the test loader does not.
    """
    steps = [torchvision.transforms.ToTensor()]
    if resize:
        # Resize must run before ToTensor, hence the insert at position 0.
        steps.insert(0, torchvision.transforms.Resize(resize))
    trans = torchvision.transforms.Compose(steps)
    train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=trans, download=True)
    test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=trans, download=True)
    train_loader = torch.utils.data.DataLoader(train, batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test, batch_size, shuffle=False)
    return train_loader, test_loader
构建神经网络
# Model: flatten every sample, then apply a single linear layer.
# - torch.nn.Sequential(*args): container that chains the given modules in
#   order, feeding the output of each one into the next.
# - torch.nn.Flatten(start_dim=1, end_dim=-1): merges dimensions start_dim
#   through end_dim into one. With the defaults, dimension 0 is kept and all
#   remaining dimensions are flattened, so each sample becomes one vector.
# - torch.nn.Linear(in_features, out_features, bias=True): fully connected
#   layer mapping in_features inputs to out_features outputs; bias controls
#   whether a bias term is added (True by default).
# 784 inputs and 10 outputs presumably correspond to 28x28 images and the
# 10 Fashion-MNIST classes; confirm against the data being fed in.
net = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 10),
)
初始化神经网络的参数
def init_weights(m):
    """Initialize the weight of a ``torch.nn.Linear`` module in place.

    The weight is filled with samples from a normal distribution with
    standard deviation 0.01. Modules of any other type are left untouched,
    and the bias is not modified.

    Args:
        m: The module to (possibly) initialize.
    """
    # Exact type match on purpose: isinstance() would also match subclasses
    # of Linear, which the original comparison did not.
    if type(m) is torch.nn.Linear:
        torch.nn.init.normal_(m.weight, std=0.01)
    # torch.nn.init offers further in-place initializers, for example:
    #   xavier_uniform_ / xavier_normal_    Xavier (Glorot) initialization
    #   kaiming_uniform_ / kaiming_normal_  Kaiming (He) initialization,
    #                                       commonly paired with ReLU
    #   normal_                             normal distribution (mean, std)
    #   uniform_                            uniform distribution
net.apply(init_weights)
# Module.apply(fn) calls fn on every submodule, recursively, and on the module itself.
# fn takes a single nn.Module argument; here it is init_weights.
# apply() returns the module it was called on, which is why a notebook echoes the model after this line.
运行结果
Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=10, bias=True)
)
构建损失函数
# Cross-entropy loss for classification: measures the gap between the
# predicted class distribution and the true label.
# It combines LogSoftmax and NLLLoss (negative log-likelihood), i.e.
#   loss(x, class) = -log(exp(x[class]) / sum(exp(x)))
# where x holds the raw model outputs and class is the true label index.
# reduction='none' keeps one loss value per sample; the training loop
# reduces them itself with .mean() / .sum().
loss = torch.nn.CrossEntropyLoss(reduction='none')
构建优化算法:使用随机梯度下降(SGD)更新参数
# Optimizer: stochastic gradient descent (SGD) over all parameters of net,
# updating the weights to reduce the loss.
# torch.optim.SGD arguments:
#   params        parameters to optimize, usually model.parameters()
#   lr            learning rate, i.e. the step size of each update (0.1 here)
#   momentum      momentum factor, speeds up convergence and damps
#                 oscillation (default 0)
#   weight_decay  weight decay / L2 penalty against overfitting (default 0)
#   dampening     dampening applied to the momentum (default 0)
#   nesterov      whether to use Nesterov momentum (default False)
trainer = torch.optim.SGD(net.parameters(), lr=0.1)
训练模型一个迭代周期(epoch)
def train_epoch(net, train, loss, updater):
    """Train ``net`` for one pass over ``train``.

    Args:
        net: The model. If it is a ``torch.nn.Module`` it is put into
            training mode first.
        train: Iterable yielding ``(x, y)`` batches.
        loss: Callable returning a loss tensor for ``(y_hat, y)``; it is
            reduced here with ``.mean()`` / ``.sum()``.
        updater: Either a ``torch.optim.Optimizer`` or a callable that is
            invoked with the batch size.

    Returns:
        A tuple ``(summed loss / number of labels,
        correct predictions / number of labels)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    # Slot 0: summed loss, slot 1: correct predictions, slot 2: label count.
    metric = Accumulator(3)
    for x, y in train:
        y_hat = net(x)
        batch_loss = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Built-in optimizer: clear old gradients, backpropagate the
            # mean loss, then take a step.
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
        else:
            # Custom updater: backpropagate the summed loss and hand over the
            # batch size. Presumably the updater normalizes by it and resets
            # the gradients itself; confirm against its implementation.
            batch_loss.sum().backward()
            updater(x.shape[0])
        metric.add(float(batch_loss.sum()), accuracy(y_hat, y), y.numel())
    return metric[0] / metric[2], metric[1] / metric[2]
训练模型,在训练集迭代器访问到的数据上训练模型,利用Animator可视化训练进度,每个迭代周期结束时,利用测试集迭代器访问到的数据对模型进行评估
def train(net, train, test, loss, n, updater):
    """Train ``net`` for ``n`` epochs, plotting progress after each one.

    After every epoch the training loss, training accuracy and test accuracy
    are added to an ``Animator`` plot. Once training has finished, the
    metrics of the last epoch are sanity-checked with assertions.

    Args:
        net: The model to train.
        train: Iterable of training batches, passed to ``train_epoch``.
        test: Iterable of test batches, passed to ``evaluate_accuracy``.
        loss: Loss callable, passed to ``train_epoch``.
        n: Number of epochs; must be at least 1.
        updater: Optimizer or update callable, passed to ``train_epoch``.

    Raises:
        ValueError: If ``n`` is smaller than 1.
        AssertionError: If the final training loss is not below 0.5, or the
            final training/test accuracy is not in the range (0.7, 1].
    """
    if n < 1:
        # Without at least one epoch there are no metrics to plot or check.
        raise ValueError(f'n must be at least 1, got {n}')
    animator = Animator(xlabel='epoch', xlim=[1, n], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(n):
        train_metrics = train_epoch(net, train, loss, updater)
        test_acc = evaluate_accuracy(net, test)
        # Epochs are plotted 1-based: (loss, train acc, test acc).
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    # Sanity checks on the metrics of the final epoch.
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
实现
# Mini-batch size used for both the training and the test data loader.
batch_size = 256
# Build the Fashion-MNIST training and test iterators (no resizing).
train_iter, test_iter = dataset(batch_size)
# Number of training epochs.
n = 10
# Train the model, plotting loss and accuracy after every epoch.
train(net, train_iter, test_iter, loss, n, trainer)
运行结果