本代码基于《动手学深度学习》PyTorch版第四章“多层感知机”第二节“多层感知机的从零开始实现”,对原代码进行了修改并增加了注释,供学习使用。
导入相关库
import matplotlib_inline
import matplotlib.pyplot as plt
import IPython
import torch
from torch import nn
import torchvision
# Make Matplotlib use the SimHei font for sans-serif text.
# NOTE(review): presumably needed so the Chinese class names used as image
# titles in this file render correctly; requires SimHei to be installed.
plt.rcParams['font.sans-serif'] = ['SimHei']
def use_svg_display():
    """Ask the inline Matplotlib backend to emit figures as SVG."""
    inline_backend = matplotlib_inline.backend_inline
    inline_backend.set_matplotlib_formats('svg')
设置Matplotlib图形的轴属性,并在轴上启用网格线
def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Configure labels, scales and limits of *axes*, then enable the grid.

    Args:
        axes: Object exposing the Matplotlib ``Axes`` setter methods.
        xlabel, ylabel: Axis labels.
        xlim, ylim: Axis limits.
        xscale, yscale: Axis scale names.
        legend: Legend entries; the legend is only drawn when this is truthy.
    """
    # Scales are applied before limits, in the same order as the setters
    # are listed here.
    ordered_settings = (
        ('set_xlabel', xlabel),
        ('set_ylabel', ylabel),
        ('set_xscale', xscale),
        ('set_yscale', yscale),
        ('set_xlim', xlim),
        ('set_ylim', ylim),
    )
    for setter_name, value in ordered_settings:
        getattr(axes, setter_name)(value)
    if legend:
        axes.legend(legend)
    axes.grid()
在动画中绘制图表:在训练过程中实时显示数据。这里的“在动画中绘制图表”是指在计算机动画或交互式图形应用程序中,实时显示数据的变化过程。
class Animator:
    """Redraw a line chart every time new data points are added.

    The constructor creates the figure and stores a callable that applies
    the requested axis settings; ``add`` appends points, clears the first
    axes, replots every series and re-displays the figure.
    """

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        """Create the figure and remember how its first axes is configured.

        Args:
            xlabel, ylabel, xlim, ylim, xscale, yscale, legend: Forwarded to
                ``set_axes`` on every redraw; ``legend`` defaults to ``[]``.
            fmts: Format strings, one per plotted series.
            nrows, ncols, figsize: Passed to ``plt.subplots``.
        """
        legend = [] if legend is None else legend
        use_svg_display()
        figure, axes = plt.subplots(nrows, ncols, figsize=figsize)
        self.fig = figure
        # A single subplot comes back as one object; wrap it so that
        # ``self.axes[0]`` works.
        self.axes = [axes, ] if nrows * ncols == 1 else axes

        def configure():
            set_axes(self.axes[0], xlabel, ylabel, xlim, ylim,
                     xscale, yscale, legend)

        self.config_axes = configure
        self.X = None
        self.Y = None
        self.fmts = fmts

    def add(self, x, y):
        """Append one point per series and redraw the chart.

        Args:
            x: A single x value (reused for every series) or a sequence of
                x values, one per series.
            y: A single y value or a sequence of y values, one per series.
        """
        ys = y if hasattr(y, '__len__') else [y]
        series_count = len(ys)
        xs = x if hasattr(x, '__len__') else [x] * series_count
        # Lazily create one list per series on the first call.
        if not self.X:
            self.X = [[] for _ in range(series_count)]
        if not self.Y:
            self.Y = [[] for _ in range(series_count)]
        for idx, (px, py) in enumerate(zip(xs, ys)):
            if px is None or py is None:
                continue
            self.X[idx].append(px)
            self.Y[idx].append(py)
        canvas = self.axes[0]
        canvas.cla()
        for line_x, line_y, fmt in zip(self.X, self.Y, self.fmts):
            canvas.plot(line_x, line_y, fmt)
        self.config_axes()
        IPython.display.display(self.fig)
        IPython.display.clear_output(wait=True)
累加数值
class Accumulator:
    """Keep running float sums in a fixed number of slots."""

    def __init__(self, n):
        """Start with *n* slots, each holding ``0.0``."""
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each argument (converted with ``float``) to its slot.

        Slots and arguments are paired with ``zip``, so the stored list
        ends up as long as the shorter of the two.
        """
        updated = []
        for current, increment in zip(self.data, args):
            updated.append(current + float(increment))
        self.data = updated

    def reset(self):
        """Set every slot back to ``0.0``."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        """Return the value stored at *idx*."""
        return self.data[idx]
计算预测正确的数量
def accuracy(y_hat, y):
    """Return the number of correct predictions as a float.

    Args:
        y_hat: Either predicted class indices, or a tensor with more than
            one dimension and more than one column, in which case the
            arg-max along dimension 1 is taken as the prediction.
        y: Target tensor; predictions are cast to its dtype before the
            comparison.
    """
    predicted = y_hat
    if y_hat.dim() > 1 and y_hat.shape[1] > 1:
        predicted = y_hat.argmax(dim=1)
    matches = predicted.to(y.dtype) == y
    return float(matches.to(y.dtype).sum())
评估在任意模型上对任意数据迭代器可访问的数据集的精度
def evaluate_accuracy(net, data_iter):
    """Return the fraction of correctly predicted samples in *data_iter*.

    Args:
        net: Callable model; switched to evaluation mode when it is an
            ``nn.Module``.
        data_iter: Iterable yielding ``(features, labels)`` batches.
    """
    if isinstance(net, nn.Module):
        net.eval()
    # Slot 0: number of correct predictions, slot 1: number of labels seen.
    totals = Accumulator(2)
    with torch.no_grad():
        for features, labels in data_iter:
            correct = accuracy(net(features), labels)
            totals.add(correct, labels.numel())
    return totals[0] / totals[1]
可视化数据集图像
def show(imgs, rows, cols, titles = None, scale = 1.5):
    """Draw images on a ``rows`` x ``cols`` grid and return the flat axes.

    Args:
        imgs: Iterable of images; tensors are converted to NumPy arrays,
            anything else is passed to ``imshow`` unchanged.
        rows: Number of grid rows.
        cols: Number of grid columns.
        titles: Optional sequence of titles, indexed by image position.
        scale: Size of each grid cell, used to compute the figure size.

    Returns:
        The flattened array of axes created by ``plt.subplots``.
    """
    figsize = (cols * scale, rows * scale)
    # squeeze=False always yields a 2-D array of axes. Without it a 1x1
    # grid returns a bare Axes object, which has no ``flatten`` method.
    _, axes = plt.subplots(rows, cols, figsize = figsize, squeeze = False)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # detach()/cpu() make the conversion work for tensors that track
            # gradients or live on another device.
            ax.imshow(img.detach().cpu().numpy())
        else:
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
返回Fashion-MNIST数据集的文本标签
def get_label(labels):
    """Map numeric Fashion-MNIST class indices to their text labels.

    Args:
        labels: Iterable of values convertible with ``int``.

    Returns:
        A list with one label string per input index.
    """
    class_names = ('T恤', '裤子', '套衫', '连衣裙', '外套',
                   '凉鞋', '衬衫', '运动鞋', '包', '短靴')
    return [class_names[index] for index in map(int, labels)]
加载并处理Fashion-MNIST数据集并构建迭代器
def dataset(batch_size, resize = None,
            root = 'C:\\Users\\kongbai\\study\\数据集\\fashionMNIST'):
    """Download/load Fashion-MNIST and return train and test data loaders.

    Args:
        batch_size: Batch size used by both loaders.
        resize: Optional size; when truthy a ``Resize`` transform is applied
            before the conversion to tensors.
        root: Directory where the dataset is stored or downloaded to. The
            default keeps the original hard-coded location; pass another
            path to run on a different machine.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    steps = [torchvision.transforms.ToTensor()]
    if resize:
        steps.insert(0, torchvision.transforms.Resize(resize))
    transform = torchvision.transforms.Compose(steps)
    # Same settings for both splits; only the ``train`` flag differs.
    train_set, test_set = (
        torchvision.datasets.FashionMNIST(
            root = root, train = is_train, transform = transform, download = True)
        for is_train in (True, False)
    )
    train_loader = torch.utils.data.DataLoader(train_set, batch_size, shuffle = True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size, shuffle = False)
    return train_loader, test_loader
定义ReLU激活函数
def relu(x):
    """Apply the ReLU activation: element-wise maximum of *x* and zero."""
    # torch.zeros_like(x) builds an all-zero tensor with the same shape as
    # x; dtype, layout and device are taken from x unless given explicitly.
    zero_floor = torch.zeros_like(x)
    return torch.max(x, zero_floor)
定义模型
def net(x):
    """Forward pass: flatten, one hidden ReLU layer, then a linear output.

    Reads the module-level ``inputs``, ``w1``, ``b1``, ``w2`` and ``b2``.
    """
    flat = x.reshape((-1, inputs))
    hidden = relu(torch.matmul(flat, w1) + b1)
    return torch.matmul(hidden, w2) + b2
训练模型一轮
def train_epoch(net, train, loss, updater):
    """Train *net* for one pass over *train*.

    Args:
        net: Callable model; put into training mode when it is a
            ``torch.nn.Module``.
        train: Iterable yielding ``(features, labels)`` batches.
        loss: Callable returning the loss tensor for a batch.
        updater: Either a ``torch.optim.Optimizer`` or a callable that is
            invoked with the batch size after the backward pass.

    Returns:
        ``(summed loss / number of labels, correct predictions / number of
        labels)`` for this pass.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed loss, correct predictions, number of labels.
    totals = Accumulator(3)
    for features, labels in train:
        predictions = net(features)
        batch_loss = loss(predictions, labels)
        if uses_optimizer:
            # Built-in optimizer: step on the gradient of the mean loss.
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
        else:
            # Custom updater: back-propagate the summed loss and hand it the
            # batch size.
            batch_loss.sum().backward()
            updater(features.shape[0])
        totals.add(float(batch_loss.sum()), accuracy(predictions, labels),
                   labels.numel())
    loss_sum, correct, seen = totals[0], totals[1], totals[2]
    return loss_sum / seen, correct / seen
训练模型,在训练集迭代器访问到的数据上训练模型,利用Animator可视化训练进度,每个迭代周期结束时,利用测试集迭代器访问到的数据对模型进行评估
def train(net, train, test, loss, n, updater):
    """Train *net* for *n* epochs while plotting progress.

    After every epoch the training loss, training accuracy and test
    accuracy are added to an ``Animator`` chart. Once training finishes,
    the final metrics are sanity-checked with assertions.

    Args:
        net: Model to train.
        train: Iterable of training batches.
        test: Iterable of evaluation batches.
        loss: Loss callable passed on to ``train_epoch``.
        n: Number of epochs.
        updater: Optimizer or update callable passed on to ``train_epoch``.
    """
    chart = Animator(
        xlabel='epoch',
        xlim=[1, n],
        ylim=[0.3, 0.9],
        legend=['train loss', 'train acc', 'test acc'],
    )
    for epoch_number in range(1, n + 1):
        epoch_stats = train_epoch(net, train, loss, updater)
        test_acc = evaluate_accuracy(net, test)
        chart.add(epoch_number, epoch_stats + (test_acc,))
    # Check the metrics of the last epoch.
    train_loss, train_acc = epoch_stats
    assert train_loss < 0.5, train_loss
    assert 0.7 < train_acc <= 1, train_acc
    assert 0.7 < test_acc <= 1, test_acc
预测
def predict(net, test, n = 5):
    """Show the first images of the first test batch with true/predicted labels.

    Each image title has the true label on the first line and the predicted
    label on the second.

    Args:
        net: Callable model returning per-class scores for a batch.
        test: Iterable yielding ``(features, labels)`` batches.
        n: Maximum number of images to show. Fewer are shown when the
            first batch is smaller than this.

    Raises:
        ValueError: If *test* yields no batches.
    """
    try:
        x, y = next(iter(test))
    except StopIteration:
        raise ValueError('test iterator yielded no batches') from None
    # Never ask for more images than the batch holds, otherwise the reshape
    # below fails.
    count = min(n, len(x))
    with torch.no_grad():
        predicted = net(x).argmax(axis = 1)
    trues = get_label(y[0 : count])
    preds = get_label(predicted[0 : count])
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    show(x[0 : count].reshape((count, 28, 28)), 1, count, titles = titles)
实现
# Batch size used by both data loaders.
batch_size = 256
# dataset() returns the training loader first and the test loader second.
train_iter, test_iter = dataset(batch_size)
初始化模型参数
# Layer sizes: 784 inputs (net() flattens each sample to this width; predict()
# reshapes samples to 28 x 28), 10 outputs (one per label in get_label) and
# 256 hidden units.
inputs, outputs, hiddens = 784, 10, 256
w1 = nn.Parameter(torch.randn(inputs, hiddens, requires_grad = True) * 0.01)
# nn.Parameter marks a tensor as a learnable parameter. When assigned as an
# attribute of a module it is registered as one of that module's parameters.
#   data: optional tensor holding the initial value
#   requires_grad: optional bool, whether gradients are computed (default True)
# torch.randn draws values from the standard normal distribution
# (mean 0, standard deviation 1); here they are scaled by 0.01.
#   size: ints or a tuple of ints giving the output shape
#   out: optional tensor that receives the result
#   dtype / layout / device: optional data type, memory layout and device
b1 = nn.Parameter(torch.zeros(hiddens, requires_grad = True))
# torch.zeros creates an all-zero tensor.
#   size: ints or a tuple of ints giving the output shape
#   out: optional tensor that receives the result
#   dtype / layout / device: optional data type, memory layout and device
w2 = nn.Parameter(torch.randn(hiddens, outputs, requires_grad = True) * 0.01)
b2 = nn.Parameter(torch.zeros(outputs, requires_grad = True))
# n is passed to train() as the number of epochs; lr is the learning rate.
n, lr = 10, 0.1
# All trainable tensors, collected in one list.
params = [w1, b1, w2, b2]
构建交叉熵损失函数
# reduction='none' keeps one loss value per sample; train_epoch() reduces it
# itself with .mean() or .sum().
loss = nn.CrossEntropyLoss(reduction = 'none')
# nn.CrossEntropyLoss measures the difference between the predicted class
# distribution and the true labels; it is commonly used for multi-class
# classification. It combines nn.LogSoftmax() and nn.NLLLoss().
#   weight: optional tensor of per-class weights (all classes weigh 1 if omitted)
#   ignore_index: optional int, a target class whose loss is ignored
#   reduction: optional str, one of 'none', 'sum', 'mean' (default 'mean')
#   label_smoothing: optional float, amount of label smoothing (default 0)
构建随机梯度下降优化算法
# torch.optim.SGD implements stochastic gradient descent: it updates the
# model parameters iteratively, moving them along the gradient of the loss
# scaled by the learning rate. Each step uses a mini-batch rather than the
# whole dataset, which keeps the cost per update low.
#   params: iterable of parameters to optimise (usually model.parameters())
#   lr: learning rate, the step size of each update
#   momentum: optional momentum factor (default 0)
#   weight_decay: optional L2 penalty (default 0)
#   dampening: optional dampening for momentum (default 0)
#   nesterov: optional bool, enable Nesterov momentum (default False)
# `trainer` was passed to train() without ever being defined; create the
# optimizer over the module-level parameter list here.
trainer = torch.optim.SGD(params, lr = lr)
train(net, train_iter, test_iter, loss, n, trainer)
运行结果
# Display images from the first test batch with their true and predicted labels.
predict(net, test_iter)
运行结果