Preface
Implement a softmax classification network from scratch on the Fashion-MNIST dataset.
Initializing the model parameters
- Set the batch size
- Read minibatches of data
- Set the network's input and output dimensions (each Fashion-MNIST image has shape (1, 28, 28); here every image is flattened into a one-dimensional vector of length 784, which is done inside the softmax model, and the output dimension equals the number of classes, 10)
- Initialize the weights and the bias
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
import matplotlib.pyplot as plt
from d2l import torch as d2l

batch_size = 256
num_inputs = 784   # 28 * 28 pixels, flattened
num_outputs = 10   # number of classes
w = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
# d2l.load_data_fashion_mnist(batch_size) would also work here
train_iter, test_iter = load_data_fashion_mnist(batch_size)  # defined below
def load_data_fashion_mnist(batch_size):
    """Load the Fashion-MNIST dataset and return minibatch iterators."""
    # trans = [transforms.Resize(64), transforms.ToTensor()]  # optionally resize the raw images
    trans = [transforms.ToTensor()]
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root=r"D:\pytorchProject\动手学深度学习\data", train=True, transform=trans, download=True
    )
    mnist_test = torchvision.datasets.FashionMNIST(
        root=r"D:\pytorchProject\动手学深度学习\data", train=False, transform=trans, download=True
    )
    train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=0)
    test_iter = data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=0)
    return train_iter, test_iter
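As a quick sanity check (the shapes follow from the DataLoader settings above), one training batch looks like this:
X, y = next(iter(train_iter))
print(X.shape, y.shape)  # torch.Size([256, 1, 28, 28]) torch.Size([256])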
Defining the softmax operation
The softmax consists of the following three steps:
- Exponentiate each element (using exp)
- Sum over each row (each example in the minibatch is a row) to obtain the normalization constant for each example
- Divide each row by its normalization constant
def softmax(X):
    """Softmax operation."""
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)  # sum over each row (each example is a row)
    return X_exp / partition
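A quick check on a made-up input (the (2, 5) tensor below is purely illustrative): every row of the output is non-negative and sums to 1.
X_demo = torch.normal(0, 1, (2, 5))
X_prob = softmax(X_demo)
print(X_prob, X_prob.sum(1))  # each row is a valid probability distribution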
Defining the model
def net(X):
    # flatten each image to a length-784 row vector, then apply the affine map and the softmax
    return softmax(torch.matmul(X.reshape((-1, w.shape[0])), w) + b)
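As a minimal shape check (using the iterators and parameters defined above), feeding one test batch through net yields one row of 10 class probabilities per image.
X, y = next(iter(test_iter))
print(net(X).shape)  # torch.Size([256, 10])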
Defining the loss function
We use the cross-entropy loss, i.e., the negative log of the predicted probability assigned to the true label.
def cross_entropy(y_hat, y):
    # pick the predicted probability of the true class for each example, then take -log
    return -torch.log(y_hat[range(len(y_hat)), y])
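A small worked example with made-up values: for two examples and three classes, the fancy indexing picks out the predicted probability of the true class in each row before taking the negative log.
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
print(y_hat[[0, 1], y])         # tensor([0.1000, 0.5000])
print(cross_entropy(y_hat, y))  # tensor([2.3026, 0.6931])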
Classification accuracy
Classification accuracy is the ratio of correct predictions to the total number of predictions. (Accuracy is not differentiable, so we do not optimize it directly.) We take the argmax over each row to get the index of the largest element as the predicted class, compare the predicted classes with the true labels, and count how many predictions are correct.
def accuracy(y_hat, y):
    """Count the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)  # index of the largest entry in each row = predicted class
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
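Continuing the toy example above: the first prediction (argmax class 2, true class 0) is wrong and the second (argmax class 2, true class 2) is right, so the accuracy is 0.5.
print(accuracy(y_hat, y) / len(y))  # 0.5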
For any dataset accessible through a data iterator, we can evaluate the model's accuracy on it.
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of the model on the given dataset."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # set the model to evaluation mode
    # metric = d2l.Accumulator(2)
    correct = 0.0
    y_num = 0.0
    with torch.no_grad():
        for X, y in data_iter:
            # metric.add(accuracy(net(X), y), y.numel())
            correct += accuracy(net(X), y)
            y_num += y.numel()
    return correct / y_num
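With the randomly initialized weights above, the model is essentially guessing among 10 balanced classes, so the test accuracy should come out close to 0.1:
print(evaluate_accuracy(net, test_iter))  # roughly 0.1 before training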
Training
Training one epoch:
- Compute the predictions
- Compute the loss
- Backpropagate
- Update the parameters
def train_epoch(net, train_iter, loss, updater):
    if isinstance(net, torch.nn.Module):
        net.train()  # set the model to training mode
    loss_sum = 0.0
    acc_sum = 0.0
    y_num = 0.0
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # clear the parameter gradients
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # our loss returns one value per example, so sum before backpropagating
            l.sum().backward()
            # X.shape[0] is the minibatch size; our own optimizer divides the step by it
            # so that the effective step size does not depend on the batch size
            updater(X.shape[0])
        # accumulate the total loss, the number of correct predictions, and the number of examples
        loss_sum += float(l.sum())
        acc_sum += accuracy(y_hat, y)
        y_num += y.numel()
    return loss_sum / y_num, acc_sum / y_num
Optimizer:
def updater(batch_size):
    return sgd(params=[w, b], lr=lr, batch_size=batch_size)

def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():  # the update itself should not be tracked by autograd
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
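The update rule is param ← param − lr · grad / batch_size. A minimal check on a made-up tensor (not one of the model parameters):
p = torch.tensor([1.0, 2.0], requires_grad=True)
(p * p).sum().backward()        # gradient of sum(p^2) is 2 * p = [2, 4]
sgd([p], lr=0.1, batch_size=2)  # p <- p - 0.1 * [2, 4] / 2
print(p)                        # tensor([0.9000, 1.8000], requires_grad=True)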
The full training function runs multiple epochs and evaluates the model on the test dataset after each epoch.
def train(net, train_iter, test_iter, loss, num_epochs, updater):
    train_losses, train_accs, test_accs = [], [], []
    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        test_accs.append(test_acc)
        print(f'epoch: {epoch + 1}, train_loss: {train_loss:.4f}, '
              f'train_acc: {train_acc:.4f}, test_acc: {test_acc:.4f}')
    x = torch.arange(num_epochs) + 1
    plt.plot(x, train_losses, '-', label='train_loss')
    plt.plot(x, train_accs, '--', label='train_acc')
    plt.plot(x, test_accs, '-.', label='test_acc')
    plt.xlabel('epoch')
    plt.legend()
    plt.show()
lr = 0.1
num_epochs = 10
loss = cross_entropy
# the functions passed as arguments must return values
train(net, train_iter, test_iter, loss, num_epochs, updater)
Prediction
Given a set of images, we can compare their true labels with the model's predictions.
def predict(net, test_iter, n=6):
    """Predict labels on one test batch and show them next to the true labels."""
    for X, y in test_iter:
        break  # take the first test batch
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    d2l.show_images(X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])
    plt.show()
predict(net, test_iter)