PyTorch Basics Tutorial (5): Hands-On Case Study -- Fashion-MNIST Classification

Fashion-MNIST Classification Task

Fashion-MNIST

  • The classic MNIST dataset contains a large number of handwritten digits. For more than a decade, researchers in machine learning, computer vision, artificial intelligence, and deep learning have treated it as one of the standard benchmarks for their algorithms, and it appears in papers at many conferences and journals. In practice, MNIST has become one of the datasets that algorithm authors are all but required to test on. As the joke goes: "If an algorithm doesn't work on MNIST, it won't work at all; and even if it does work on MNIST, it may still fail on other data!"
  • Fashion-MNIST is intended as a drop-in replacement for MNIST: as an algorithm author, you can switch to it without modifying any code. Its image size, number of training and test samples, and number of classes are identical to the classic MNIST.
  • A sample of the dataset looks roughly as follows, with each class occupying three rows: [sample image]

Class Labels

  • In the Fashion-MNIST dataset, every training sample is annotated with one of the following classes:
Label   Description
0       T-shirt/top
1       Trouser
2       Pullover
3       Dress
4       Coat
5       Sandal
6       Shirt
7       Sneaker
8       Bag
9       Ankle boot

Task Description

  • Fashion-MNIST is an image dataset intended to replace the handwritten-digit MNIST dataset. It is provided by the research arm of Zalando, a German fashion-technology company, and covers front-view images of 70,000 distinct products from 10 categories. Its size, format, and training/test split are identical to the original MNIST: a 60,000/10,000 train/test split of 28×28 grayscale images. You can use it to benchmark your machine learning and deep learning algorithms without changing any code.
  • In this task, you will design, build, and train a machine learning model on Fashion-MNIST that labels the test data as accurately as possible.

File Description

  • The dataset is split into a training part and a test part, with the following files (a sketch for reading these raw files directly is given after this list):
    • Training images: train-images-idx3-ubyte.gz
    • Training labels: train-labels-idx1-ubyte.gz
    • Test images: t10k-images-idx3-ubyte.gz
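
The three .gz files use the standard IDX format: a small big-endian header followed by raw uint8 pixel or label bytes. The sketch below is one way to read them directly with NumPy, independent of torchvision; the file paths are assumptions and should point at wherever the archives were actually downloaded.

import gzip
import struct
import numpy as np

def read_idx_images(path):
    # IDX3 image file: 16-byte header (magic, count, rows, cols), then uint8 pixels.
    with gzip.open(path, 'rb') as f:
        magic, num, rows, cols = struct.unpack('>IIII', f.read(16))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows, cols)

def read_idx_labels(path):
    # IDX1 label file: 8-byte header (magic, count), then one uint8 label per sample.
    with gzip.open(path, 'rb') as f:
        magic, num = struct.unpack('>II', f.read(8))
        return np.frombuffer(f.read(), dtype=np.uint8)

# Hypothetical paths; adjust to the actual download location.
train_images = read_idx_images('./Datasets/FashionMNIST/train-images-idx3-ubyte.gz')
train_labels = read_idx_labels('./Datasets/FashionMNIST/train-labels-idx1-ubyte.gz')
print(train_images.shape, train_labels.shape)  # expected: (60000, 28, 28) (60000,)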

Evaluation

Metric

  • This task uses accuracy (ACC) as the evaluation metric; a minimal example of computing it is shown below.
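
Accuracy is the fraction of samples whose predicted label equals the ground-truth label. A minimal sketch with made-up predictions and labels (illustrative values only):

import torch

preds = torch.tensor([0, 2, 1, 1])    # hypothetical predicted labels
labels = torch.tensor([0, 2, 0, 1])   # hypothetical ground-truth labels
acc = (preds == labels).float().mean().item()
print(acc)  # 0.75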

Code Implementation

Loading the Dataset

  • Fashion-MNIST consists of 28×28 grayscale images with a 60,000/10,000 training/test split, covering front-view images of 70,000 distinct products from 10 categories.
import time
import numpy as np
import torch
from torch import nn, optim
import torchvision
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

################### Fashion-MNIST dataset loading ######################
def load_data_fashion_mnist(batch_size, resize=None, root='./Datasets/FashionMNIST'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)

    return train_iter, test_iter
#################################################################
batch_size = 32
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=96)
# Display images
from matplotlib import pyplot as plt
from IPython import display

def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

def show_fashion_mnist(images, labels):
    """Use svg format to display plot in jupyter"""
    display.set_matplotlib_formats('svg')
    # The underscore denotes a variable we ignore (do not use)
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((96, 96)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)

# Fetch the first batch from the training set
train_data = iter(train_iter)
images, labels = next(train_data)
# Inspect the images and text labels of the first 10 training samples
labels = get_fashion_mnist_labels(labels)
show_fashion_mnist(images[:10], labels[:10])
plt.show()
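# Optional sanity check: the two loaders should cover the full 60,000 training
# and 10,000 test images.
print(len(train_iter.dataset), len(test_iter.dataset))  # expected: 60000 10000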
# Model design
class Residual(nn.Module):
    # Configurable: the output channel count, whether to use an extra 1x1 conv layer to change the channel count, and the conv stride.
    def __init__(self, in_c, out_c, c1, c2, c3, c4, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        # Branch 1: a single 1 x 1 conv layer
        self.p1_1 = nn.Conv2d(in_c, c1, kernel_size=1, stride=stride)
        # Branch 2: 1 x 1 conv layer followed by a 3 x 3 conv layer
        self.p2_1 = nn.Conv2d(in_c, c2[0], kernel_size=1, stride=stride)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # Branch 3: 1 x 1 conv layer followed by a 5 x 5 conv layer
        self.p3_1 = nn.Conv2d(in_c, c3[0], kernel_size=1, stride=stride)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # Branch 4: 3 x 3 max pooling followed by a 1 x 1 conv layer
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_c, c4, kernel_size=1, stride=stride)

        if use_1x1conv:
            self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=1, stride=stride)
        else:
            self.conv1 = None
        self.bn = nn.BatchNorm2d(out_c)

    def forward(self, X):
        p1 = F.relu(self.p1_1(X))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(X))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(X))))
        p4 = F.relu(self.p4_2(self.p4_1(X)))
        Y = self.bn(torch.cat((p1, p2, p3, p4), dim=1))

        if self.conv1:
            X = self.conv1(X)
        return F.relu(Y + X)


def resnet_block(in_c, out_c, c1, c2, c3, c4, num_residuals, first_block=False):
    if first_block:
        assert in_c == out_c  # the first block keeps the channel count equal to the input
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_c, out_c, c1, c2, c3, c4, use_1x1conv=True, stride=2))
        else:
            blk.append(Residual(out_c, out_c, c1, c2, c3, c4))
    return nn.Sequential(*blk)


class GlobalAvgPool2d(nn.Module):
    # Global average pooling can be implemented by setting the pooling window to the input's height and width
    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()
    def forward(self, x):
        return F.avg_pool2d(x, kernel_size=x.size()[2:])


class FlattenLayer(torch.nn.Module):  # flatten operation
    def forward(self, x):
        return x.view(x.shape[0], -1)


net = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
net.add_module("resnet_block1", resnet_block(32, 32, 8, (4, 8), (4, 8), 8, 2, first_block=True))
net.add_module("resnet_block2", resnet_block(32, 80, 16, (16, 32), (8, 16), 16, 2))
net.add_module("resnet_block3", resnet_block(80, 192, 32, (32, 64), (32, 64), 32, 2))
net.add_module("resnet_block4", resnet_block(192, 320, 64, (64, 128), (32, 64), 64, 2))

net.add_module("global_avg_pool", GlobalAvgPool2d()) # GlobalAvgPool2d的输出: (Batch, 256, 1, 1)
net.add_module("fc", nn.Sequential(FlattenLayer(), nn.Linear(320, 10)))
# Model definition - ResNet
# print(net)
print('Shape after each module for a 1*1*96*96 input')
X = torch.rand((1, 1, 96, 96))
for name, layer in net.named_children():
    X = layer(X)
    print(name, ' output shape:\t', X.shape)

Shape after each module for a 1*1*96*96 input
0 output shape: torch.Size([1, 32, 96, 96])
1 output shape: torch.Size([1, 32, 96, 96])
2 output shape: torch.Size([1, 32, 96, 96])
3 output shape: torch.Size([1, 32, 48, 48])
resnet_block1 output shape: torch.Size([1, 32, 48, 48])
resnet_block2 output shape: torch.Size([1, 80, 24, 24])
resnet_block3 output shape: torch.Size([1, 192, 12, 12])
resnet_block4 output shape: torch.Size([1, 320, 6, 6])
global_avg_pool output shape: torch.Size([1, 320, 1, 1])
fc output shape: torch.Size([1, 10])
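
Note that the four branch outputs of Residual are concatenated along the channel dimension, so c1 + c2[1] + c3[1] + c4 must equal out_c, otherwise the BatchNorm2d(out_c) and the shortcut addition Y + X will fail. A quick sketch for checking one block in isolation, reusing the values from resnet_block1 above:

blk = Residual(32, 32, 8, (4, 8), (4, 8), 8)  # 8 + 8 + 8 + 8 = 32 output channels
X = torch.rand((1, 32, 48, 48))
print(blk(X).shape)  # expected: torch.Size([1, 32, 48, 48])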

Training the Model

def evaluate_accuracy(data_iter, net, device=torch.device('cpu')):
    """Evaluate accuracy of a model on the given data set."""
    acc_sum, n = torch.tensor([0], dtype=torch.float32, device=device), 0
    for X, y in data_iter:
        # If device is the GPU, copy the data to the GPU.
        X, y = X.to(device), y.to(device)
        net.eval()
        with torch.no_grad():
            y = y.long()
            # argmax over each row picks the predicted class, e.g. [[0.2, 0.4, 0.5, 0.6, 0.8], [0.1, 0.2, 0.4, 0.3, 0.1]] => [4, 2]
            acc_sum += torch.sum((torch.argmax(net(X), dim=1) == y))
            n += y.shape[0]
    return acc_sum.item() / n
def train_ch(net, train_iter, test_iter, criterion, num_epochs, device, lr=None):
    """Train and evaluate a model with CPU or GPU."""
    print('training on', device)
    net.to(device)
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    best_test_acc = 0
    for epoch in range(num_epochs):
        train_l_sum = torch.tensor([0.0], dtype=torch.float32, device=device)
        train_acc_sum = torch.tensor([0.0], dtype=torch.float32, device=device)
        n, start = 0, time.time()
        for X, y in train_iter:
            net.train()

            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                y = y.long()
                train_l_sum += loss.float()
                train_acc_sum += (torch.sum((torch.argmax(y_hat, dim=1) == y))).float()
                n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net, device)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))
        if test_acc > best_test_acc:
            print('find best! save at model/best.pth')
            best_test_acc = test_acc
            torch.save(net.state_dict(), 'model/best.pth')  # note: the model/ directory must already exist
# Hyperparameter settings
lr, num_epochs = 0.001, 7
criterion = nn.CrossEntropyLoss()   # cross-entropy measures the distance between two probability distributions; the smaller it is, the closer the two are
train_ch(net, train_iter, test_iter, criterion, num_epochs, device, lr)

training on cuda
epoch 1, loss 0.0105, train acc 0.881, test acc 0.868, time 99.9 sec
find best! save at model/best.pth
epoch 2, loss 0.0086, train acc 0.901, test acc 0.884, time 100.1 sec
find best! save at model/best.pth
epoch 3, loss 0.0076, train acc 0.912, test acc 0.907, time 98.1 sec
find best! save at model/best.pth
epoch 4, loss 0.0068, train acc 0.921, test acc 0.908, time 100.4 sec
find best! save at model/best.pth
epoch 5, loss 0.0061, train acc 0.929, test acc 0.915, time 99.4 sec
find best! save at model/best.pth
epoch 6, loss 0.0056, train acc 0.935, test acc 0.907, time 98.6 sec
epoch 7, loss 0.0050, train acc 0.943, test acc 0.916, time 99.6 sec
find best! save at model/best.pth
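
After training, the best checkpoint can be reloaded for inference or further evaluation. A minimal sketch, assuming the model/best.pth file saved above and the evaluate_accuracy helper defined earlier:

net.load_state_dict(torch.load('model/best.pth', map_location=device))
net.to(device)
print('best checkpoint test acc: %.3f' % evaluate_accuracy(test_iter, net, device))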

Exercise

Try improving the test accuracy by designing your own network architecture, and get a feel for how changes to the model affect the results.
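
As one possible starting point (a hedged sketch, not a required solution), you could swap in torchvision's standard ResNet-18 and adapt its first convolution to single-channel input, then reuse the training loop above; whether this actually improves accuracy is for you to verify:

from torchvision.models import resnet18

alt_net = resnet18(num_classes=10)  # standard ResNet-18 with a 10-way classifier head
alt_net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)  # accept grayscale input
train_ch(alt_net, train_iter, test_iter, criterion, num_epochs, device, lr)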
