01 - The LeNet-5 Model

This article introduces LeNet, a classic convolutional neural network originally applied to handwritten digit recognition, and implements it in PyTorch for the MNIST and CIFAR10 datasets. The training code shows how to train the LeNet model, reaching roughly 60% accuracy on CIFAR10 and over 90% on MNIST. Note that the CIFAR10 model uses batch normalization (BN) layers to improve performance.

1. Overview

LeNet, published in 1998, is a classic representative of CNNs. It was first applied to handwritten digit recognition and achieved very good results.

Paper page: http://yann.lecun.com/exdb/lenet/index.html

The network consists of two convolutional layers followed by three fully connected layers.
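
Since the implementation below uses 5x5 kernels with no padding and 2x2 max-pooling, the feature-map sizes can be checked by hand; a minimal sketch (assuming the 28x28 MNIST and 32x32 CIFAR10 input sizes used later):

def feature_size(in_size, kernel=5):
    # conv with stride 1 and no padding: out = in - kernel + 1; a 2x2 max-pool halves it
    size = (in_size - kernel + 1) // 2  # conv1 + pool1
    size = (size - kernel + 1) // 2     # conv2 + pool2
    return size

print(feature_size(28))  # MNIST:   4 -> flattened to 32 * 4 * 4
print(feature_size(32))  # CIFAR10: 5 -> flattened to 32 * 5 * 5

These are exactly the 32 * 4 * 4 and 32 * 5 * 5 flatten sizes that appear in model.py.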

 2. Implementing LeNet-5 in PyTorch

 2.1 Model file: model.py

import torch.nn as nn
import torch.nn.functional as F


class LeNet_MNIST(nn.Module):
    def __init__(self):
        super(LeNet_MNIST, self).__init__()
        # MNIST input: 1 x 28 x 28
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5)   # -> 16 x 24 x 24
        self.pool1 = nn.MaxPool2d(2, 2)                                         # -> 16 x 12 x 12
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)  # -> 32 x 8 x 8
        self.pool2 = nn.MaxPool2d(2, 2)                                         # -> 32 x 4 x 4
        self.fc1 = nn.Linear(32 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # 10 digit classes

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = x.view(-1, 32 * 4 * 4)  # flatten to (batch, 512)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x


class LeNet_CIFAR(nn.Module):
    def __init__(self):
        super(LeNet_CIFAR, self).__init__()
        # CIFAR10 input: 3 x 32 x 32
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5)   # -> 16 x 28 x 28
        self.pool1 = nn.MaxPool2d(2, 2)                                         # -> 16 x 14 x 14

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)  # -> 32 x 10 x 10
        self.pool2 = nn.MaxPool2d(2, 2)                                         # -> 32 x 5 x 5

        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.bn1 = nn.BatchNorm1d(120)  # BN after the FC layer; needed to reach ~60% on CIFAR10

        self.fc2 = nn.Linear(120, 84)
        self.bn2 = nn.BatchNorm1d(84)

        self.fc3 = nn.Linear(84, 10)  # 10 classes

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)

        x = F.relu(self.conv2(x))
        x = self.pool2(x)

        x = x.view(-1, 32 * 5 * 5)  # flatten to (batch, 800)
        x = F.relu(self.bn1(self.fc1(x)))

        x = F.relu(self.bn2(self.fc2(x)))

        x = self.fc3(x)

        return x

The file implements classifiers for both the MNIST and CIFAR10 datasets. Since MNIST images are single-channel while CIFAR10 images have three channels, the two model structures differ slightly; a quick sanity check of both is sketched below.
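
As a minimal sanity check (assuming the two classes above are importable from model.py), dummy batches can be pushed through both models to confirm the input channels and flatten dimensions:

import torch
from model import LeNet_MNIST, LeNet_CIFAR

# a batch of 2 is used because BatchNorm1d needs more than one sample in training mode
print(LeNet_MNIST()(torch.randn(2, 1, 28, 28)).shape)  # torch.Size([2, 10])
print(LeNet_CIFAR()(torch.randn(2, 3, 32, 32)).shape)  # torch.Size([2, 10])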

 2.2 Training script: train.py

import sys

sys.path.append('./../')  # add the parent directory before the local imports below

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from utils import train, test, show_result
from model import LeNet_CIFAR


def main():
    CIFAR_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))]
    )

    # transform for the MNIST variant (unused in this CIFAR10 run; see the sketch after the script)
    MNIST_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean=0.5, std=0.5)]
    )

    train_set = torchvision.datasets.CIFAR10(root='../data_set', train=True, download=True, transform=CIFAR_transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True, num_workers=0)

    test_set = torchvision.datasets.CIFAR10(root='../data_set', train=False, download=True, transform=CIFAR_transform)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False, num_workers=0)

    test_data_iter = iter(test_loader)
    test_img, test_label = next(test_data_iter)  # iterator.next() was Python 2 style; use the next() builtin
    print(test_img.shape, test_label.shape)

    net = LeNet_CIFAR().cuda()
    loss_func = nn.CrossEntropyLoss().cuda()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    loss_es = []
    acc_es = []
    best_acc = 0

    for epoch in range(50):
        loss = train(net, loss_func, optimizer, train_loader, epoch)
        acc = test(net=net, test_loader=test_loader)
        loss_es.append(loss)
        acc_es.append(acc)

        if acc > best_acc:
            print('***************** save *********************')
            best_acc = acc
            torch.save(net, './LeNet-CIFAR.pt')

    show_result(loss_es, acc_es, './LeNet5-CIFAR10.jpg')


if __name__ == '__main__':
    main()
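
To train the MNIST variant instead, only the dataset and model need to change; a minimal sketch (reusing MNIST_transform from above, with the optimizer, loss, and epoch loop kept exactly as in main()):

from model import LeNet_MNIST

train_set = torchvision.datasets.MNIST(root='../data_set', train=True, download=True, transform=MNIST_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True, num_workers=0)

test_set = torchvision.datasets.MNIST(root='../data_set', train=False, download=True, transform=MNIST_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False, num_workers=0)

net = LeNet_MNIST().cuda()  # no BN layers; MNIST converges past 90% without them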

The utility file utils.py used above is shown below. It contains the train and test functions, plots the training loss and accuracy curves, and saves the resulting figure to the given path:

import torch
from matplotlib import pyplot as plt
import numpy as np


def train(net, loss_func, optimizer, train_loader, epoch):
    """Train `net` for one epoch and return the average per-sample loss."""
    net.train()  # training mode (matters for the BN layers)
    running_loss = 0
    total = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.cuda()
        labels = labels.cuda()
        out = net(inputs)
        loss = loss_func(out, labels)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)  # loss.item() is the batch mean
        total += inputs.size(0)
    running_loss /= total

    print('Epoch: {}\tloss: {:.6f}\t'.format(epoch + 1, running_loss), end='')

    return running_loss


def test(net, test_loader):
    """Evaluate on `test_loader` and return the accuracy.

    :param net: if it is a str, load the .pt checkpoint from that path; otherwise it is a model
    """
    if isinstance(net, str):
        model = torch.load(net)
    else:
        model = net
    model.eval()  # inference mode so BN uses its running statistics
    with torch.no_grad():
        correct = 0
        total = 0
        for _, (inputs, labels) in enumerate(test_loader):
            inputs = inputs.cuda()
            labels = labels.cuda()
            out = model(inputs)
            predict_y = torch.max(out, dim=1)[1]  # index of the max logit = predicted class
            correct += torch.eq(predict_y, labels).sum().item()
            total += inputs.size(0)
        accuracy = correct / total
        print('Accuracy: {:.3f}%.\t'.format(accuracy * 100))

    return accuracy


def show_result(loss_es, acc_es, fig_save_name):
    x = np.arange(len(loss_es))

    fig, ax1 = plt.subplots()

    color = 'tab:red'
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss', color=color)
    ax1.plot(x, loss_es, label='loss', color=color)
    ax1.tick_params(axis='y', labelcolor=color)
    plt.legend()

    ax2 = ax1.twinx()  # second y-axis sharing the same x-axis
    color = 'tab:green'
    ax2.set_ylabel('acc', color=color)
    ax2.plot(x, acc_es, label='acc', color=color)
    ax2.tick_params(axis='y', labelcolor=color)

    fig.tight_layout()
    plt.legend()
    plt.savefig(fig_save_name, dpi=300)

3. Training Results

After 50 epochs of training, the model reaches over 90% accuracy on MNIST and roughly 60% on CIFAR10. Notably, the CIFAR10 model only reaches the 60% range when the BN layers are included. The training curves are saved as a figure by show_result().
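
Since test() accepts either a model or a checkpoint path, the best saved checkpoint can also be evaluated on its own (a minimal sketch, assuming a test_loader built as in train.py):

from utils import test

# test() calls torch.load() itself when given a string path
acc = test('./LeNet-CIFAR.pt', test_loader)
print('best checkpoint accuracy: {:.3f}'.format(acc))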
