LeNet Convolutional Neural Network

This post is based on my study of the book Dive-into-DL-PyTorch, so most of its content comes from that book. The framework used is PyTorch and the development tool is PyCharm.
Reference: Dive into Deep Learning (Dive-into-DL-Pytorch)
Reference links: https://github.com/ShusenTang/Dive-into-DL-PyTorch
https://github.com/zergtant/pytorch-handbook

Convolutional layers address two problems:
On the one hand, a convolutional layer preserves the input's spatial shape, so correlations among pixels in both the height and width directions can be recognized effectively. On the other hand, a convolutional layer slides the same kernel over different positions of the input and reuses it at every position, which keeps the number of parameters from becoming too large.
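To make the second point concrete, here is a minimal sketch (not from the book; the layer sizes are chosen only for illustration) comparing the parameter count of a single convolutional layer with that of a fully connected layer producing an output of the same size from a flattened 28×28 input:

import torch
from torch import nn

conv = nn.Conv2d(1, 6, 5)             # 6 kernels of size 5x5, shared across all positions
fc = nn.Linear(28 * 28, 6 * 24 * 24)  # a dense layer producing an output of the same size
conv_params = sum(p.numel() for p in conv.parameters())
fc_params = sum(p.numel() for p in fc.parameters())
print(conv_params)  # 156 = 6 * (1*5*5) weights + 6 biases
print(fc_params)    # 2,712,960 - roughly 2.7 million parameters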
The basic unit of the convolutional block is a convolutional layer followed by a max-pooling layer: the convolutional layer recognizes spatial patterns in the image, such as lines and local parts of objects, and the subsequent max-pooling layer reduces the convolutional layer's sensitivity to location.
The output shape of the convolutional block is (batch size, channels, height, width). When this output is passed to the fully connected block, the fully connected block flattens each example in the mini-batch (see the shape check below). In other words, the input to the fully connected layers becomes two-dimensional: the first dimension is the number of examples in the mini-batch, and the second dimension is the flattened vector representation of each example, whose length is the product of channels, height, and width.
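The flattening step can be checked with a dummy tensor (a small sketch; the sizes simply mirror the conv block output used later):

import torch

feature = torch.rand(256, 16, 4, 4)        # (batch size, channels, height, width) from the conv block
flat = feature.view(feature.shape[0], -1)  # flatten each example in the mini-batch
print(flat.shape)                          # torch.Size([256, 256]); 256 = 16 * 4 * 4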

Defining the LeNet model

import time
import torch
from torch import nn, optim
import sys
import torchvision
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        # convolutional block
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size (the number of output channels equals the number of kernels)
            nn.Sigmoid(), # activation function
            nn.MaxPool2d(2, 2), # kernel_size, stride
            nn.Conv2d(6, 16, 5),
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2)
        )
        self.fc = nn.Sequential( # fully connected block
            nn.Linear(16*4*4, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )
    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1)) # img.shape[0] is the number of examples in the mini-batch, so feature.view gives shape (batch size, 16*4*4=256)
        return output
net = LeNet() 
print(net)

Model structure:

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)

Shape calculation (for the 28×28 Fashion-MNIST input; the indices match the printed conv block above):
conv:
(0): 28-5+1=24
(1): 24
(2): (24-2+2)/2=12
(3): 12-5+1=8
(4): 8
(5): (8-2+2)/2=4
Note: regardless of the input image shape, a convolutional layer only specifies the input channels, output channels, and kernel shape, and a pooling layer only specifies the kernel size, stride, and padding. When moving from the convolutional block to the fully connected block, you must trace the input shape through each layer to determine the final output shape, which fixes the first argument of the first Linear layer, i.e. its number of input features (the second argument is the number of output features). The number of input features = channels × height × width. The sketch below verifies these shapes layer by layer.
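Here is a small sketch that verifies these numbers by pushing a dummy 1×1×28×28 tensor through the conv block one layer at a time (it relies only on the LeNet class defined above):

import torch

X = torch.rand(1, 1, 28, 28)  # one fake Fashion-MNIST image
net = LeNet()
for i, layer in enumerate(net.conv):
    X = layer(X)
    print(i, layer.__class__.__name__, 'output shape:', X.shape)
# the last line prints torch.Size([1, 16, 4, 4]), so the first Linear layer needs 16*4*4 = 256 input features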

Complete code for loading the data and training the model:

import time
import torch
from torch import nn, optim
import sys
import torchvision
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size (the number of output channels equals the number of kernels)
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2), # kernel_size, stride
            nn.Conv2d(6, 16, 5),
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(16*4*4, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )
    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output
# Evaluate accuracy on a data iterator
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # if no device is specified, use the device of net's parameters
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # evaluation mode; this disables dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()  # switch back to training mode
            else:  # custom model, GPU not considered
                if ('is_training' in net.__code__.co_varnames):  # if the function has an is_training argument
                    # set is_training to False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

# Training function
def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes are used to speed up data loading
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

net = LeNet()
#print(net)
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

Results:

training on  cpu
epoch 1, loss 1.9338, train acc 0.287, test acc 0.588, time 28.5 sec
epoch 2, loss 0.9594, train acc 0.633, test acc 0.688, time 33.2 sec
epoch 3, loss 0.7737, train acc 0.713, test acc 0.725, time 32.4 sec
epoch 4, loss 0.6868, train acc 0.738, test acc 0.742, time 32.9 sec
epoch 5, loss 0.6318, train acc 0.754, test acc 0.760, time 34.0 sec
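
As a side note, load_data_fashion_mnist also takes a resize argument that is not used above. If you wanted to feed the network 32×32 images as in the original LeNet, the sketch below shows the changes that would be needed (an assumption for illustration, not part of the training run above): the conv block then maps 32→28→14→10→5, so the flattened feature length becomes 16*5*5 = 400.

# Sketch only: resize Fashion-MNIST images to 32x32 and adapt the classifier input size.
train_iter, test_iter = load_data_fashion_mnist(batch_size=256, resize=32)
# In LeNet.__init__, the first fully connected layer would then become:
#     nn.Linear(16 * 5 * 5, 120)  # 32 -> 28 -> 14 -> 10 -> 5 through the conv block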