PyTorch Learning (23): Convolutional Neural Networks - the NIN Model

Introduction

  NIN stands for Network in Network. It proposes building a deep network by serially stacking multiple small networks, each composed of a convolutional layer and "fully connected" layers [1].

1 The NIN Block

  NIN uses $1 \times 1$ convolutional layers in place of fully connected layers, so that spatial information can be passed naturally to the layers that follow.
  (Figure: structural comparison of NIN with the AlexNet and VGG networks.)

  The NIN block is the basic building block of the NIN model. Its characteristics are:
  1) It consists of one convolutional layer followed by two $1 \times 1$ convolutional layers that act as fully connected layers, connected in series;
  2) The hyperparameters of the first convolutional layer can be chosen freely, while those of the remaining layers are generally fixed.

"""
@author: Inki
@contact: inki.yinji@qq.com
@version: Created in 2020 1221, last modified in 2020 1221.
"""

import time
import torch
import torch.nn as nn
from torch import optim
from torch.nn import functional
from util.SimpleTool import load_data_fashion_mnist, train, FlattenLayer


def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    ret_block = nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
                              nn.ReLU(),
                              nn.Conv2d(out_channels, out_channels, kernel_size=1),
                              nn.ReLU(),
                              nn.Conv2d(out_channels, out_channels, kernel_size=1),
                              nn.ReLU())

    return ret_block
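
  As a quick check (a minimal sketch, not part of the original code), a single NIN block can be applied to a random input to confirm that only the first convolution changes the spatial size, while the two $1 \times 1$ convolutions keep it unchanged:

blk = nin_block(1, 96, kernel_size=11, stride=4, padding=0)
x = torch.rand(1, 1, 224, 224)
# Spatial size after the first convolution: (224 - 11) // 4 + 1 = 54.
print(blk(x).shape)  # torch.Size([1, 96, 54, 54])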

2 The NIN Model

  Characteristics of the model:
  1) The convolution window shapes are $11 \times 11$, $5 \times 5$, and $3 \times 3$ respectively, and the output channel counts are the same as in AlexNet;
  2) Each NIN block is followed by a max pooling layer with a stride of 2 and a $3 \times 3$ window;
  3) The last three fully connected layers of AlexNet are removed; instead, the final NIN block has as many output channels as there are label classes, and a global average pooling layer averages all elements of each channel, with the result used directly for classification;
  4) A global average pooling layer is an average pooling layer whose window shape equals the spatial shape of its input; it significantly reduces the model's parameter size and thus helps mitigate overfitting (a rough parameter count follows the layer-shape output below);
  5) This design may, however, increase the training time.

class GlobalAvgPool2d(nn.Module):

    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        """
        Global average pooling: the pooling window equals the input's spatial size (height x width).
        """
        return functional.avg_pool2d(x, kernel_size=x.size()[2:])


def get_net():
    ret_net = nn.Sequential(nin_block(1, 96, kernel_size=11, stride=4, padding=0),
                            nn.MaxPool2d(kernel_size=3, stride=2),
                            nin_block(96, 256, kernel_size=5, stride=1, padding=2),
                            nn.MaxPool2d(kernel_size=3, stride=2),
                            nin_block(256, 384, kernel_size=3, stride=1, padding=1),
                            nn.MaxPool2d(kernel_size=3, stride=2),
                            nn.Dropout(0.5),
                            nin_block(384, 10, kernel_size=3, stride=1, padding=1),
                            GlobalAvgPool2d(),
                            FlattenLayer())

    return ret_net


def test1():
    x = torch.rand(1, 1, 224, 224)
    temp_net = get_net()
    for name, block in temp_net.named_children():
        x = block(x)
        print(name, 'output shape:', x.shape)


if __name__ == '__main__':
    test1()

  The output is as follows:

0 output shape: torch.Size([1, 96, 54, 54])
1 output shape: torch.Size([1, 96, 26, 26])
2 output shape: torch.Size([1, 256, 26, 26])
3 output shape: torch.Size([1, 256, 12, 12])
4 output shape: torch.Size([1, 384, 12, 12])
5 output shape: torch.Size([1, 384, 5, 5])
6 output shape: torch.Size([1, 384, 5, 5])
7 output shape: torch.Size([1, 10, 5, 5])
8 output shape: torch.Size([1, 10, 1, 1])
9 output shape: torch.Size([1, 10])
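
  Point 4 above can be checked with a rough parameter count (a minimal sketch, not part of the original code); summing over all convolution weights and biases of the layers listed above gives on the order of two million parameters, far fewer than the tens of millions found in AlexNet's fully connected layers alone:

net = get_net()
# Count every learnable parameter (all convolution weights and biases).
print(sum(p.numel() for p in net.parameters()))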

3 Model Training

def test2():
    temp_batch_size = 128
    temp_resize = 224
    temp_lr = 0.002
    temp_num_epochs = 5
    temp_net = get_net()
    temp_tr_iter, temp_te_iter = load_data_fashion_mnist(temp_batch_size, resize=temp_resize)
    temp_optimizer = optim.Adam(temp_net.parameters(), lr=temp_lr)
    train(temp_net, temp_tr_iter, temp_te_iter, temp_batch_size, temp_optimizer, num_epochs=temp_num_epochs)


if __name__ == '__main__':
    test2()
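
  For each epoch, the train function (defined in util.SimpleTool below) moves the model and the current batch to the GPU when one is available and prints that epoch's average loss, training accuracy, test accuracy, and elapsed time; with $224 \times 224$ inputs, training on a CPU is noticeably slow.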

Complete Code

"""
@author: Inki
@contact: inki.yinji@qq.com
@version: Created in 2020 1221, last modified in 2020 1221.
"""

import time
import torch
import torch.nn as nn
from torch import optim
from torch.nn import functional
from util.SimpleTool import load_data_fashion_mnist, train, FlattenLayer


def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    ret_block = nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
                              nn.ReLU(),
                              nn.Conv2d(out_channels, out_channels, kernel_size=1),
                              nn.ReLU(),
                              nn.Conv2d(out_channels, out_channels, kernel_size=1),
                              nn.ReLU())

    return ret_block


class GlobalAvgPool2d(nn.Module):

    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        """
        Global average pooling: the pooling window equals the input's spatial size (height x width).
        """
        return functional.avg_pool2d(x, kernel_size=x.size()[2:])


def get_net():
    ret_net = nn.Sequential(nin_block(1, 96, kernel_size=11, stride=4, padding=0),
                            nn.MaxPool2d(kernel_size=3, stride=2),
                            nin_block(96, 256, kernel_size=5, stride=1, padding=2),
                            nn.MaxPool2d(kernel_size=3, stride=2),
                            nin_block(256, 384, kernel_size=3, stride=1, padding=1),
                            nn.MaxPool2d(kernel_size=3, stride=2),
                            nn.Dropout(0.5),
                            nin_block(384, 10, kernel_size=3, stride=1, padding=1),
                            GlobalAvgPool2d(),
                            FlattenLayer())

    return ret_net


def test1():
    x = torch.rand(1, 1, 224, 224)
    temp_net = get_net()
    for name, block in temp_net.named_children():
        x = block(x)
        print(name, 'output shape:', x.shape)


def test2():
    temp_batch_size = 128
    temp_resize = 224
    temp_lr = 0.002
    temp_num_epochs = 5
    temp_net = get_net()
    temp_tr_iter, temp_te_iter = load_data_fashion_mnist(temp_batch_size, resize=temp_resize)
    temp_optimizer = optim.Adam(temp_net.parameters(), lr=temp_lr)
    train(temp_net, temp_tr_iter, temp_te_iter, temp_batch_size, temp_optimizer, num_epochs=temp_num_epochs)


if __name__ == '__main__':
    test2()

Referenced Library

util.SimpleTool

"""
@author: Inki
@contact: inki.yinji@qq.com
@version: Created in 2020 0903, last modified in 2020 1221.
@note: Some common functions; all vector data passed in must be of type numpy.array.
"""

import time
import numpy as np
import sys
import scipy.io as scio
import torch
import torchvision.transforms as transforms
import torchvision
from torch import nn
from multiprocessing import cpu_count


def get_iter(tr, tr_lab, te, te_lab):
    """
    Get iterator.
    :param
        tr:
            The training set.
        tr_lab:
            The training set's label.
        te:
            The test set.
        te_lab:
            The test set's label.
    """
    yield tr, tr_lab, te, te_lab


def is_print(para_str, para_is_print=True):
    """
    Print the given string when para_is_print is True.
    :param
        para_str:
            The string to print.
        para_is_print:
            Print the string if True, otherwise do nothing.
    """
    if para_is_print:
        print(para_str)


def load_file(para_path):
    """
    Load file.
    :param
        para_path:
            The path of the given file.
    :return
        The data.
    """
    temp_type = para_path.split('.')[-1]

    if temp_type == 'mat':
        ret_data = scio.loadmat(para_path)
        return ret_data['data']
    else:
        with open(para_path) as temp_fd:
            ret_data = temp_fd.readlines()

        return ret_data


def load_data_fashion_mnist(batch_size=10, root='D:/Data/Datasets/FashionMNIST', resize=None):
    """
    Download the fashion mnist dataset and then load into memory.
    """
    trans = []
    if resize:
        trans.append(transforms.Resize(size=resize))
    trans.append(transforms.ToTensor())

    transform = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0
    else:
        num_workers = cpu_count()
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter


def owa_weight(para_num, para_type='linear_decrease'):
    """
    The ordered weighted averaging (OWA) operators can replace the maximum or minimum operators,
    and this function generates the corresponding OWA weights. For further reference, see:
    R. R. Yager, J. Kacprzyk, The Ordered Weighted Averaging Operators: Theory and Applications,
    Springer Science & Business Media, 2012.
    :param
        para_num:
            The length of weights list.
        para_type:
            'linear_decrease';
            'inverse_additive',
            and its default setting is 'linear_decrease'.
    :return
        The owa weights.
    """
    if para_num == 1:
        return np.array([1])
    else:
        if para_type == 'linear_decrease':
            temp_num = 2 / para_num / (para_num + 1)
            return np.array([(para_num - i) * temp_num for i in range(para_num)])
        elif para_type == 'inverse_additive':
            temp_num = np.sum([1 / i for i in range(1, para_num + 1)])
            return np.array([1 / i / temp_num for i in range(1, para_num + 1)])
        else:
            return owa_weight(para_num)
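
# Worked example (illustrative, not part of the original file): for para_num = 4,
# 'linear_decrease' uses 2 / (4 * 5) = 0.1 and returns [0.4, 0.3, 0.2, 0.1], while
# 'inverse_additive' returns weights proportional to 1, 1/2, 1/3, 1/4, normalised to sum to 1.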


def print_go_round(para_idx, para_str='Program processing'):
    """
    Print a rotating progress indicator for long-running loops.
    :param
        para_idx:
            The current index.
        para_str:
            The print words.
    """
    round_list = ["\\", "|", "/", "-"]
    print('\r' + para_str + ': ' + round_list[para_idx % 4], end="")


def print_progress_bar(para_idx, para_len):
    """
    Print the progress bar.
    :param
        para_idx:
            The current index.
        para_len:
            The loop length.
    """
    print('\r' + '▇' * int(para_idx // (para_len / 50)) + str(np.ceil((para_idx + 1) * 100 / para_len)) + '%', end='')


def train(net, tr_iter, te_iter, batch_size, optimizer,
          loss=nn.CrossEntropyLoss(),
          device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
          num_epochs=100):
    """
    Train the network, reporting each epoch's average loss, training accuracy,
    test accuracy, and elapsed time.
    """
    net = net.to(device)
    print("Training on", device)
    for epoch in range(num_epochs):
        # Reset all statistics (including the batch count) at the start of every epoch,
        # so that the reported loss is averaged over this epoch only.
        temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_batch_count, temp_start_time = 0., 0., 0, 0, time.time()
        for x, y in tr_iter:
            x = x.to(device)
            y = y.to(device)
            temp_y_pred = net(x)
            temp_loss = loss(temp_y_pred, y)
            optimizer.zero_grad()
            temp_loss.backward()
            optimizer.step()
            temp_tr_loss_sum += temp_loss.cpu().item()
            temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().cpu().item()
            temp_num += y.shape[0]
            temp_batch_count += 1
        test_acc = evaluate_accuracy(te_iter, net)
        print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s" %
              (epoch + 1, temp_tr_loss_sum / temp_batch_count, temp_tr_acc_sum / temp_num, test_acc,
               time.time() - temp_start_time))


def evaluate_accuracy(data_iter, net, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """
    Evaluate the network on the given data iterator; the performance measure is accuracy.
    """
    ret_acc, temp_num = 0., 0
    with torch.no_grad():
        for x, y in data_iter:
            net.eval()  # Evaluation mode: dropout is disabled.
            ret_acc += (net(x.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
            net.train()  # Switch back to training mode.
            temp_num += y.shape[0]

    return ret_acc / temp_num


class Count(dict):
    """
    The count class with dict.
    """
    def __missing__(self, __key):
        return 0


class FlattenLayer(torch.nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)


if __name__ == '__main__':
    load_data_fashion_mnist()


[1] Mu Li, Aston Zhang, et al., Dive into Deep Learning (《动手学深度学习》).
