相关理论和故事背景书上都有,这里不再复述,仅作代码记录与分享。
以下代码不能直接运行:学习过程中多次用到相同的函数,因此我把这些常用函数整理成了工具包,详见《MxNet学习——自定义工具包》。
需将本文代码与该工具包结合,方可运行。
# -------------------------------------------------------------------------------
# Description: VGG 使用重复元素的非常深的网络
# Description: VGG 提出了重复使用简单的基础块来构建深度模型的思路
# Description: VGG 关键: 若干个 VGG 块(每块由若干个相同的小 kernel 卷积层 + 1 个池化层组成) + 全连接层
# Description: 第一代 VGG 模型,有8个卷积层 + 3个全连接层,所以也称为 VGG-11
# Description: VGG 常用模型:VGG-16, VGG-19
# Description: LeNet、AlexNet、VGG 在设计上的共同点:
# Description: 1.先用卷积层构成的模块充分抽取空间特征 2.再以由全连接层构成的模块来输出分类结果
# Description: AlexNet、VGG 的改进主要在于如何对这两个模块加宽(增加通道数)和加深。
# Reference:
# Author: Sophia
# Date: 2021/3/11
# -------------------------------------------------------------------------------
from IPython import display
from mxnet import autograd, nd, init, gluon
from mxnet.gluon import data as gdata, loss as gloss, nn
import random, sys, time, matplotlib.pyplot as plt, mxnet as mx
from plt_so import *
'''
VGG 块: 若干个相同的小kernel + 1个池化层
'''
def vgg_block(num_convs, num_channels):
    """Build one VGG block: a stack of identical 3x3 convolutions plus a pool.

    Args:
        num_convs: How many convolutional layers to stack in the block.
        num_channels: Output channel count used by every convolution.

    Returns:
        An ``nn.Sequential`` holding ``num_convs`` ``Conv2D`` layers
        (kernel size 3, padding 1, ReLU activation) followed by a single
        ``MaxPool2D`` layer with pool size 2 and stride 2.
    """
    block = nn.Sequential()
    # Build the convolutions first, then register them in a single call.
    conv_layers = [
        nn.Conv2D(num_channels, kernel_size=3, padding=1, activation='relu')
        for _ in range(num_convs)
    ]
    block.add(*conv_layers)
    # Exactly one pooling layer closes the block.
    block.add(nn.MaxPool2D(pool_size=2, strides=2))
    return block
# blk = vgg_block(2, 128)
# blk.initialize()
# X = nd.random.uniform(shape=(2, 3, 16, 16))
# y = blk(X)
# print(y.shape) # (2, 128, 8, 8)
'''
VGG 网络
'''
def vgg(conv_arch, num_classes=10):
    """Build a VGG-style network from a sequence of block specifications.

    Args:
        conv_arch: Iterable of ``(num_convs, num_channels)`` pairs. One VGG
            block is appended per pair, in the given order.
        num_classes: Number of units in the final ``Dense`` output layer.
            Defaults to 10, the value that used to be hard-coded, so existing
            callers are unaffected.

    Returns:
        An ``nn.Sequential`` consisting of the convolutional blocks followed
        by two 4096-unit ReLU ``Dense`` layers, each followed by
        ``Dropout(0.5)``, and a final ``Dense(num_classes)`` layer.
    """
    net = nn.Sequential()
    # Convolutional part: one VGG block per (num_convs, num_channels) pair.
    for num_convs, num_channels in conv_arch:
        net.add(vgg_block(num_convs, num_channels))
    # Fully connected part: two hidden layers with dropout, then the output
    # layer whose width is the number of target classes.
    net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(num_classes))
    return net
# The spatial size (image height/width) keeps shrinking from block to block;
# a growing channel count (semantic information) compensates for that.
# Each entry is (num_convs, num_channels) for one VGG block.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)
# Full-size network built from the architecture above.
net = vgg(conv_arch)
# print(net)
# 输出:
# Sequential(
# (0): Sequential(
# (0): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), Activation(relu))
# (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
# )
# (1): Sequential(
# (0): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), Activation(relu))
# (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
# )
# (2): Sequential(
# (0): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), Activation(relu))
# (1): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), Activation(relu))
# (2): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
# )
# (3): Sequential(
# (0): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), Activation(relu))
# (1): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), Activation(relu))
# (2): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
# )
# (4): Sequential(
# (0): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), Activation(relu))
# (1): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), Activation(relu))
# (2): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
# )
# (5): Dense(None -> 4096, Activation(relu))
# (6): Dropout(p = 0.5, axes=())
# (7): Dense(None -> 4096, Activation(relu))
# (8): Dropout(p = 0.5, axes=())
# (9): Dense(None -> 10, linear)
# )
# net.initialize()
# X = nd.random.uniform(shape=(1, 1, 224, 224))
# for blk in net:
# X = blk(X)
# print(blk.name, 'output shape:\t', X.shape)
# 输出:
# sequential1 output shape: (1, 64, 112, 112)
# sequential2 output shape: (1, 128, 56, 56)
# sequential3 output shape: (1, 256, 28, 28)
# sequential4 output shape: (1, 512, 14, 14)
# sequential5 output shape: (1, 512, 7, 7)
# dense0 output shape: (1, 4096)
# dropout0 output shape: (1, 4096)
# dense1 output shape: (1, 4096)
# dropout1 output shape: (1, 4096)
# dense2 output shape: (1, 10)
'''
训练模型
'''
# Dividing every block's channel count by `ratio` gives a smaller network
# that is quicker to test.
ratio = 4
small_conv_arch = [
    (num_convs, num_channels // ratio)
    for num_convs, num_channels in conv_arch
]
net = vgg(small_conv_arch)

# Training hyperparameters and the device to run on.
# NOTE(review): try_gpu, load_data_fashion_mnist_ch5 and train_ch5 are not
# defined in this file; presumably they come from `from plt_so import *`.
lr = 0.05
num_epochs = 5
batch_size = 128
ctx = try_gpu()

# Xavier-initialize the parameters on the chosen device, then train with SGD.
net.initialize(init=init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)
train_iter, test_iter = load_data_fashion_mnist_ch5(batch_size, resize=224)
train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)