文章目录
MxNet封装实现经典深度网络框架 LeNet,Alex-Net,Google-Net,ResNet
理论部分可参考:
Keras封装实现经典深度网络框架 VGG-16, ZF-Net,Alex-Net,LeNet,Google-Net,ResNet, DenseNet-50
LeNet
import d2l
from mxnet import autograd, gluon, init, np, npx
from mxnet.gluon import nn
npx.set_np()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=256)
## 定义模型
net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, padding=2, activation='sigmoid'),
nn.AvgPool2D(pool_size=2, strides=2),
nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
nn.AvgPool2D(pool_size=2, strides=2),
nn.Dense(120, activation='sigmoid'),
nn.Dense(84, activation='sigmoid'),
nn.Dense(10))
打印每一层的输出形状
X = np.random.uniform(size=(1, 1, 28, 28))
net.initialize()
for layer in net:
X = layer(X)
print(layer.name, 'output shape:\t', X.shape)
conv0 output shape: (1, 6, 28, 28)
pool0 output shape: (1, 6, 14, 14)
conv1 output shape: (1, 16, 10, 10)
pool1 output shape: (1, 16, 5, 5)
dense0 output shape: (1, 120)
dense1 output shape: (1, 84)
dense2 output shape: (1, 10)
在GPU上计算精度
def evaluate_accuracy_gpu(net, data_iter, ctx=None):
if not ctx: # 查看第一个权重的第一个设备。
ctx = list(net.collect_params().values())[0].list_ctx()[0]
metric = d2l.Accumulator(2) # 正确的样本数,总样本数。
for X, y in data_iter:
X, y = X.as_in_context(ctx), y.as_in_context(ctx)
metric.add(d2l.accuracy(net(X), y), y.size)
return metric[0]/metric[1]
在GPU上训练
def train_ch5(net, train_iter, test_iter, num_epochs, lr, ctx=d2l.try_gpu()):
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
animator = d2l.Animator(xlabel='epoch', xlim=[0,num_epochs],
legend=['train loss','train acc','test acc'])
timer = d2l.Timer()
for epoch in range(num_epochs):
metric = d2l.Accumulator(3) # 训练损失和,训练精度和,样本数。
for i, (X, y) in enumerate(train_iter):
timer.start()
# 这是跟 train_epoch_ch3 的唯一区别。
X, y = X.as_in_context(ctx), y.as_in_context(ctx)
with autograd.record():
y_hat = net(X)
l = loss(y_hat, y)
l.backward()
trainer.step(X.shape[0])
metric.add(l.sum(), d2l.accuracy(y_hat, y), X.shape[0])
timer.stop()
train_loss, train_acc = metric[0]/metric[2], metric[1]/metric[2]
if (i+1) % 50 == 0:
animator.add(epoch + i/len(train_iter),
(train_loss, train_acc, None))
test_acc = evaluate_accuracy_gpu(net, test_iter)
animator.add(epoch+1, (None, None, test_acc))
print('loss %.3f, train acc %.3f, test acc %.3f' % (
train_loss, train_acc, test_acc))
print('%.1f exampes/sec on %s'%(metric[2]*num_epochs/timer.sum(), ctx))
训练模型
train_ch5(net, train_iter, test_iter, num_epochs=10, lr=0.9)
loss 0.472, train acc 0.822, test acc 0.829
60080.8 exampes/sec on gpu(0)
AlexNet(2012)
import d2l
from mxnet import gluon, np, npx
from mxnet.gluon import nn
npx.set_np()
train_iter, test_iter = d2l.load_data_fashion_mnist(
batch_size=128, resize=224)
定义模型
net = nn.Sequential()
net.add(# 使用较大的11 x 11窗口来捕获大物体。
nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2),
# 减小卷积窗口,使用填充为2来使得输入与输出的高和宽一致,且增大输出通道数
nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2),
# 连续3个卷积层,且使用更小的卷积窗口。除了最后的卷积层外,进一步增大了输出通道数。
# 前两个卷积层后不使用池化层来减小输入的高和宽
nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2),
# 这里全连接层的输出个数比LeNet中的大数倍。使用丢弃层来缓解过拟合
nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
nn.Dense(10))
查看每一层的输出形状
X = np.random.uniform(size=(1, 1, 224, 224))
net.initialize()
for layer in net:
X = layer(X)
print(layer.name, 'output shape:\t', X.shape)
conv0 output shape: (1, 96, 54, 54)
pool0 output shape: (1, 96, 26, 26)
conv1 output shape: (1, 256, 26, 26)
pool1 output shape: (1, 256, 12, 12)
conv2 output shape: (1, 384, 12, 12)
conv3 output shape: (1, 384, 12, 12)
conv4 output shape: (1, 256, 12, 12)
pool2 output shape: (1, 256, 5, 5)
dense0 output shape: (1, 4096)
dropout0 output shape: (1, 4096)
dense1 output shape: (1, 4096)
dropout1 output shape: (1, 4096)
dense2 output shape: (1, 10)
训练
d2l.train_ch5(net, train_iter, test_iter, num_epochs=10, lr=0.01)
loss 0.335, train acc 0.878, test acc 0.889
4070.5 exampes/sec on gpu(0)
VGG
import d2l
from mxnet import gluon, np, npx
from mxnet.gluon import nn
npx.set_np()
train_iter, test_iter = d2l.load_data_fashion_mnist(
batch_size=128, resize=224)
VGG块
def vgg_block(num_convs, num_channels):
blk = nn.Sequential()
for _ in range(num_convs):
blk.add(nn.Conv2D(num_channels, kernel_size=3,
padding=1, activation='relu'))
blk.add(nn.MaxPool2D(pool_size=2, strides=2))
return blk
定义模型
onv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
def vgg(conv_arch):
net = nn.Sequential()
# 卷积层部分。
for (num_convs, num_channels) in conv_arch:
net.add(vgg_block(num_convs, num_channels))
# 全连接层部分。
net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
nn.Dense(10))
return net
net = vgg(conv_arch)
使用一个窄版本来训练
ratio = 4
small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
net = vgg(small_conv_arch)
d2l.train_ch5(net, train_iter, test_iter, num_epochs=10, lr=0.05)
loss 0.174, train acc 0.936, test acc 0.916
1839.6 exampes/sec on gpu(0)
GoogLeNet
import d2l
from mxnet import gluon, np, npx
from mxnet.gluon import nn
npx.set_np()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=128, resize=96)
Inception块
class Inception(nn.Block):
# c1 - c4 为每条线路里的层的输出通道数
def __init__(self, c1, c2, c3, c4, **kwargs):
super(Inception, self).__init__(**kwargs)
# 线路1,单1 x 1卷积层
self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')
# 线路2,1 x 1卷积层后接3 x 3卷积层
self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')
self.p2_2 = nn.Conv2D(c2[1], kernel_size=3, padding=1, activation='relu')
# 线路3,1 x 1卷积层后接5 x 5卷积层
self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')
self.p3_2 = nn.Conv2D(c3[1], kernel_size=5, padding=2, activation='relu')
# 线路4,3 x 3最大池化层后接1 x 1卷积层
self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')
def forward(self, x):
p1 = self.p1_1(x)
p2 = self.p2_2(self.p2_1(x))
p3 = self.p3_2(self.p3_1(x))
p4 = self.p4_2(self.p4_1(x))
# 在通道维上连结输出
return np.concatenate((p1, p2, p3, p4), axis=1)
Inception模型,第一阶段
b1 = nn.Sequential()
b1.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2, padding=1))
Inception模型,第二阶段
b2 = nn.Sequential()
b2.add(nn.Conv2D(64, kernel_size=1),
nn.Conv2D(192, kernel_size=3, padding=1),
nn.MaxPool2D(pool_size=3, strides=2, padding=1))
Inception模型,第三阶段
b3 = nn.Sequential()
b3.add(Inception(64, (96, 128), (16, 32), 32),
Inception(128, (128, 192), (32, 96), 64),
nn.MaxPool2D(pool_size=3, strides=2, padding=1))
Inception模型,第四阶段
b4 = nn.Sequential()
b4.add(Inception(192, (96, 208), (16, 48), 64),
Inception(160, (112, 224), (24, 64), 64),
Inception(128, (128, 256), (24, 64), 64),
Inception(112, (144, 288), (32, 64), 64),
Inception(256, (160, 320), (32, 128), 128),
nn.MaxPool2D(pool_size=3, strides=2, padding=1))
Inception 模型 - 第五阶段
b5 = nn.Sequential()
b5.add(Inception(256, (160, 320), (32, 128), 128),
Inception(384, (192, 384), (48, 128), 128),
nn.GlobalAvgPool2D())
net = nn.Sequential()
net.add(b1, b2, b3, b4, b5, nn.Dense(10))
查看网络
X = np.random.uniform(size=(1, 1, 96, 96))
net.initialize()
for layer in net:
X = layer(X)
print(layer.name, 'output shape:\t', X.shape)
sequential0 output shape: (1, 64, 24, 24)
sequential1 output shape: (1, 192, 12, 12)
sequential2 output shape: (1, 480, 6, 6)
sequential3 output shape: (1, 832, 3, 3)
sequential4 output shape: (1, 1024, 1, 1)
dense0 output shape: (1, 10)
训练
d2l.train_ch5(net, train_iter, test_iter, num_epochs=5, lr=0.1)
loss 0.336, train acc 0.873, test acc 0.879
2732.2 exampes/sec on gpu(0)
ResNet-18
import d2l
from mxnet import gluon, np, npx
from mxnet.gluon import nn
npx.set_np()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=256, resize=96)
残差块
class Residual(nn.Block):
def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
super(Residual, self).__init__(**kwargs)
self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1, strides=strides)
self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
self.conv3 = None
if use_1x1conv:
self.conv3 = nn.Conv2D(num_channels, kernel_size=1, strides=strides)
self.bn1 = nn.BatchNorm()
self.bn2 = nn.BatchNorm()
def forward(self, X):
Y = npx.relu(self.bn1(self.conv1(X)))
Y = self.bn2(self.conv2(Y))
if self.conv3:
X = self.conv3(X)
return npx.relu(Y + X)
当输入和输出形状一样时
blk = Residual(3)
blk.initialize()
X = np.random.uniform(size=(4, 3, 6, 6))
blk(X).shape
减半输出高和宽,并增加输出通道。
blk = Residual(6, use_1x1conv=True, strides=2)
blk.initialize()
resnet块
def resnet_block(num_channels, num_residuals, first_block=False):
blk = nn.Sequential()
for i in range(num_residuals):
if i == 0 and not first_block:
blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
else:
blk.add(Residual(num_channels))
return blk
整个模块
net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
nn.BatchNorm(), nn.Activation('relu'),
nn.MaxPool2D(pool_size=3, strides=2, padding=1),
resnet_block(64, 2, first_block=True),
resnet_block(128, 2),
resnet_block(256, 2),
resnet_block(512, 2),
nn.GlobalAvgPool2D(),
nn.Dense(10))
训练
d2l.train_ch5(net, train_iter, test_iter, num_epochs=10, lr=0.05)
loss 0.013, train acc 0.997, test acc 0.879
4900.2 exampes/sec on gpu(0)