LeNet
LeNet consists of two parts: a convolutional block and a fully connected block. The basic unit of the convolutional block is a convolutional layer followed by a max pooling layer: the convolutional layer recognizes spatial patterns in the image, while max pooling reduces the convolutional layer's sensitivity to location.
Code
import d2lzh as d2l
import mxnet as mx
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn
import time
net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        # Dense by default transforms an input of shape
        # (batch size, channels, height, width) into an input of shape
        # (batch size, channels * height * width)
        nn.Dense(120, activation='sigmoid'),
        nn.Dense(84, activation='sigmoid'),
        nn.Dense(10))
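# Optional sketch (not from the original listing): feed a dummy single-channel
# 28x28 image through the network layer by layer to inspect each layer's
# output shape. This is purely diagnostic; the weights are re-initialized
# below with force_reinit=True before training.
X = nd.random.uniform(shape=(1, 1, 28, 28))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)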
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
def try_gpu():  # Try to run on a GPU; fall back to the CPU if none is available
    try:
        ctx = mx.gpu()
        _ = nd.zeros((1,), ctx=ctx)
    except mx.base.MXNetError:
        ctx = mx.cpu()
    return ctx
def evaluate_accuracy(data_iter, net, ctx):
    acc_sum, n = nd.array([0], ctx=ctx), 0
    for X, y in data_iter:
        # If ctx is a GPU (and its memory), copy the data to GPU memory
        X, y = X.as_in_context(ctx), y.as_in_context(ctx).astype('float32')
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.size
    return acc_sum.asscalar() / n
def train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs):  # Training function, same as before
    print('training on', ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))
ctx = try_gpu()
lr, num_epochs = 0.9, 5
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())  # Initialize the weights with the Xavier initializer
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
Results
training on cpu(0)
epoch 1, loss 2.3172, train acc 0.109, test acc 0.110, time 24.1 sec
epoch 2, loss 1.3792, train acc 0.466, test acc 0.557, time 22.6 sec
epoch 3, loss 0.8552, train acc 0.666, test acc 0.700, time 24.5 sec
epoch 4, loss 0.7144, train acc 0.719, test acc 0.742, time 23.9 sec
epoch 5, loss 0.6420, train acc 0.747, test acc 0.767, time 25.0 sec
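As a follow-up, here is a minimal sketch of how the trained net can be used to predict on a few test images. It assumes the d2lzh helpers get_fashion_mnist_labels and show_fashion_mnist are available (their exact behavior may differ between versions of the package).
for X, y in test_iter:
    break  # take one batch of test images
X = X.as_in_context(ctx)
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
d2l.show_fashion_mnist(X[0:9], titles[0:9])  # plot the first 9 images with true/predicted labels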