MXNet Deep Learning Framework - 17 - Implementing AlexNet with Gluon

      AlexNet is arguably the model that set off the deep learning boom. Proposed in 2012 by Alex Krizhevsky, Ilya Sutskever, and Geoffrey Hinton, this 8-layer convolutional neural network won that year's ImageNet image-recognition challenge, and its overall design is somewhat similar to the classic LeNet-5.

      The model has several notable characteristics:
1) It is deeper than LeNet-5, with 5 convolutional layers and 3 fully connected layers.
2) The first convolutional layer uses an 11×11 kernel, the second a 5×5 kernel, and the remaining convolutional layers 3×3 kernels. In addition, the first, second, and fifth convolutional layers are each followed by a max-pooling layer with a 3×3 window and a stride of 2 (see the shape-check sketch after this list).
      For the theory, read the original paper: ImageNet Classification with Deep Convolutional Neural Networks.
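
The shape-check sketch mentioned above: a minimal, standalone snippet (my own addition, not from the paper or the original post) that stacks the same layers in Gluon and prints every layer's output shape for a dummy single-channel 224×224 input. For that input, the features entering the first fully connected layer flatten to 256×5×5 = 6400 values.

import mxnet.gluon as gn
import mxnet.ndarray as nd

# Same layer stack as described above (LRN omitted), built only to inspect output shapes
sketch = gn.nn.Sequential()
sketch.add(gn.nn.Conv2D(96, kernel_size=11, strides=4, activation="relu"),
           gn.nn.MaxPool2D(pool_size=3, strides=2),
           gn.nn.Conv2D(256, kernel_size=5, padding=2, activation="relu"),
           gn.nn.MaxPool2D(pool_size=3, strides=2),
           gn.nn.Conv2D(384, kernel_size=3, padding=1, activation="relu"),
           gn.nn.Conv2D(384, kernel_size=3, padding=1, activation="relu"),
           gn.nn.Conv2D(256, kernel_size=3, padding=1, activation="relu"),
           gn.nn.MaxPool2D(pool_size=3, strides=2),
           gn.nn.Flatten(),
           gn.nn.Dense(4096, activation="relu"), gn.nn.Dropout(0.5),
           gn.nn.Dense(4096, activation="relu"), gn.nn.Dropout(0.5),
           gn.nn.Dense(10))
sketch.initialize()

x = nd.random.uniform(shape=(1, 1, 224, 224))  # one fake grayscale 224x224 image
for layer in sketch:
    x = layer(x)
    print(layer.name, "output shape:", x.shape)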

Below is the AlexNet implementation code:

import mxnet.gluon as gn
import mxnet.autograd as ag
import mxnet.ndarray as nd
import mxnet.initializer as init

'''--- Define the model ---'''
# The LRN layers in the original AlexNet add little benefit but extra compute time, so they are omitted here
net=gn.nn.Sequential()
with net.name_scope():
    # Stage 1
    net.add(gn.nn.Conv2D(channels=96,kernel_size=11,strides=(4,4),activation="relu"))
    net.add(gn.nn.MaxPool2D(pool_size=(3,3),strides=2))
    # Stage 2
    net.add(gn.nn.Conv2D(channels=256, kernel_size=5, strides=(1, 1),padding=2,activation="relu"))
    net.add(gn.nn.MaxPool2D(pool_size=(3, 3), strides=2))
    # Stage 3
    net.add(gn.nn.Conv2D(channels=384, kernel_size=3, strides=(1, 1), padding=1, activation="relu"))
    net.add(gn.nn.Conv2D(channels=384, kernel_size=3, strides=(1, 1), padding=1, activation="relu"))
    net.add(gn.nn.Conv2D(channels=256, kernel_size=3, strides=(1, 1), padding=1, activation="relu"))
    net.add(gn.nn.MaxPool2D(pool_size=(3, 3), strides=2))
    # Stage 4
    net.add(gn.nn.Flatten())
    net.add(gn.nn.Dense(4096,activation="relu"))
    net.add(gn.nn.Dropout(0.5))
    # Stage 5
    net.add(gn.nn.Dense(4096, activation="relu"))
    net.add(gn.nn.Dropout(0.5))
    # Stage 6
    net.add(gn.nn.Dense(10))  # the real AlexNet outputs 1000 classes; Fashion-MNIST only needs 10

'''--- Load and preprocess the data ---'''
def load_data_fashion_mnist(batch_size, resize=None):
    transformer = []
    if resize:
        transformer += [gn.data.vision.transforms.Resize(resize)]
    transformer += [gn.data.vision.transforms.ToTensor()]
    transformer = gn.data.vision.transforms.Compose(transformer)
    mnist_train = gn.data.vision.FashionMNIST( train=True)
    mnist_test = gn.data.vision.FashionMNIST( train=False)
    train_iter = gn.data.DataLoader(
        mnist_train.transform_first(transformer), batch_size, shuffle=True)
    test_iter = gn.data.DataLoader(
        mnist_test.transform_first(transformer), batch_size, shuffle=False)
    return train_iter, test_iter

batch_size=32
train_iter,test_iter=load_data_fashion_mnist(batch_size,resize=224)
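# Optional sanity check (my addition, not in the original post): one batch of the resized
# Fashion-MNIST data should come out as (batch_size, 1, 224, 224) images and (batch_size,) labels
for images_chk, labels_chk in train_iter:
    print(images_chk.shape, labels_chk.shape)  # e.g. (32, 1, 224, 224) (32,)
    break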
net.initialize(init=init.Xavier()) # Xavier random initialization



# Accuracy of a batch of predictions
def accuracy(output,label):
    return nd.mean(output.argmax(axis=1)==label).asscalar()

def evaluate_accuracy(data_iter,net):  # accuracy over the test set
    acc=0
    for data,label in data_iter:
        label = label.astype('float32')
        output=net(data)
        acc+=accuracy(output,label)
    return acc/len(data_iter)  # mean of per-batch accuracies

# Computing softmax and cross-entropy separately can be numerically unstable, so use gluon's fused loss
cross_loss=gn.loss.SoftmaxCrossEntropyLoss()
# Optimizer: plain SGD
train_step=gn.Trainer(net.collect_params(),'sgd',{"learning_rate":0.01})

# Training loop
epochs=20
for epoch in range(epochs):
    train_loss=0
    train_acc=0
    for image,y in train_iter:
        y = y.astype('float32')
        with ag.record():
            output = net(image)
            loss = cross_loss(output, y)
        loss.backward()
        train_step.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, y)
    test_acc = evaluate_accuracy(test_iter, net)
    print("Epoch %d, Loss:%f, Train acc:%f, Test acc:%f"
          %(epoch,train_loss/len(train_iter),train_acc/len(train_iter),test_acc))
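
One comment in the script above deserves a quick illustration: the fused SoftmaxCrossEntropyLoss is used because computing softmax and cross-entropy by hand can overflow. Here is a small standalone check (my own sketch, not part of the original post), using only the imports already shown above:

import mxnet.gluon as gn
import mxnet.ndarray as nd

big_logits = nd.array([[1000.0, -1000.0]])  # extreme scores, e.g. from a badly scaled network
label = nd.array([0])

# Naive route: exp(1000) overflows to inf, and inf/inf gives nan
p = nd.exp(big_logits) / nd.exp(big_logits).sum(axis=1, keepdims=True)
print("naive softmax:", p)  # [[nan  0.]] -- the probability of the true class is already nan

# Fused route: SoftmaxCrossEntropyLoss works in log-space internally and stays finite (about 0 here)
fused_loss = gn.loss.SoftmaxCrossEntropyLoss()(big_logits, label)
print("fused loss:", fused_loss.asscalar())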

Because AlexNet's fully connected layers are so wide and the input images are large, training is very slow on my machine. To make the results easier to observe, I scaled the parameters above down:
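
To see where the cost goes, here is a rough back-of-the-envelope count (my own arithmetic, not from the original post): with a 224×224 input the convolutional part flattens to 256×5×5 = 6400 features, so the first Dense(4096) layer alone holds about 26 million parameters; with a 112×112 input and Dense(512) it drops to roughly half a million.

def dense_params(n_in, n_out):
    # weights plus biases of a fully connected layer
    return n_in * n_out + n_out

# 224x224 input: conv features flatten to 256*5*5 = 6400 (see the shape sketch near the top)
print(dense_params(256 * 5 * 5, 4096))  # 26,218,496 parameters in the first Dense layer
# 112x112 input: conv features flatten to 256*2*2 = 1024
print(dense_params(256 * 2 * 2, 512))   # 524,800 parameters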

import mxnet.gluon as gn
import mxnet.autograd as ag
import mxnet.ndarray as nd
import mxnet.initializer as init
import mxnet as mx

ctx=mx.gpu()  # train on the GPU; use mx.cpu() instead if no GPU is available
'''--- Define the model ---'''
# The LRN layers in the original AlexNet add little benefit but extra compute time, so they are omitted here
net=gn.nn.Sequential()
with net.name_scope():
    # Stage 1
    net.add(gn.nn.Conv2D(channels=96,kernel_size=11,strides=(4,4),activation="relu"))
    net.add(gn.nn.MaxPool2D(pool_size=(3,3),strides=2))
    # Stage 2
    net.add(gn.nn.Conv2D(channels=256, kernel_size=5, strides=(1, 1),padding=2,activation="relu"))
    net.add(gn.nn.MaxPool2D(pool_size=(3, 3), strides=2))
    # Stage 3
    net.add(gn.nn.Conv2D(channels=384, kernel_size=3, strides=(1, 1), padding=1, activation="relu"))
    net.add(gn.nn.Conv2D(channels=384, kernel_size=3, strides=(1, 1), padding=1, activation="relu"))
    net.add(gn.nn.Conv2D(channels=256, kernel_size=3, strides=(1, 1), padding=1, activation="relu"))
    net.add(gn.nn.MaxPool2D(pool_size=(3, 3), strides=2))
    # Stage 4
    net.add(gn.nn.Flatten())
    net.add(gn.nn.Dense(512,activation="relu"))  # the full-size AlexNet uses (4096, 4096) here; that trains too slowly, so reduce to (512, 128)
    net.add(gn.nn.Dropout(0.5))
    # Stage 5
    net.add(gn.nn.Dense(128, activation="relu"))
    net.add(gn.nn.Dropout(0.5))
    # Stage 6
    net.add(gn.nn.Dense(10))  # the real AlexNet outputs 1000 classes; Fashion-MNIST only needs 10

'''--- Load and preprocess the data ---'''
def load_data_fashion_mnist(batch_size, resize=None):
    transformer = []
    if resize:
        transformer += [gn.data.vision.transforms.Resize(resize)]
    transformer += [gn.data.vision.transforms.ToTensor()]
    transformer = gn.data.vision.transforms.Compose(transformer)
    mnist_train = gn.data.vision.FashionMNIST( train=True)
    mnist_test = gn.data.vision.FashionMNIST( train=False)
    train_iter = gn.data.DataLoader(
        mnist_train.transform_first(transformer), batch_size, shuffle=True)
    test_iter = gn.data.DataLoader(
        mnist_test.transform_first(transformer), batch_size, shuffle=False)
    return train_iter, test_iter

batch_size=64
train_iter,test_iter=load_data_fashion_mnist(batch_size,resize=112) # the original 224x224 input is too large to train comfortably, so resize to 112 instead
net.initialize(ctx=ctx,init=init.Xavier()) # Xavier random initialization, with parameters allocated on the GPU

# Softmax + cross-entropy loss
# Computing them separately is numerically unstable (compare the results in the previous two posts), so use the fused API that gluon provides
cross_loss=gn.loss.SoftmaxCrossEntropyLoss()

# Accuracy of a batch of predictions
def accuracy(output,label):
    return nd.mean(output.argmax(axis=1)==label).asscalar()

def evaluate_accuracy(data_iter,net):  # accuracy over the test set
    acc=0
    for data,label in data_iter:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        label = label.astype('float32')
        output=net(data)
        acc+=accuracy(output,label)
    return acc/len(data_iter)  # mean of per-batch accuracies

# Optimizer: plain SGD
train_step=gn.Trainer(net.collect_params(),'sgd',{"learning_rate":0.01})

# Training loop
epochs=20
for epoch in range(epochs):
    train_loss=0
    train_acc=0
    for image,y in train_iter:
        image, y = image.as_in_context(ctx), y.as_in_context(ctx)
        y = y.astype('float32')
        with ag.record():
            output = net(image)
            loss = cross_loss(output, y)
        loss.backward()
        train_step.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, y)
    test_acc = evaluate_accuracy(test_iter, net)
    print("Epoch %d, Loss:%f, Train acc:%f, Test acc:%f"
          %(epoch,train_loss/len(train_iter),train_acc/len(train_iter),test_acc))

Training results (the run was not finished):
[screenshot of the training log]
