MXNet Deep Learning Framework - 24 - DenseNet with Gluon

        ResNet's idea of cross-layer connections influenced the models that came after it. In this post we introduce DenseNet. The figure below illustrates the main difference between the two (left: ResNet, right: DenseNet):
[Figure: ResNet (left) vs. DenseNet (right)]
        The main difference between ResNet and DenseNet is that element-wise addition is replaced by channel-wise concatenation. The benefit is that low-level features are not lost along the way; they are preserved in full and passed on to every later layer:
[Figure: dense connections preserve the low-level features of earlier layers]
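        To make the difference concrete, here is a minimal sketch (the shapes are arbitrary and chosen only for illustration) comparing a ResNet-style skip connection with a DenseNet-style one:

import mxnet.ndarray as nd

x = nd.random_normal(shape=(1, 4, 8, 8))    # pretend input with 4 channels
out = nd.random_normal(shape=(1, 4, 8, 8))  # pretend output of a conv block, also 4 channels

res = x + out                      # ResNet: element-wise addition, still 4 channels
dense = nd.concat(x, out, dim=1)   # DenseNet: concatenation along the channel axis

print(res.shape)    # (1, 4, 8, 8)
print(dense.shape)  # (1, 8, 8, 8) -- the channel counts add up: 4 + 4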
        With this principle in mind, let's define the building block:

import mxnet.ndarray as nd
import mxnet.autograd as ag
import mxnet.gluon as gn
import mxnet as mx
import matplotlib.pyplot as plt
from mxnet import init

def conv_block(channels): # conv_block follows the ResNet v2 ordering: BN -> ReLU -> Conv
    out=gn.nn.Sequential()
    out.add(gn.nn.BatchNorm(),
            gn.nn.Activation("relu"),
            gn.nn.Conv2D(channels=channels,kernel_size=3,padding=1)
            )
    return out
# A dense block consists of several conv_blocks, each using the same number of output channels.
# Build the dense block
class dense_block(gn.nn.Block):
    def __init__(self,num_conv_block,channels,**kwargs):
        super(dense_block, self).__init__(**kwargs)
        self.net=gn.nn.Sequential()
        for i in range(num_conv_block):
            self.net.add(conv_block(channels=channels))
    def forward(self, x):
        for layer in self.net:
            out=layer(x)
            x=nd.concat(x,out,dim=1) # concatenate input and output along the channel dimension; unlike ResNet, which adds, DenseNet concatenates
        return x

        Let's test it:

# Test an instance to check that the result matches expectations
dlk=dense_block(num_conv_block=2,channels=10)
dlk.initialize()
X=nd.random_normal(shape=(1,3,8,8)) # NCHW
print(dlk(X).shape)

Result:
(1, 23, 8, 8)
        As you can see, nothing has changed except the number of channels, which is now 23. Why? This dense block contains 2 conv_blocks, each producing 10 output channels, so concatenation contributes 20 channels; adding the original 3 input channels gives 2 × 10 + 3 = 23.
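        In general, a dense block's output channel count equals its input channel count plus num_conv_block × channels. A tiny helper (purely illustrative; dense_block_out_channels is not part of the model code) makes that bookkeeping explicit:

def dense_block_out_channels(in_channels, num_conv_block, channels):
    # each conv_block stacks `channels` new feature maps on top of everything before it
    return in_channels + num_conv_block * channels

print(dense_block_out_channels(3, 2, 10))  # 23, matching the test above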
        This raises a problem, though: if we stack more dense blocks, say 4 of them with each conv_block outputting 128 channels, the concatenated channel count keeps growing and the model becomes excessively expensive. The remedy is a transition block. There is nothing fancy about it: just a 1×1 convolution followed by pooling, used to shrink the number of channels and halve the height and width. Here is the code:

def trans_block(channels): # transition block: 1x1 conv + average pooling to shrink channels and spatial size
    out=gn.nn.Sequential()
    out.add(gn.nn.BatchNorm(),
            gn.nn.Activation("relu"),
            gn.nn.Conv2D(channels=channels,kernel_size=1),
            gn.nn.AvgPool2D(pool_size=2,strides=2)
            )
    return out

Next, let's test it:

tlk=trans_block(10)
tlk.initialize()
print(tlk(dlk(X)).shape)

Result:
(1, 10, 4, 4)
As you can see, the number of channels has dropped from 23 to 10, and the height and width have been halved.
        The body of DenseNet simply stacks dense blocks and transition blocks repeatedly. Below we implement a DenseNet with the 121-layer configuration:

start_channel=64
growth_channel=32 # growth rate: the number of channels each conv_block adds
block_layer_num=[6,12,24,16] # number of conv_blocks in each dense block
def DenseNet():
    net=gn.nn.Sequential()
    with net.name_scope():
        # first block
        net.add(gn.nn.Conv2D(channels=start_channel,kernel_size=7,padding=3,strides=2),
                gn.nn.BatchNorm(),
                gn.nn.Activation("relu"),
                gn.nn.MaxPool2D(pool_size=2,strides=2,padding=1)
                )
        # dense blocks
        channels=start_channel
        for i,num_layers in enumerate(block_layer_num):
            net.add(dense_block(num_conv_block=num_layers,channels=growth_channel))
            channels+=growth_channel*num_layers  # running count of channels so far
            # after every dense block except the last, add a transition block to shrink channels and spatial size
            if i!=len(block_layer_num)-1:
                channels//=2
                net.add(trans_block(channels=channels)) # the transition block halves the number of channels
        # last block
        net.add(gn.nn.BatchNorm(),
                gn.nn.Activation("relu"),
                gn.nn.GlobalAvgPool2D(),
                gn.nn.Dense(10)
                )
    return net
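        Before assembling the full training script, it is worth sanity-checking the architecture by pushing a dummy batch through the network and printing each top-level block's output shape. This is just a quick sketch, run on the CPU with a 1×1×32×32 input to match the resized Fashion-MNIST images used later:

net=DenseNet()
net.initialize()  # the default CPU context is enough for a shape check
X=nd.random_normal(shape=(1,1,32,32)) # NCHW
for layer in net:
    X=layer(X)
    print(layer.name, 'output shape:\t', X.shape)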

Here is the complete code:

import mxnet.ndarray as nd
import mxnet.autograd as ag
import mxnet.gluon as gn
import mxnet as mx
import matplotlib.pyplot as plt
from mxnet import init

def conv_block(channels): # conv_block follows the ResNet v2 ordering: BN -> ReLU -> Conv
    out=gn.nn.Sequential()
    out.add(gn.nn.BatchNorm(),
            gn.nn.Activation("relu"),
            gn.nn.Conv2D(channels=channels,kernel_size=3,padding=1)
            )
    return out
# A dense block consists of several conv_blocks, each using the same number of output channels.
# Build the dense block
class dense_block(gn.nn.Block):
    def __init__(self,num_conv_block,channels,**kwargs):
        super(dense_block, self).__init__(**kwargs)
        self.net=gn.nn.Sequential()
        for i in range(num_conv_block):
            self.net.add(conv_block(channels=channels))
    def forward(self, x):
        for layer in self.net:
            out=layer(x)
            x=nd.concat(x,out,dim=1) # concatenate input and output along the channel dimension; unlike ResNet, which adds, DenseNet concatenates
        return x

# Test a few instances to check that the results match expectations
# dlk1=dense_block(num_conv_block=6,channels=32)
# dlk1.initialize()
# X=nd.random_normal(shape=(1,64,32,32)) # NCHW
# dlk2=dense_block(num_conv_block=12,channels=32)
# dlk2.initialize()
# dlk3=dense_block(num_conv_block=24,channels=32)
# dlk3.initialize()
# print(dlk1(X).shape)
# print(dlk2(dlk1(X)).shape)
# print(dlk3(dlk2(dlk1(X))).shape)

def trans_block(channels): # transition block: 1x1 conv + average pooling to shrink channels and spatial size
    out=gn.nn.Sequential()
    out.add(gn.nn.BatchNorm(),
            gn.nn.Activation("relu"),
            gn.nn.Conv2D(channels=channels,kernel_size=1),
            gn.nn.AvgPool2D(pool_size=2,strides=2)
            )
    return out

tlk=trans_block(10)
tlk.initialize()
# print(tlk(dlk(X)).shape)


start_channel=64
growth_channel=32 # growth rate: the number of channels each conv_block adds
block_layer_num=[6,12,24,16] # number of conv_blocks in each dense block
def DenseNet():
    net=gn.nn.Sequential()
    with net.name_scope():
        # first block
        net.add(gn.nn.Conv2D(channels=start_channel,kernel_size=7,padding=3,strides=2),
                gn.nn.BatchNorm(),
                gn.nn.Activation("relu"),
                gn.nn.MaxPool2D(pool_size=2,strides=2,padding=1)
                )
        # dense block
        channels=start_channel
        for i,num_layers in enumerate(block_layer_num):
            net.add(dense_block(num_conv_block=num_layers,channels=growth_channel))
            channels+=growth_channel*num_layers  # running count of channels so far
            # print(i,"channels:",channels)
            # after every dense block except the last, add a transition block to shrink channels and spatial size
            if i!=len(block_layer_num)-1:
                channels//=2
                net.add(trans_block(channels=channels)) # the transition block halves the number of channels
                # print("channels//2:",channels)
        #  last block
        net.add(gn.nn.BatchNorm(),
                gn.nn.Activation("relu"),
                gn.nn.GlobalAvgPool2D(),
                gn.nn.Dense(10)
                )
    return net
ctx=mx.gpu()
net=DenseNet()
net.initialize(init=init.Xavier(),ctx=ctx)
# for layer in net:
#     X=X.as_in_context(ctx)
#     X=layer(X)
#     print(layer.name, 'output shape:\t', X.shape)
'''--- load and preprocess the data ---'''
def load_data_fashion_mnist(batch_size, resize=None):
    transformer = []
    if resize:
        transformer += [gn.data.vision.transforms.Resize(resize)]
    transformer += [gn.data.vision.transforms.ToTensor()]
    transformer = gn.data.vision.transforms.Compose(transformer)
    mnist_train = gn.data.vision.FashionMNIST(train=True)
    mnist_test = gn.data.vision.FashionMNIST(train=False)
    train_iter = gn.data.DataLoader(
        mnist_train.transform_first(transformer), batch_size, shuffle=True)
    test_iter = gn.data.DataLoader(
        mnist_test.transform_first(transformer), batch_size, shuffle=False)
    return train_iter, test_iter
batch_size=128
train_iter,test_iter=load_data_fashion_mnist(batch_size,resize=32) # resize to 32: larger images train much more slowly and can exhaust GPU memory


# accuracy metric
def accuracy(output,label):
    return nd.mean(output.argmax(axis=1)==label).asscalar()

def evaluate_accuracy(data_iter,net): # accuracy over a data iterator (used for the test set)
    acc=0
    for data,label in data_iter:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        label = label.astype('float32')
        output=net(data)
        acc+=accuracy(output,label)
    return acc/len(data_iter)

# computing softmax and cross-entropy separately can be numerically unstable, so use the fused loss
cross_loss=gn.loss.SoftmaxCrossEntropyLoss()
# optimizer
train_step=gn.Trainer(net.collect_params(),'sgd',{"learning_rate":0.2}) # since the network uses BatchNorm, the learning rate can be a bit larger

# training loop
epochs=20
for epoch in range(epochs):
    train_loss=0
    train_acc=0
    for image,y in train_iter:
        image, y = image.as_in_context(ctx), y.as_in_context(ctx)
        y = y.astype('float32')

        with ag.record():
            output = net(image)
            loss = cross_loss(output, y)
        loss.backward()
        train_step.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, y)

    test_acc = evaluate_accuracy(test_iter, net)
    print("Epoch %d, Loss:%f, Train acc:%f, Test acc:%f"
          %(epoch,train_loss/len(train_iter),train_acc/len(train_iter),test_acc))

Training results:
[Training log screenshot]

        Of course, the conv_block in the original paper also contains a 1×1 bottleneck convolution, which the version above omits, so it cannot really be called 121 layers. A true 121-layer DenseNet is implemented next.
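        Where does 121 come from? With the bottleneck, each conv_block contributes two convolutions, so the four dense blocks account for 2 × (6 + 12 + 24 + 16) = 116 layers; adding the initial 7×7 convolution, the 1×1 convolution in each of the three transition blocks, and the final fully connected layer gives 121. A quick tally (an illustrative computation, not part of the training code):

block_layer_num=[6,12,24,16]
convs_per_block=2                                   # 1x1 bottleneck + 3x3 conv
dense_convs=convs_per_block*sum(block_layer_num)    # 116
total=1+dense_convs+3+1                             # initial conv + dense convs + 3 transition convs + final Dense
print(total)                                        # 121

The full code for this version: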

import mxnet.ndarray as nd
import mxnet.autograd as ag
import mxnet.gluon as gn
import mxnet as mx
from mxnet import init

def conv_block(channels): # conv_block with a 1x1 bottleneck, keeping the ResNet v2 ordering (BN -> ReLU -> Conv)
    out=gn.nn.Sequential()
    out.add(gn.nn.BatchNorm(),
            gn.nn.Activation("relu"),
            gn.nn.Conv2D(channels=4*channels,kernel_size=1,padding=0),
            gn.nn.BatchNorm(),
            gn.nn.Activation("relu"),
            gn.nn.Conv2D(channels=channels, kernel_size=3, padding=1)
            )
    return out
# A dense block consists of several conv_blocks, each using the same number of output channels.
# Build the dense block
class dense_block(gn.nn.Block):
    def __init__(self,num_conv_block,channels,**kwargs):
        super(dense_block, self).__init__(**kwargs)
        self.net=gn.nn.Sequential()
        for i in range(num_conv_block):
            self.net.add(conv_block(channels=channels))
    def forward(self, x):
        for layer in self.net:
            out=layer(x)
            x=nd.concat(x,out,dim=1) # concatenate input and output along the channel dimension; unlike ResNet, which adds, DenseNet concatenates
        return x

# Test an instance to check that the result matches expectations
dlk1=dense_block(num_conv_block=1,channels=3)
dlk1.initialize()
X=nd.random_normal(shape=(1,1,32,32)) # NCHW
print(dlk1(X).shape)


def trans_block(channels): # transition block: 1x1 conv + average pooling to shrink channels and spatial size
    out=gn.nn.Sequential()
    out.add(gn.nn.BatchNorm(),
            gn.nn.Activation("relu"),
            gn.nn.Conv2D(channels=channels,kernel_size=1),
            gn.nn.AvgPool2D(pool_size=2,strides=2)
            )
    return out

tlk=trans_block(10)
tlk.initialize()
# print(tlk(dlk(X)).shape)


start_channel=64
growth_channel=32 # growth rate: the number of channels each conv_block adds
block_layer_num=[6,12,24,16] # number of conv_blocks in each dense block
def DenseNet():
    net=gn.nn.Sequential()
    with net.name_scope():
        # first block
        net.add(gn.nn.Conv2D(channels=start_channel,kernel_size=7,padding=3,strides=2),
                gn.nn.BatchNorm(),
                gn.nn.Activation("relu"),
                gn.nn.MaxPool2D(pool_size=2,strides=2,padding=1)
                )
        # dense block
        channels=start_channel
        for i,num_layers in enumerate(block_layer_num):
            net.add(dense_block(num_conv_block=num_layers,channels=growth_channel))
            channels+=growth_channel*num_layers  # running count of channels so far
            # print(i,"channels:",channels)
            # after every dense block except the last, add a transition block to shrink channels and spatial size
            if i!=len(block_layer_num)-1:
                channels//=2
                net.add(trans_block(channels=channels)) # the transition block halves the number of channels
                # print("channels//2:",channels)
        #  last block
        net.add(gn.nn.BatchNorm(),
                gn.nn.Activation("relu"),
                gn.nn.GlobalAvgPool2D(),
                gn.nn.Dense(10)
                )
    return net
ctx=mx.gpu()
net=DenseNet()
net.initialize(init=init.Xavier(),ctx=ctx)
# for layer in net:
#     X=X.as_in_context(ctx)
#     X=layer(X)
#     print(layer.name, 'output shape:\t', X.shape)
'''--- load and preprocess the data ---'''
def load_data_fashion_mnist(batch_size, resize=None):
    transformer = []
    if resize:
        transformer += [gn.data.vision.transforms.Resize(resize)]
    transformer += [gn.data.vision.transforms.ToTensor()]
    transformer = gn.data.vision.transforms.Compose(transformer)
    mnist_train = gn.data.vision.FashionMNIST(train=True)
    mnist_test = gn.data.vision.FashionMNIST(train=False)
    train_iter = gn.data.DataLoader(
        mnist_train.transform_first(transformer), batch_size, shuffle=True)
    test_iter = gn.data.DataLoader(
        mnist_test.transform_first(transformer), batch_size, shuffle=False)
    return train_iter, test_iter
batch_size=128
train_iter,test_iter=load_data_fashion_mnist(batch_size,resize=32) # resize to 32: larger images train much more slowly and can exhaust GPU memory


# accuracy metric
def accuracy(output,label):
    return nd.mean(output.argmax(axis=1)==label).asscalar()

def evaluate_accuracy(data_iter,net): # accuracy over a data iterator (used for the test set)
    acc=0
    for data,label in data_iter:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        label = label.astype('float32')
        output=net(data)
        acc+=accuracy(output,label)
    return acc/len(data_iter)

# computing softmax and cross-entropy separately can be numerically unstable, so use the fused loss
cross_loss=gn.loss.SoftmaxCrossEntropyLoss()
# optimizer
train_step=gn.Trainer(net.collect_params(),'sgd',{"learning_rate":0.2}) # since the network uses BatchNorm, the learning rate can be a bit larger

# training loop
epochs=20
for epoch in range(epochs):
    train_loss=0
    train_acc=0
    for image,y in train_iter:
        image, y = image.as_in_context(ctx), y.as_in_context(ctx)
        y = y.astype('float32')

        with ag.record():
            output = net(image)
            loss = cross_loss(output, y)
        loss.backward()
        train_step.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, y)

    test_acc = evaluate_accuracy(test_iter, net)
    print("Epoch %d, Loss:%f, Train acc:%f, Test acc:%f"
          %(epoch,train_loss/len(train_iter),train_acc/len(train_iter),test_acc))

Training results:
[Training log screenshot]
