Deep Learning Backbones

Backbone

VGG Networks

Paper: Very Deep Convolutional Networks for Large-Scale Image Recognition

ResNet

Paper: Deep Residual Learning for Image Recognition

Video walkthrough: ResNet

[Figure: training/test error curves of deeper vs. shallower plain networks, from the paper]

The figure above, taken from the paper, motivates the residual block: as training iterations increase, a deeper network can end up training worse than a shallower one, and deep networks are considerably harder to optimize (keywords: identity function, skip connection). The authors also found that fitting the residual works better than fitting the desired mapping directly, because the residual has a concrete reference target, namely zero, whereas the desired mapping is latent and has nothing to anchor it, making it hard to train. Based on this, the authors propose the residual block shown below:

[Figure: residual learning building block]
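
In the paper's notation, if H(x) is the desired underlying mapping, the block learns the residual F(x) = H(x) − x instead, and the skip connection adds the input back:

$$y = \mathcal{F}(x, \{W_i\}) + x$$

When the optimal mapping is close to an identity, the layers only need to drive F(x) toward zero, which is easier than learning the identity mapping from scratch.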

ResNet:

[Figure: ResNet network architecture]

Training results:

[Figure: training results]

Follow-up paper: Identity Mappings in Deep Residual Networks

import pathlib

import tensorflow as tf

# dataset / training configuration (the directory paths below are placeholders;
# point them at your own dataset, organized as one sub-directory per class)
NUM_CLASSES = 10
image_height = 512
image_width = 512
channels = 3
BATCH_SIZE = 32
EPOCHS = 100
train_dir = "dataset/train"
valid_dir = "dataset/valid"
test_dir = "dataset/test"

def load_and_preprocess_image(img_path):
    # read pictures
    img_raw = tf.io.read_file(img_path)
    # decode pictures
    img_tensor = tf.image.decode_jpeg(img_raw, channels=channels)
    # resize
    img_tensor = tf.image.resize(img_tensor, [image_height, image_width])
    img_tensor = tf.cast(img_tensor, tf.float32)
    # normalization
    img = img_tensor / 255.0
    return img

def get_images_and_labels(data_root_dir):
    # get all images' paths (format: string)
    data_root = pathlib.Path(data_root_dir)
    all_image_path = [str(path) for path in list(data_root.glob('*/*'))]
    # get labels' names
    label_names = sorted(item.name for item in data_root.glob('*/'))
    # dict: {label : index}
    label_to_index = dict((label, index) for index, label in enumerate(label_names))
    # get all images' labels
    all_image_label = [label_to_index[pathlib.Path(single_image_path).parent.name] for single_image_path in all_image_path]

    return all_image_path, all_image_label


def get_dataset(dataset_root_dir):
    all_image_path, all_image_label = get_images_and_labels(data_root_dir=dataset_root_dir)
    # print("image_path: {}".format(all_image_path[:]))
    # print("image_label: {}".format(all_image_label[:]))
    # load the dataset and preprocess images
    image_dataset = tf.data.Dataset.from_tensor_slices(all_image_path).map(load_and_preprocess_image)
    label_dataset = tf.data.Dataset.from_tensor_slices(all_image_label)
    dataset = tf.data.Dataset.zip((image_dataset, label_dataset))
    image_count = len(all_image_path)

    return dataset, image_count


def generate_datasets():
    train_dataset, train_count = get_dataset(dataset_root_dir=train_dir)
    valid_dataset, valid_count = get_dataset(dataset_root_dir=valid_dir)
    test_dataset, test_count = get_dataset(dataset_root_dir=test_dir)


    # read the original_dataset in the form of batch
    train_dataset = train_dataset.shuffle(buffer_size=train_count).batch(batch_size=BATCH_SIZE)
    valid_dataset = valid_dataset.batch(batch_size=BATCH_SIZE)
    test_dataset = test_dataset.batch(batch_size=BATCH_SIZE)

    return train_dataset, valid_dataset, test_dataset

class BasicBlock(tf.keras.layers.Layer):

    def __init__(self, filter_num, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=filter_num,
                                            kernel_size=(3, 3),
                                            strides=stride,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(filters=filter_num,
                                            kernel_size=(3, 3),
                                            strides=1,
                                            padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        if stride != 1:
            self.downsample = tf.keras.Sequential()
            self.downsample.add(tf.keras.layers.Conv2D(filters=filter_num,
                                                       kernel_size=(1, 1),
                                                       strides=stride))
            self.downsample.add(tf.keras.layers.BatchNormalization())
        else:
            self.downsample = lambda x: x

    def call(self, inputs, training=None, **kwargs):
        residual = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)

        output = tf.nn.relu(tf.keras.layers.add([residual, x]))

        return output


class BottleNeck(tf.keras.layers.Layer):
    def __init__(self, filter_num, stride=1):
        super(BottleNeck, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=filter_num,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding='same')
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(filters=filter_num,
                                            kernel_size=(3, 3),
                                            strides=stride,
                                            padding='same')
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv3 = tf.keras.layers.Conv2D(filters=filter_num * 4,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding='same')
        self.bn3 = tf.keras.layers.BatchNormalization()

        self.downsample = tf.keras.Sequential()
        self.downsample.add(tf.keras.layers.Conv2D(filters=filter_num * 4,
                                                   kernel_size=(1, 1),
                                                   strides=stride))
        self.downsample.add(tf.keras.layers.BatchNormalization())

    def call(self, inputs, training=None, **kwargs):
        residual = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv3(x)
        x = self.bn3(x, training=training)

        output = tf.nn.relu(tf.keras.layers.add([residual, x]))

        return output


def make_basic_block_layer(filter_num, blocks, stride=1):
    res_block = tf.keras.Sequential()
    res_block.add(BasicBlock(filter_num, stride=stride))

    for _ in range(1, blocks):
        res_block.add(BasicBlock(filter_num, stride=1))

    return res_block


def make_bottleneck_layer(filter_num, blocks, stride=1):
    res_block = tf.keras.Sequential()
    res_block.add(BottleNeck(filter_num, stride=stride))

    for _ in range(1, blocks):
        res_block.add(BottleNeck(filter_num, stride=1))

    return res_block



class ResNetTypeI(tf.keras.Model):
    def __init__(self, layer_params):
        super(ResNetTypeI, self).__init__()

        self.conv1 = tf.keras.layers.Conv2D(filters=64,
                                            kernel_size=(7, 7),
                                            strides=2,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
                                               strides=2,
                                               padding="same")

        self.layer1 = make_basic_block_layer(filter_num=64,
                                             blocks=layer_params[0])
        self.layer2 = make_basic_block_layer(filter_num=128,
                                             blocks=layer_params[1],
                                             stride=2)
        self.layer3 = make_basic_block_layer(filter_num=256,
                                             blocks=layer_params[2],
                                             stride=2)
        self.layer4 = make_basic_block_layer(filter_num=512,
                                             blocks=layer_params[3],
                                             stride=2)

        self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
        self.fc = tf.keras.layers.Dense(units=NUM_CLASSES, activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        x = self.avgpool(x)
        output = self.fc(x)

        return output


class ResNetTypeII(tf.keras.Model):
    def __init__(self, layer_params):
        super(ResNetTypeII, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=64,
                                            kernel_size=(7, 7),
                                            strides=2,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
                                               strides=2,
                                               padding="same")

        self.layer1 = make_bottleneck_layer(filter_num=64,
                                            blocks=layer_params[0])
        self.layer2 = make_bottleneck_layer(filter_num=128,
                                            blocks=layer_params[1],
                                            stride=2)
        self.layer3 = make_bottleneck_layer(filter_num=256,
                                            blocks=layer_params[2],
                                            stride=2)
        self.layer4 = make_bottleneck_layer(filter_num=512,
                                            blocks=layer_params[3],
                                            stride=2)

        self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
        self.fc = tf.keras.layers.Dense(units=NUM_CLASSES, activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        x = self.avgpool(x)
        output = self.fc(x)

        return output


def resnet_18():
    return ResNetTypeI(layer_params=[2, 2, 2, 2])


def resnet_34():
    return ResNetTypeI(layer_params=[3, 4, 6, 3])


def resnet_50():
    return ResNetTypeII(layer_params=[3, 4, 6, 3])


def resnet_101():
    return ResNetTypeII(layer_params=[3, 4, 23, 3])


def resnet_152():
    return ResNetTypeII(layer_params=[3, 8, 36, 3])


if __name__ == '__main__':
    # build model
    model = resnet_18()
    model.build(input_shape=(None, image_height, image_width, channels))
    model.summary()
    
    # generate datasets
    train_dataset, valid_dataset, test_dataset = generate_datasets()

    # define loss function
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    valid_loss = tf.keras.metrics.Mean(name='valid_loss')

    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_accuracy')

    # train step
    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_object(y_true=labels, y_pred=predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars=zip(gradients, model.trainable_variables))
        train_loss(loss)
        train_accuracy(labels, predictions)
    
    # valid step
    @tf.function
    def valid_step(images, labels):
        predictions = model(images, training=False)
        v_loss = loss_object(y_true=labels, y_pred=predictions)
        valid_loss(v_loss)
        valid_accuracy(labels, predictions)


    # start training
    for epoch in range(EPOCHS):
        train_loss.reset_states()
        train_accuracy.reset_states()
        valid_loss.reset_states()
        valid_accuracy.reset_states()

        step = 0
        for images, labels in train_dataset:
            step += 1
            train_step(images, labels)
            print("Epoch: {}/{}, step: {}, loss: {:.5f}, accuracy: {:.5f}".format(epoch + 1,
                                                                                  EPOCHS,
                                                                                  step,
                                                                                  train_loss.result(),
                                                                                  train_accuracy.result()))

        for valid_images, valid_labels in valid_dataset:
            valid_step(valid_images, valid_labels)

        print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
              "valid loss: {:.5f}, valid accuracy: {:.5f}".format(epoch + 1,
EPOCHS,
                                                                  train_loss.result(),
                                                                  train_accuracy.result(),
                                                                  valid_loss.result(),
                                                                  valid_accuracy.result()))

    model.save_weights(filepath='model/', save_format='tf')

GoogLeNet

GoogLeNet explained

InceptionV1

InceptionV2

InceptionV3

ShuffleNet

ShuffleNetV1

This section mainly draws on the write-up Lightweight Networks: ShuffleNet and the ShuffleNetV1 paper. ShuffleNet relies on two special operations, pointwise group convolution and channel shuffle, to reduce computation while maintaining accuracy. The network structure is shown in the figure below:
[Figure: ShuffleNet units]
A pointwise group convolution is simply a group convolution with a 1×1 kernel. Group convolution, i.e. convolving the channels in separate groups, first appeared in AlexNet, where it was used to split the network so it could run in parallel on two GPUs. Group convolution also creates sparse connections between channels. Assuming N input channels, a group convolution works as follows (a minimal sketch follows the list):

  1. Split the channels into M groups, so each group holds N/M channels, and convolve each group independently.
  2. Concatenate the per-group outputs along the channel axis to form this layer's output channels.
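
A minimal sketch of this split-convolve-concatenate procedure, assuming TF 2.3 or newer, where tf.keras.layers.Conv2D accepts a groups argument that performs it in a single layer (the channel counts here are illustrative):

import tensorflow as tf
from tensorflow.keras import layers

# N = 8 input channels split into M = 4 groups of 2 channels each
x = tf.random.normal([1, 28, 28, 8])
gconv = layers.Conv2D(filters=8, kernel_size=1, groups=4)  # pointwise group convolution
y = gconv(x)
print(y.shape)  # (1, 28, 28, 8)

# the parameter count drops by a factor of M versus a dense 1x1 convolution
dense = layers.Conv2D(filters=8, kernel_size=1)
dense.build(x.shape)
print(gconv.count_params(), dense.count_params())  # 24 vs. 72 (weights + biases)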

Channel shuffle, shown in the figure below, splits each group into several subgroups and then forms new groups out of subgroups taken from different groups, so that each new group carries feature information from all of the original groups (a toy example follows the figure).
[Figure: channel shuffle]
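
A toy demonstration of the reshape -> transpose -> reshape trick behind channel shuffle, on six channels arranged in two groups (the values stand in for channel indices):

import tensorflow as tf

x = tf.reshape(tf.range(6), [1, 1, 1, 6])  # channels [0 1 2 | 3 4 5]: two groups of three
g = 2
x = tf.reshape(x, [1, 1, 1, g, 6 // g])    # separate out the group axis
x = tf.transpose(x, [0, 1, 2, 4, 3])       # swap the group and channel axes
x = tf.reshape(x, [1, 1, 1, 6])            # flatten back
print(x.numpy().ravel())                   # [0 3 1 4 2 5]: every new group mixes both old groups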
The figure below shows the overall ShuffleNet architecture, which is similar to ResNet. ShuffleNet consists of three stages, each built from ShuffleNet units in place of the usual residual blocks. The group number g controls the sparsity of the pointwise convolutions; as g varies, the output channel count is scaled so that the total computational cost stays fixed (140 MFLOPs). Clearly, for a given complexity budget, a larger group number allows more output channels (and hence more convolution filters), which helps encode more information; on the other hand, more groups means fewer channels per group, which may degrade each individual filter.
[Figure: ShuffleNet architecture]
The core of ShuffleNet is to replace the corresponding layers of a ResNet block with pointwise group convolution, channel shuffle, and depthwise separable convolution, forming the ShuffleNet unit, which reduces computation while improving accuracy: pointwise group convolution and depthwise separable convolution mainly cut the computation, while channel shuffle enables cross-group information exchange, which improves recognition accuracy.
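
As an illustration, here is a minimal sketch of a stride-1 ShuffleNet V1 unit (1×1 group conv -> channel shuffle -> 3×3 depthwise conv -> 1×1 group conv -> identity shortcut). The class and helper names are ours, it assumes TF 2.3+ for the groups argument, and the input channel count must equal out_channels and be divisible by groups:

import tensorflow as tf
from tensorflow.keras import layers


def channel_shuffle(x, groups):
    # reshape to (batch, h, w, groups, c // groups), swap the last two axes, flatten back
    _, h, w, c = x.shape
    x = tf.reshape(x, [-1, h, w, groups, c // groups])
    x = tf.transpose(x, [0, 1, 2, 4, 3])
    return tf.reshape(x, [-1, h, w, c])


class ShuffleNetV1Unit(tf.keras.layers.Layer):
    def __init__(self, out_channels, groups=3):
        super(ShuffleNetV1Unit, self).__init__()
        self.groups = groups
        bottleneck = out_channels // 4  # bottleneck ratio of 1/4, as in the paper
        self.gconv1 = layers.Conv2D(bottleneck, 1, groups=groups, use_bias=False)
        self.bn1 = layers.BatchNormalization()
        self.dwconv = layers.DepthwiseConv2D(3, strides=1, padding='same', use_bias=False)
        self.bn2 = layers.BatchNormalization()  # no ReLU after the depthwise conv, as in the paper
        self.gconv2 = layers.Conv2D(out_channels, 1, groups=groups, use_bias=False)
        self.bn3 = layers.BatchNormalization()

    def call(self, inputs, training=None):
        x = tf.nn.relu(self.bn1(self.gconv1(inputs), training=training))
        x = channel_shuffle(x, self.groups)
        x = self.bn2(self.dwconv(x), training=training)
        x = self.bn3(self.gconv2(x), training=training)
        return tf.nn.relu(inputs + x)  # identity shortcut: stride 1, unchanged channels

For example, ShuffleNetV1Unit(240, groups=3) applied to a (1, 28, 28, 240) tensor returns the same shape.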

ShuffleNetV2

在这里插入图片描述
First compare (a) and (c), the units without downsampling. The ShuffleNet V2 unit introduces several changes:

  1. It begins with a channel split, dividing the input feature map's c channels into two parts of c′ and c − c′ channels. This is effectively a grouping operation that reduces FLOPs and stands in for the group convolution that has been removed; to follow guideline G1, the authors take c′ = c/2, splitting the input feature map in half along the channel dimension.
  2. In the right-hand serial branch, the 1×1 group convolutions are replaced with ordinary 1×1 convolutions, so the channel shuffle that used to follow them is no longer needed and is removed, in line with G2.
  3. The element-wise add is replaced with a concat, in line with G4.
  4. A channel shuffle is added before the final output: since the initial channel split is effectively a grouping operation, the output must be shuffled so that information still flows between the groups, preserving representational power.
  5. Because this unit neither downsamples nor changes the channel count, it is repeated many times in the network; across these repetitions the channel shuffle, channel split, and concat operations can be merged together, reducing the number of sequential operations, in line with G3.

Now compare (b) and (d), the downsampling units. As with (a) and (c), the group convolutions are replaced by ordinary convolutions, and the average-pooling layer in the left branch is replaced by a depthwise convolution followed by a pointwise convolution. Since there is no channel split here, the concat at the end doubles the channel count; the result can be viewed as two groups, each with as many channels as the input, so a channel shuffle is again required. When this unit is followed by unit (c), the channel shuffle, channel split, and concat operations can likewise be merged.

Code implementation

import tensorflow as tf
from tensorflow.keras import Model, layers


class ConvBNRelu(Model):

    def __init__(self, channels, kernel_size, strides):
        super(ConvBNRelu, self).__init__()
        self.conv = layers.Conv2D(channels, kernel_size, strides, padding='same', use_bias=False)
        self.bn = layers.BatchNormalization()
        self.relu = layers.ReLU()

    def call(self, inputs, training=None):
        x = self.conv(inputs)
        x = self.bn(x, training=training)
        x = self.relu(x)
        return x

class DepthwiseConvBNRelu(Model):
    def __init__(self, kernel_size, strides):
        super(DepthwiseConvBNRelu, self).__init__()
        self.depth_wise = layers.DepthwiseConv2D(kernel_size, strides, padding='same', use_bias=False)
        self.bn = layers.BatchNormalization()

    def call(self, inputs, training=None):
        # note: no ReLU after the depthwise convolution, matching the paper
        x = self.depth_wise(inputs)
        x = self.bn(x, training=training)
        return x

class ChannelShuffle(Model):
    def __init__(self, group):
        super(ChannelShuffle, self).__init__()
        self.group = group
    
    def call(self, inputs):
        shape = inputs.shape
        h = shape[1]
        w = shape[2]
        c = shape[3]
        assert c % self.group == 0, 'channel count must be divisible by the group number'

        # reshape -> transpose -> reshape: interleave channels across groups
        inputs = tf.reshape(inputs, shape=[-1, h, w, c // self.group, self.group])
        inputs = tf.transpose(inputs, [0, 1, 2, 4, 3])
        inputs = tf.reshape(inputs, shape=(-1, h, w, c))

        return inputs

class ShuffleBlock(Model):
    def __init__(self, channels, strides, split_ratio=0.5):
        super(ShuffleBlock, self).__init__()
        self.split_ratio = split_ratio
        self.conv1 = ConvBNRelu(channels // 2, 1, 1)
        self.depth_wise = DepthwiseConvBNRelu(3, strides=strides)
        self.conv2 = ConvBNRelu(channels // 2, 1, 1)
        self.shuffle = ChannelShuffle(group=2)

    def call(self, inputs, training=None):
        # channel split: one branch passes through untouched, the other is transformed
        x1, x2 = tf.split(inputs, num_or_size_splits=int(1 / self.split_ratio), axis=-1)
        # conv_1x1 -> depthwise_3x3 -> conv_1x1
        x2 = self.conv1(x2, training=training)
        x2 = self.depth_wise(x2, training=training)
        x2 = self.conv2(x2, training=training)
        # concatenate x1 and x2 so both branches are kept
        feature = layers.Concatenate()([x1, x2])
        # channel shuffle so information flows between the two branches
        res = self.shuffle(feature)
        return res

class ShuffleConvBlock(Model):

    def __init__(self, in_channels, out_channels, strides):
        super(ShuffleConvBlock, self).__init__()
        # main branch: 1x1 conv -> strided 3x3 depthwise -> 1x1 conv
        self.conv1 = ConvBNRelu(out_channels - in_channels, 1, 1)
        self.depth_wise = DepthwiseConvBNRelu(3, strides=strides)
        self.conv2 = ConvBNRelu(out_channels - in_channels, 1, 1)
        # lateral branch: strided 3x3 depthwise -> 1x1 conv (no channel split here)
        self.depth_wise_lateral = DepthwiseConvBNRelu(3, strides=strides)
        self.conv_lateral = ConvBNRelu(in_channels, 1, 1)
        self.shuffle = ChannelShuffle(group=2)

    def call(self, inputs, training=None):
        x1, x2 = inputs, inputs
        x2 = self.conv1(x2, training=training)
        x2 = self.depth_wise(x2, training=training)
        x2 = self.conv2(x2, training=training)

        x1 = self.depth_wise_lateral(x1, training=training)
        x1 = self.conv_lateral(x1, training=training)

        # concat raises the channel count to out_channels; shuffle mixes the branches
        feature = layers.Concatenate()([x1, x2])
        res = self.shuffle(feature)

        return res

class ShuffleNetStage(Model):

    def __init__(self, repeat, in_channels, out_channels):
        super(ShuffleNetStage, self).__init__()
        # one downsampling block followed by `repeat` stride-1 blocks
        self.shuffle_conv_block = ShuffleConvBlock(in_channels=in_channels,
                                                   out_channels=out_channels,
                                                   strides=2)
        self.convs = []
        for i in range(repeat):
            self.convs.append(ShuffleBlock(channels=out_channels,
                                           strides=1))

    def call(self, inputs, training=None):
        x = self.shuffle_conv_block(inputs, training=training)
        for conv in self.convs:
            x = conv(x, training=training)

        return x

class ShuffleNetV2(Model):
    """ShuffleNetV2.
    How to reduce MAC (memory access cost):
    1. keep channels_in == channels_out
    2. avoid group convolution
    3. replace add with concatenate
    4. avoid fragmenting the model
    So this implementation uses:
    1. plain 1x1 convolutions
    2. depthwise + pointwise convolutions
    3. concatenate rather than add
    4. channel shuffle, which helps keep accuracy up
    """

    def __init__(self, channels=[24, 116, 232, 464, 1024]):
        super(ShuffleNetV2, self).__init__()
        self.conv1 = layers.Conv2D(channels[0], 3, 2, padding='same')
        self.pool = layers.MaxPool2D(3, strides=2, padding='same')
        self.stage1 = ShuffleNetStage(repeat=3, in_channels=channels[0], out_channels=channels[1])
        self.stage2 = ShuffleNetStage(repeat=7, in_channels=channels[1], out_channels=channels[2])
        self.stage3 = ShuffleNetStage(repeat=3, in_channels=channels[2], out_channels=channels[3])
        self.conv2 = layers.Conv2D(channels[4], kernel_size=1, padding='same')

    def call(self, inputs, training=None):
        x = self.conv1(inputs)
        x = self.pool(x)
        x = self.stage1(x, training=training)
        x = self.stage2(x, training=training)
        x = self.stage3(x, training=training)
        x = self.conv2(x)

        return x
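
A quick smoke test of the sketch above, assuming 224×224 RGB inputs (as written, the model stops at the final 1×1 convolution; a global pooling and classification head would still need to be added):

model = ShuffleNetV2()
x = tf.random.normal([1, 224, 224, 3])
y = model(x, training=False)
print(y.shape)  # (1, 7, 7, 1024): 224 is halved by conv1, the max-pool, and each of the three stages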


SqueezeNet

The core building block of SqueezeNet is the Fire module; the entire network is built by chaining Fire modules together. The Fire module is structured as follows:
[Figure: Fire module]
First, the feature maps from the previous layer pass through 1×1 convolutions for dimensionality reduction, much like the bottleneck variant of ResNet. The compressed feature maps are then fed into both 1×1 and 3×3 convolutions to expand the dimensions again, giving the network width, and the two resulting sets of feature maps are concatenated along the channel axis. Note that this is concatenation, not the addition used in ResNet; the concatenation is more reminiscent of Inception. The full SqueezeNet model contains eight Fire modules, and the channel counts grow regularly with depth: 128, 128, 256, 256, 384, 384, 512, 512. SqueezeNet uses only ordinary spatial convolutions, so its parameters are counted the same way as in other models; its small size simply comes from using few filters. That said, the model is not under 0.5 MB as the paper's title suggests, but about 4.8 MB; going from 4.8 MB to 0.5 MB relies on model compression techniques, which are separate from SqueezeNet itself.
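
A minimal Fire-module sketch in Keras; the channel numbers are illustrative (they match the paper's fire2 configuration: squeeze 16, expand 64 + 64):

import tensorflow as tf
from tensorflow.keras import layers


def fire_module(x, squeeze_channels, expand_channels):
    # squeeze: 1x1 conv reduces the channel count
    s = layers.Conv2D(squeeze_channels, 1, activation='relu')(x)
    # expand: parallel 1x1 and 3x3 branches restore width
    e1 = layers.Conv2D(expand_channels // 2, 1, activation='relu')(s)
    e3 = layers.Conv2D(expand_channels // 2, 3, padding='same', activation='relu')(s)
    # channel concatenation, not the element-wise add used in ResNet
    return layers.Concatenate()([e1, e3])


inputs = tf.keras.Input(shape=(56, 56, 96))
outputs = fire_module(inputs, squeeze_channels=16, expand_channels=128)
model = tf.keras.Model(inputs, outputs)
print(model.output_shape)  # (None, 56, 56, 128)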
