Retinanet网络解析

最新推荐文章于 2022-09-21 19:56:25 发布

zh_JNU

最新推荐文章于 2022-09-21 19:56:25 发布

阅读量3.1k

点赞数 4

分类专栏： keras

本文链接：https://blog.csdn.net/zh_JNU/article/details/95369551

版权

keras 专栏收录该内容

19 篇文章 1 订阅

订阅专栏

最近在做小目标识别的项目。对于数据不平衡问题,常见的解决方案之一是设置正负样本的比例来计算损失,另一个是使用 focal loss 损失,请参考何恺明等人的论文《Focal Loss for Dense Object Detection》。说到 focal loss,就去看了看开源的 RetinaNet 实现,请参考 https://github.com/fizyr/keras-retinanet (keras库)。里面写得太复杂,看了好久没有理清楚(智商低)。决定静下心再看一遍,最后理清楚了网络的构建、损失函数以及数据输入输出张量的要求,并进行了复现。

下面介绍一下 RetinaNet 的网络结构,直接暴力粘贴代码,最后是网络结构图。发现没什么好讲的,关于这些内容的博客已经很多了,哈哈。算了,直接看代码去理解整个思想吧。

#coding:utf-8
from keras.applications import VGG16
import os
from keras import backend
import keras
import math
import tensorflow as tf
from keras.layers import Input, Lambda, Conv2D, MaxPooling2D, BatchNormalization, ELU, Reshape, Concatenate, Activation
import numpy as np
from keras.models import Model,Input
# Expose no CUDA devices to this process, so the network is built and run on the CPU.
os.environ['CUDA_VISIBLE_DEVICES']=''
class UpsampleLike(keras.layers.Layer):  # upsampling used when merging FPN levels
    """ Keras layer for resizing a Tensor to the spatial size of another Tensor.

    The layer is called on a two-element list ``[source, target]``. ``source``
    is resized to the height and width of ``target`` and keeps its own batch
    and channel dimensions.
    """

    def call(self, inputs, **kwargs):
        """ Resizes ``source`` to the height and width of ``target``.

        Args
            inputs : A list ``[source, target]`` of two 4D tensors.

        Returns
            ``source`` resized to the spatial dimensions of ``target``.
        """
        source, target = inputs
        # Symbolic shape of the target, read when the graph is executed.
        target_shape = keras.backend.shape(target)
        if keras.backend.image_data_format() == 'channels_first':
            # The tf.image resize ops work on (batch, height, width, channels),
            # so move the channel axis last, resize, and move it back.
            # keras.backend.transpose() accepts only the tensor itself, so the
            # axis permutation has to be done with tf.transpose.
            source = tf.transpose(source, (0, 2, 3, 1))
            output = tf.image.resize_nearest_neighbor(source, (target_shape[2], target_shape[3]))
            output = tf.transpose(output, (0, 3, 1, 2))
            return output
        else:
            # NOTE(review): this branch interpolates bilinearly while the
            # channels_first branch uses nearest-neighbour - confirm which one
            # is intended before unifying them.
            return tf.image.resize_bilinear(source, (target_shape[1], target_shape[2]))

    def compute_output_shape(self, input_shape):
        """ Returns the static output shape: batch and channels from the source, height and width from the target.

        Args
            input_shape : A list ``[source_shape, target_shape]`` of shape tuples.
        """
        if keras.backend.image_data_format() == 'channels_first':
            return (input_shape[0][0], input_shape[0][1]) + input_shape[1][2:4]
        else:
            return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],)
class PriorProbability(keras.initializers.Initializer):  # initialises the bias of the classification head, as done in the paper
    """ Initializer that fills a tensor with a constant derived from a prior probability.

    Every element is set to ``-log((1 - p) / p)``, the value whose sigmoid
    equals the configured probability ``p``.
    """

    def __init__(self, probability=0.01):
        # Prior foreground probability p used to derive the fill value.
        self.probability = probability

    def get_config(self):
        """ Returns the constructor arguments as a dictionary. """
        config = {'probability': self.probability}
        return config

    def __call__(self, shape, dtype=None):
        """ Builds an array of the given shape filled with ``-log((1 - p) / p)``.

        Args
            shape : Shape of the array to create.
            dtype : Optional dtype handed to ``np.ones``.

        Returns
            A numpy array in which every element holds the bias value.
        """
        filled = np.ones(shape, dtype=dtype)
        prior = self.probability
        # set bias to -log((1 - p)/p) for foreground
        bias_value = -math.log((1 - prior) / prior)
        return filled * bias_value
def default_classification_model(       # shared classification head
    num_classes,
    num_anchors,
    pyramid_feature_size=256,
    prior_probability=0.01,
    classification_feature_size=256,
    name='classification_submodel'
):
    """ Creates the default classification submodel.

    The submodel is four 3x3 convolutions with ReLU followed by a final 3x3
    convolution with ``num_classes * num_anchors`` filters, whose output is
    reshaped to one row of ``num_classes`` scores per anchor and passed
    through a sigmoid.

    Args
        num_classes                 : Number of classes to predict a score for at each feature level.
        num_anchors                 : Number of anchors to predict classification scores for at each feature level.
        pyramid_feature_size        : The number of filters to expect from the feature pyramid levels.
        prior_probability           : Probability handed to PriorProbability to initialise the bias of the final convolution.
        classification_feature_size : The number of filters to use in the layers in the classification submodel.
        name                        : The name of the submodel.

    Returns
        A keras.models.Model that predicts classes for each anchor.
    """
    channels_first = keras.backend.image_data_format() == 'channels_first'

    # Settings common to every convolution of this head.
    conv_kwargs = dict(kernel_size=3, strides=1, padding='same')

    # Height and width are left unspecified so the head can be applied to every pyramid level.
    if channels_first:
        input_shape = (pyramid_feature_size, None, None)
    else:
        input_shape = (None, None, pyramid_feature_size)
    inputs = keras.layers.Input(shape=input_shape)

    # Four hidden convolutions: zero bias, Gaussian weights with stddev 0.01.
    tower = inputs
    for index in range(4):
        hidden_conv = keras.layers.Conv2D(
            filters=classification_feature_size,
            activation='relu',
            name='pyramid_classification_{}'.format(index),
            kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
            bias_initializer='zeros',
            **conv_kwargs
        )
        tower = hidden_conv(tower)

    # Final convolution: one score per (anchor, class); its bias comes from the prior probability.
    score_conv = keras.layers.Conv2D(
        filters=num_classes * num_anchors,
        kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
        bias_initializer=PriorProbability(probability=prior_probability),
        name='pyramid_classification',
        **conv_kwargs
    )
    scores = score_conv(tower)

    # Move channels last if needed, flatten to rows of num_classes values, then apply sigmoid.
    if channels_first:
        scores = keras.layers.Permute((2, 3, 1), name='pyramid_classification_permute')(scores)
    scores = keras.layers.Reshape((-1, num_classes), name='pyramid_classification_reshape')(scores)
    scores = keras.layers.Activation('sigmoid', name='pyramid_classification_sigmoid')(scores)

    return keras.models.Model(inputs=inputs, outputs=scores, name=name)


def default_regression_model(num_values, num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):  # shared regression head
    """ Creates the default regression submodel.

    The submodel is four 3x3 convolutions with ReLU followed by a final 3x3
    convolution with ``num_anchors * num_values`` filters, whose output is
    reshaped to one row of ``num_values`` values per anchor.

    Args
        num_values              : Number of values to regress.
        num_anchors             : Number of anchors to regress for each feature level.
        pyramid_feature_size    : The number of filters to expect from the feature pyramid levels.
        regression_feature_size : The number of filters to use in the layers in the regression submodel.
        name                    : The name of the submodel.

    Returns
        A keras.models.Model that predicts regression values for each anchor.
    """
    # Every convolution of this head, including the final one, uses zero bias
    # and a Gaussian weight fill with stddev = 0.01. A single initializer
    # instance is shared by all of them.
    gaussian_init = keras.initializers.normal(mean=0.0, stddev=0.01, seed=None)
    conv_kwargs = dict(
        kernel_size=3,
        strides=1,
        padding='same',
        kernel_initializer=gaussian_init,
        bias_initializer='zeros',
    )

    channels_first = keras.backend.image_data_format() == 'channels_first'

    # Height and width are left unspecified so the head can be applied to every pyramid level.
    if channels_first:
        input_shape = (pyramid_feature_size, None, None)
    else:
        input_shape = (None, None, pyramid_feature_size)
    inputs = keras.layers.Input(shape=input_shape)

    tower = inputs
    for index in range(4):
        layer_name = 'pyramid_regression_{}'.format(index)
        hidden_conv = keras.layers.Conv2D(
            filters=regression_feature_size,
            activation='relu',
            name=layer_name,
            **conv_kwargs
        )
        tower = hidden_conv(tower)

    # Final convolution: num_values outputs for each anchor, no activation.
    deltas = keras.layers.Conv2D(num_anchors * num_values, name='pyramid_regression', **conv_kwargs)(tower)

    # Move channels last if needed, then flatten to rows of num_values values.
    if channels_first:
        deltas = keras.layers.Permute((2, 3, 1), name='pyramid_regression_permute')(deltas)
    deltas = keras.layers.Reshape((-1, num_values), name='pyramid_regression_reshape')(deltas)

    return keras.models.Model(inputs=inputs, outputs=deltas, name=name)

def default_submodels(num_classes=46, num_anchors=9):  # bundles the regression and classification heads
    """ Builds the named list of default detection submodels.

    The list holds a regression submodel predicting 4 values per anchor,
    followed by a classification submodel.

    Args
        num_classes : Number of classes to use.
        num_anchors : Number of base anchors.

    Returns
        A list of (name, submodel) tuples: first 'regression', then 'classification'.
    """
    regression_head = default_regression_model(4, num_anchors)
    classification_head = default_classification_model(num_classes, num_anchors)

    submodels = [
        ('regression', regression_head),
        ('classification', classification_head),
    ]
    return submodels
def __build_model_pyramid(name, model, features):
    """ Runs one submodel on every FPN level and joins the results.

    Args
        name     : Name given to the concatenation layer.
        model    : The submodel to evaluate.
        features : The FPN features, one tensor per pyramid level.

    Returns
        A tensor with the per-level responses of the submodel concatenated along axis 1.
    """
    merge_layer = keras.layers.Concatenate(axis=1, name=name)

    per_level = []
    for level in features:
        per_level.append(model(level))

    return merge_layer(per_level)
def my_retinanet(input_shape=(1024, 1024, 3), num_classes=45, num_anchors=5):
    """ Builds a RetinaNet-style detector on top of a small custom convolutional backbone.

    The backbone consists of six convolutional stages with five 2x2
    max-poolings. The batch-normalised outputs of stages 4, 5 and 6 (strides
    8, 16 and 32) serve as C3, C4 and C5. A feature pyramid P3-P7 is built
    from them, and the shared regression and classification submodels are
    applied to every pyramid level.

    The layers assume channels_last tensors: the input is
    (height, width, channels) and BatchNormalization normalises axis 3.

    Args
        input_shape : Shape of the input image tensor without the batch dimension.
        num_classes : Number of classes scored for each anchor.
        num_anchors : Number of anchors predicted at each feature-map location.

    Returns
        A tuple (model, predictor_sizes). model is a keras.models.Model whose
        outputs are [regression, classification], each concatenated over all
        pyramid levels. predictor_sizes is a numpy array holding one
        (height, width) row for each of P3, P4, P5, P6 and P7.
    """
    # Must stay equal to the pyramid_feature_size default (256) that the
    # submodels built by default_submodels() expect as input.
    feature_size = 256

    x = Input(shape=input_shape)
    normed = Lambda(lambda z: z/127.5 - 1.,  # maps values from [0, 255] to [-1, 1]
                    output_shape=input_shape,
                    name='lambda1')(x)

    # Stages 1-3: conv, conv + relu, 2x2 max-pool, then batch norm over the channel axis (axis 3).
    conv1_1 = Conv2D(32, (3, 3), name='conv1_1', strides=(1, 1), padding="same")(normed)
    conv1_2 = Conv2D(32, (3, 3), name='conv1_2', strides=(1, 1), padding="same",activation='relu')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1_2)
    bn1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(pool1)

    conv2_1 = Conv2D(64, (3, 3), name='conv2_1', strides=(1, 1), padding="same")(bn1)
    conv2_2 = Conv2D(64, (3, 3), name='conv2_2', strides=(1, 1), padding="same",activation='relu')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2_2)
    bn2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(pool2)

    conv3_1 = Conv2D(128, (1, 1), name='conv3_1', strides=(1, 1), padding="same")(bn2)
    conv3_2 = Conv2D(256, (3, 3), name='conv3_2', strides=(1, 1), padding="same",activation='relu')(conv3_1)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3_2)
    bn3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(pool3)

    # Stages 4-6: batch norm comes before the pooling here, so the tapped
    # bn4/bn5/bn6 outputs are at strides 8/16/32.
    conv4_1 = Conv2D(128, (1, 1), name='conv4_1', strides=(1, 1), padding="same")(bn3)
    conv4_2 = Conv2D(256, (3, 3), name='conv4_2', strides=(1, 1), padding="same",activation='relu')(conv4_1)
    bn4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(bn4)

    conv5_1 = Conv2D(128, (1, 1), name='conv5_1', strides=(1, 1), padding="same")(pool4)
    conv5_2 = Conv2D(256, (3, 3), name='conv5_2', strides=(1, 1), padding="same",activation='relu')(conv5_1)
    bn5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(bn5)

    conv6_1 = Conv2D(128, (1, 1), name='conv6_1', strides=(1, 1), padding="same")(pool5)
    conv6_2 = Conv2D(256, (3, 3), name='conv6_2', strides=(1, 1), padding="same",activation='relu')(conv6_1)
    bn6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6_2)

    C3, C4, C5 = bn4, bn5, bn6

    # Top-down pathway: reduce C5 with a 1x1 conv and upsample it to the size of C4.
    P5           = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C5_reduced')(C5)
    P5_upsampled = UpsampleLike(name='P5_upsampled')([P5, C4])
    P5           = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P5')(P5)

    # add P5 elementwise to C4
    P4           = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C4_reduced')(C4)
    P4           = keras.layers.Add(name='P4_merged')([P5_upsampled, P4])
    P4_upsampled = UpsampleLike(name='P4_upsampled')([P4, C3])
    P4           = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P4')(P4)

    # add P4 elementwise to C3
    P3 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C3_reduced')(C3)
    P3 = keras.layers.Add(name='P3_merged')([P4_upsampled, P3])
    P3 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P3')(P3)

    # "P6 is obtained via a 3x3 stride-2 conv on C5"
    P6 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P6')(C5)

    # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
    P7 = keras.layers.Activation('relu', name='C6_relu')(P6)
    P7 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P7')(P7)

    features = [P3, P4, P5, P6, P7]

    # Apply each shared head to every pyramid level and concatenate the results per head.
    submodels = default_submodels(num_classes=num_classes, num_anchors=num_anchors)
    outputs = [__build_model_pyramid(n, m, features) for n, m in submodels]
    model = Model(x, outputs)

    # Static (height, width) of every pyramid level, read through the public
    # backend API instead of the private _keras_shape attribute.
    predictor_sizes = np.array([keras.backend.int_shape(level)[1:3] for level in features])
    return model, predictor_sizes
if __name__ == '__main__':
    # Build the network, then print its layer summary and the pyramid feature-map sizes.
    model, predictor_size = my_retinanet()
    model.summary()
    # Function-call form: valid on Python 3 and prints the same text on Python 2 for a single argument.
    print(predictor_size)

zh_JNU

关注

4
点赞
踩
8

收藏

觉得还不错? 一键收藏
1
评论
Retinanet网络解析

最近在做小目标识别的项目。对于数据不平衡问题,常见的解决方案之一是设置正负样本的比例来计算损失,另一个是使用 focal loss 损失,请参考何恺明等人的论文《Focal Loss for Dense Object Detection》。说到 focal loss,就去看了看开源的 RetinaNet 实现,请参考 https://github.com/fizyr/keras-re...
复制链接

扫一扫