Retinanet网络解析

        最近在做小目标识别的项目,对于数据不平衡问题,常见的解决方案一个是设置正负样本的比例来进行损失计算,另一个就是使用focal_loss损失,请参考何恺明大佬等人的论文《Focal Loss for Dense Object Detection》.说到focal_loss,就去看了看开源的Retinanet网络,请参考https://github.com/fizyr/keras-retinanet(keras库).里面写得太复杂,看了好久没有理清楚(智商低).决定静下心再看一遍,最后理清楚了网络的构建、损失函数以及数据输入输出张量的要求,并进行了复现.

        下面介绍一下Retinanet的网络结构,直接暴力粘贴代码,最后是网络结构图.发现没什么好讲的,这方面的博客已经很多了,哈哈.算了,直接看代码去理解整个思想吧.

#coding:utf-8
from keras.applications import VGG16
import os
from keras import backend
import keras
import math
import tensorflow as tf
from keras.layers import Input, Lambda, Conv2D, MaxPooling2D, BatchNormalization, ELU, Reshape, Concatenate, Activation
import numpy as np
from keras.models import Model,Input
# Set CUDA_VISIBLE_DEVICES to an empty string for this process.
# NOTE(review): presumably this hides every GPU so TensorFlow runs on the CPU —
# confirm this is intended outside of debugging.
os.environ['CUDA_VISIBLE_DEVICES']=''
class UpsampleLike(keras.layers.Layer):
    """ Keras layer for upsampling a Tensor to be the same shape as another Tensor.

    Used for the top-down upsampling step when building the feature pyramid (FPN).
    """

    def call(self, inputs, **kwargs):
        """ Resize the first input to the spatial size of the second input.

        Args
            inputs : A ``[source, target]`` pair of 4D tensors.

        Returns
            ``source`` resized to the height and width of ``target``.
        """
        source, target = inputs
        # Use the dynamic shape so the layer also works when the spatial size is unknown.
        target_shape = keras.backend.shape(target)

        if keras.backend.image_data_format() == 'channels_first':
            # The tf.image resize ops work on NHWC tensors, so move the channels last,
            # resize, and move them back. keras.backend.transpose() accepts only the
            # tensor (no permutation argument), hence tf.transpose is used here.
            # NOTE(review): this branch resizes with nearest-neighbour while the
            # channels_last branch below uses bilinear — confirm which one is intended.
            source = tf.transpose(source, perm=(0, 2, 3, 1))
            output = tf.image.resize_nearest_neighbor(source, (target_shape[2], target_shape[3]))
            return tf.transpose(output, perm=(0, 3, 1, 2))

        return tf.image.resize_bilinear(source, (target_shape[1], target_shape[2]))

    def compute_output_shape(self, input_shape):
        """ Return the static output shape: batch and channels of the source, spatial size of the target.

        Args
            input_shape : A ``[source_shape, target_shape]`` pair of shape tuples.
        """
        source_shape = input_shape[0]
        target_shape = input_shape[1]
        if keras.backend.image_data_format() == 'channels_first':
            return (source_shape[0], source_shape[1]) + target_shape[2:4]
        return (source_shape[0],) + target_shape[1:3] + (source_shape[-1],)
class PriorProbability(keras.initializers.Initializer):
    """ Initializer that fills a tensor with the bias matching a prior probability.

    Used to initialise the bias of the classification network, following the
    procedure described in the paper.
    """

    def __init__(self, probability=0.01):
        # Prior foreground probability that the initial bias should encode.
        self.probability = probability

    def get_config(self):
        """ Return the constructor arguments needed to re-create this initializer. """
        return dict(probability=self.probability)

    def __call__(self, shape, dtype=None):
        """ Build an array of the given shape filled with -log((1 - p) / p).

        Args
            shape : Shape of the array to create.
            dtype : Data type passed on to ``np.ones``.

        Returns
            A numpy array in which every element equals the foreground bias.
        """
        prior = self.probability
        # set bias to -log((1 - p)/p) for foreground
        foreground_bias = -math.log((1 - prior) / prior)
        return np.ones(shape, dtype=dtype) * foreground_bias
def default_classification_model(       # classification sub-network shared by all pyramid levels
    num_classes,
    num_anchors,
    pyramid_feature_size=256,
    prior_probability=0.01,
    classification_feature_size=256,
    name='classification_submodel'
):
    """ Creates the default classification submodel.

    Args
        num_classes                 : Number of classes to predict a score for at each feature level.
        num_anchors                 : Number of anchors to predict classification scores for at each feature level.
        pyramid_feature_size        : The number of filters to expect from the feature pyramid levels.
        prior_probability           : Prior probability used to initialise the bias of the final conv layer.
        classification_feature_size : The number of filters to use in the layers in the classification submodel.
        name                        : The name of the submodel.

    Returns
        A keras.models.Model that predicts classes for each anchor.
    """
    channels_first = keras.backend.image_data_format() == 'channels_first'
    conv_kwargs = dict(kernel_size=3, strides=1, padding='same')

    # The spatial dimensions are left open so the same model can run on every pyramid level.
    if channels_first:
        feature_shape = (pyramid_feature_size, None, None)
    else:
        feature_shape = (None, None, pyramid_feature_size)
    inputs = keras.layers.Input(shape=feature_shape)

    # Four 3x3 conv + ReLU layers with Gaussian(0, 0.01) kernels and zero bias.
    tensor = inputs
    for index in range(4):
        tower_conv = keras.layers.Conv2D(
            filters=classification_feature_size,
            activation='relu',
            name='pyramid_classification_{}'.format(index),
            kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
            bias_initializer='zeros',
            **conv_kwargs
        )
        tensor = tower_conv(tensor)

    # Final conv: one score per (anchor, class) pair; its bias encodes the prior probability.
    score_conv = keras.layers.Conv2D(
        filters=num_classes * num_anchors,
        kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
        bias_initializer=PriorProbability(probability=prior_probability),
        name='pyramid_classification',
        **conv_kwargs
    )
    tensor = score_conv(tensor)

    # Move channels last if needed, flatten to (anchors, num_classes) and apply sigmoid.
    if channels_first:
        tensor = keras.layers.Permute((2, 3, 1), name='pyramid_classification_permute')(tensor)
    tensor = keras.layers.Reshape((-1, num_classes), name='pyramid_classification_reshape')(tensor)
    tensor = keras.layers.Activation('sigmoid', name='pyramid_classification_sigmoid')(tensor)

    return keras.models.Model(inputs=inputs, outputs=tensor, name=name)


def default_regression_model(num_values, num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):
    """ Creates the default regression submodel (shared by all pyramid levels).

    Args
        num_values              : Number of values to regress.
        num_anchors             : Number of anchors to regress for each feature level.
        pyramid_feature_size    : The number of filters to expect from the feature pyramid levels.
        regression_feature_size : The number of filters to use in the layers in the regression submodel.
        name                    : The name of the submodel.

    Returns
        A keras.models.Model that predicts regression values for each anchor.
    """
    channels_first = keras.backend.image_data_format() == 'channels_first'

    # Every conv in this submodel, including the final one, uses a Gaussian
    # weight fill with stddev = 0.01 and bias b = 0.
    conv_kwargs = dict(
        kernel_size=3,
        strides=1,
        padding='same',
        kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
        bias_initializer='zeros',
    )

    # The spatial dimensions are left open so the same model can run on every pyramid level.
    if channels_first:
        feature_shape = (pyramid_feature_size, None, None)
    else:
        feature_shape = (None, None, pyramid_feature_size)
    inputs = keras.layers.Input(shape=feature_shape)

    # Four 3x3 conv + ReLU layers.
    tensor = inputs
    for index in range(4):
        tower_conv = keras.layers.Conv2D(
            filters=regression_feature_size,
            activation='relu',
            name='pyramid_regression_{}'.format(index),
            **conv_kwargs
        )
        tensor = tower_conv(tensor)

    # Final conv: num_values outputs per anchor, no activation.
    tensor = keras.layers.Conv2D(num_anchors * num_values, name='pyramid_regression', **conv_kwargs)(tensor)

    # Move channels last if needed, then flatten to (anchors, num_values).
    if channels_first:
        tensor = keras.layers.Permute((2, 3, 1), name='pyramid_regression_permute')(tensor)
    tensor = keras.layers.Reshape((-1, num_values), name='pyramid_regression_reshape')(tensor)

    return keras.models.Model(inputs=inputs, outputs=tensor, name=name)

def default_submodels(num_classes=46, num_anchors=9):
    """ Bundle the default regression and classification submodels.

    Args
        num_classes : Number of classes to use.
        num_anchors : Number of base anchors.

    Returns
        A list of (name, submodel) tuples: first the regression submodel
        (4 values per anchor), then the classification submodel.
    """
    # Build the regression model first, then the classification model.
    regression = default_regression_model(4, num_anchors)
    classification = default_classification_model(num_classes, num_anchors)
    return [
        ('regression', regression),
        ('classification', classification),
    ]
def __build_model_pyramid(name, model, features):
    """ Run one submodel on every FPN level and join the results.

    Args
        name     : Name of the submodel; used as the name of the concatenation layer.
        model    : The submodel to evaluate.
        features : The FPN features.

    Returns
        A tensor containing the submodel responses of all levels, concatenated along axis 1.
    """
    merge = keras.layers.Concatenate(axis=1, name=name)
    per_level_outputs = []
    for level in features:
        per_level_outputs.append(model(level))
    return merge(per_level_outputs)
def my_retinanet(input_shape=(1024, 1024, 3), num_classes=45, num_anchors=5, feature_size=256):
    """ Build a RetinaNet-style detector on top of a small custom convolutional backbone.

    The backbone is written for channels_last tensors (BatchNormalization uses axis=3).
    Its last three stages (C3, C4, C5) feed a feature pyramid P3..P7, and the shared
    regression and classification submodels are applied to every pyramid level.

    Args
        input_shape  : (height, width, channels) of the input image, without the batch axis.
        num_classes  : Number of classes predicted by the classification submodel.
        num_anchors  : Number of anchors per feature map location.
        feature_size : Number of filters used in every pyramid layer.

    Returns
        A tuple (model, predictor_sizes). ``model`` is a keras Model whose outputs are
        the regression tensor followed by the classification tensor, each concatenated
        over all pyramid levels along axis 1. ``predictor_sizes`` is a numpy array with
        the (height, width) of P3, P4, P5, P6 and P7.
    """
    # Alternative (disabled) VGG16 backbone:
    # vgg_model = VGG16(input_shape=vgg_inputs,weights=None, include_top=None)
    # layer_names = ["block3_pool", "block4_pool", "block5_pool"]
    # C3,C4,C5 = [vgg_model.get_layer(name).output for name in layer_names]
    input_shape = tuple(input_shape)
    x = Input(shape=input_shape)
    normed = Lambda(lambda z: z/127.5 - 1., # Convert input feature range to [-1,1]
                    output_shape=input_shape,
                    name='lambda1')(x)

    # In all BatchNormalization layers below, axis=3 is the channel axis of a
    # channels_last (batch, height, width, channels) tensor.
    conv1_1 = Conv2D(32, (3, 3), name='conv1_1', strides=(1, 1), padding="same")(normed)
    conv1_2 = Conv2D(32, (3, 3), name='conv1_2', strides=(1, 1), padding="same",activation='relu')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1_2)
    bn1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(pool1)

    conv2_1 = Conv2D(64, (3, 3), name='conv2_1', strides=(1, 1), padding="same")(bn1)
    conv2_2 = Conv2D(64, (3, 3), name='conv2_2', strides=(1, 1), padding="same",activation='relu')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2_2)
    bn2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(pool2)

    conv3_1 = Conv2D(128, (1, 1), name='conv3_1', strides=(1, 1), padding="same")(bn2)
    conv3_2 = Conv2D(256, (3, 3), name='conv3_2', strides=(1, 1), padding="same",activation='relu')(conv3_1)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3_2)
    bn3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(pool3)

    # From stage 4 on, normalisation comes before pooling so that the un-pooled,
    # normalised maps (bn4, bn5, bn6) can be tapped as pyramid inputs.
    conv4_1 = Conv2D(128, (1, 1), name='conv4_1', strides=(1, 1), padding="same")(bn3)
    conv4_2 = Conv2D(256, (3, 3), name='conv4_2', strides=(1, 1), padding="same",activation='relu')(conv4_1)
    bn4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(bn4)

    conv5_1 = Conv2D(128, (1, 1), name='conv5_1', strides=(1, 1), padding="same")(pool4)
    conv5_2 = Conv2D(256, (3, 3), name='conv5_2', strides=(1, 1), padding="same",activation='relu')(conv5_1)
    bn5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(bn5)

    conv6_1 = Conv2D(128, (1, 1), name='conv6_1', strides=(1, 1), padding="same")(pool5)
    conv6_2 = Conv2D(256, (3, 3), name='conv6_2', strides=(1, 1), padding="same",activation='relu')(conv6_1)
    bn6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6_2)

    # Backbone outputs fed into the feature pyramid.
    C3,C4,C5= bn4,bn5,bn6

    # Top of the pyramid: reduce C5, keep an upsampled copy for merging into C4.
    P5           = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C5_reduced')(C5)
    P5_upsampled = UpsampleLike(name='P5_upsampled')([P5, C4])
    P5           = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P5')(P5)

    # add P5 elementwise to C4
    P4           = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C4_reduced')(C4)
    P4           = keras.layers.Add(name='P4_merged')([P5_upsampled, P4])
    P4_upsampled = UpsampleLike(name='P4_upsampled')([P4, C3])
    P4           = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P4')(P4)

    # add P4 elementwise to C3
    P3 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C3_reduced')(C3)
    P3 = keras.layers.Add(name='P3_merged')([P4_upsampled, P3])
    P3 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P3')(P3)

    # "P6 is obtained via a 3x3 stride-2 conv on C5"
    P6 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P6')(C5)

    # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
    P7 = keras.layers.Activation('relu', name='C6_relu')(P6)
    P7 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P7')(P7)

    features = [P3,P4,P5,P6,P7]
    submodels = default_submodels(num_classes=num_classes, num_anchors=num_anchors)
    outputs = [__build_model_pyramid(n, m, features) for n, m in submodels]
    model = Model(x,outputs)

    # Static (height, width) of each pyramid level; int_shape avoids relying on the
    # private _keras_shape attribute.
    predictor_sizes = np.array([keras.backend.int_shape(level)[1:3] for level in features])
    return model,predictor_sizes
if __name__ == '__main__':
    # Build the network, print its layer summary, then the pyramid feature map sizes.
    model, predictor_size = my_retinanet()
    model.summary()
    # Parenthesised single-argument form prints the same on Python 2 and Python 3.
    print(predictor_size)

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值