最近在做小目标识别的项目,对于数据不平衡,常见的解决方案一个是设置正负样本的比例来进行损失计算,还有一个就是focal loss损失,请参考何恺明大佬的论文《Focal Loss for Dense Object Detection》.说到focal loss,就去看了看开源的RetinaNet网络,请参考 https://github.com/fizyr/keras-retinanet (keras库).里面写得太复杂,看了好久没有理清楚(智商低).决定静下心再看一遍,最后理清楚了网络的构建、损失函数以及数据的输入输出张量的要求,并进行了复现.
下面介绍一下RetinaNet的网络结构,直接暴力粘贴代码,最后是网络结构图.发现没什么好讲的,关于这些内容的博客已经很多了,哈哈.算了,直接看代码去理解整个思想吧.
#coding:utf-8
from keras.applications import VGG16
import os
from keras import backend
import keras
import math
import tensorflow as tf
from keras.layers import Input, Lambda, Conv2D, MaxPooling2D, BatchNormalization, ELU, Reshape, Concatenate, Activation
import numpy as np
from keras.models import Model,Input
# Set CUDA_VISIBLE_DEVICES to an empty string for this process. By CUDA
# convention this hides every GPU, so the model below is built on the CPU.
os.environ['CUDA_VISIBLE_DEVICES']=''
class UpsampleLike(keras.layers.Layer):
    """Keras layer that resizes one tensor to the spatial size of another.

    The layer is called on a two-element list ``[source, target]`` and
    returns ``source`` resized to the height and width of ``target``. It is
    used to upsample a coarser pyramid level before merging it with a finer
    one when building the FPN.
    """

    def call(self, inputs, **kwargs):
        """Resize ``inputs[0]`` to the spatial dimensions of ``inputs[1]``.

        Args
            inputs : Pair ``[source, target]`` of 4-D tensors.
        Returns
            ``source`` resized to the height/width of ``target``, in the
            active Keras image data format.
        """
        source, target = inputs
        # Dynamic shape, so the layer also works when height/width are unknown
        # at graph-construction time.
        target_shape = keras.backend.shape(target)
        if keras.backend.image_data_format() == 'channels_first':
            # The tf.image resize ops work on NHWC tensors, so move the
            # channel axis last, resize, and move it back. tf.transpose is
            # used because keras.backend.transpose() accepts only the tensor
            # and no permutation argument.
            source = tf.transpose(source, (0, 2, 3, 1))
            output = tf.image.resize_nearest_neighbor(source, (target_shape[2], target_shape[3]))
            output = tf.transpose(output, (0, 3, 1, 2))
            return output
        else:
            # NOTE(review): this branch interpolates bilinearly while the
            # channels_first branch uses nearest-neighbour; confirm whether
            # the difference is intended.
            return tf.image.resize_bilinear(source, (target_shape[1], target_shape[2]))

    def compute_output_shape(self, input_shape):
        """Return the static output shape.

        Batch size and channel count come from the source shape
        (``input_shape[0]``); height and width come from the target shape
        (``input_shape[1]``).
        """
        if keras.backend.image_data_format() == 'channels_first':
            return (input_shape[0][0], input_shape[0][1]) + input_shape[1][2:4]
        else:
            return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],)
class PriorProbability(keras.initializers.Initializer):
    """Initializer that fills a tensor with the logit of a prior probability.

    Every element is set to ``-log((1 - p) / p)``, so a sigmoid applied to
    the initial value yields ``p``. It is used for the bias of the final
    classification convolution.
    """

    def __init__(self, probability=0.01):
        """Store the prior probability ``p`` (default 0.01)."""
        self.probability = probability

    def get_config(self):
        """Return the constructor arguments as a dict."""
        config = {'probability': self.probability}
        return config

    def __call__(self, shape, dtype=None):
        """Return a NumPy array of ``shape`` filled with ``-log((1 - p) / p)``."""
        prior = self.probability
        bias_value = -math.log((1 - prior) / prior)
        return np.ones(shape, dtype=dtype) * bias_value
def default_classification_model(
    num_classes,
    num_anchors,
    pyramid_feature_size=256,
    prior_probability=0.01,
    classification_feature_size=256,
    name='classification_submodel'
):
    """ Creates the default classification submodel (shared across pyramid levels).
    Args
        num_classes                 : Number of classes to predict a score for at each feature level.
        num_anchors                 : Number of anchors to predict classification scores for at each feature level.
        pyramid_feature_size        : The number of filters to expect from the feature pyramid levels.
        prior_probability           : Prior probability used to initialise the bias of the final convolution.
        classification_feature_size : The number of filters to use in the layers in the classification submodel.
        name                        : The name of the submodel.
    Returns
        A keras.models.Model that predicts classes for each anchor.
    """
    channels_first = keras.backend.image_data_format() == 'channels_first'
    conv_options = dict(kernel_size=3, strides=1, padding='same')

    # Height and width are left unspecified so the same submodel can be
    # applied to feature maps of any spatial size.
    if channels_first:
        input_shape = (pyramid_feature_size, None, None)
    else:
        input_shape = (None, None, pyramid_feature_size)
    inputs = keras.layers.Input(shape=input_shape)

    # Four 3x3 ReLU convolutions with Gaussian(0, 0.01) weights and zero bias.
    tensor = inputs
    for index in range(4):
        hidden_conv = keras.layers.Conv2D(
            filters=classification_feature_size,
            activation='relu',
            name='pyramid_classification_{}'.format(index),
            kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
            bias_initializer='zeros',
            **conv_options
        )
        tensor = hidden_conv(tensor)

    # Final convolution: one score per (anchor, class) pair, with the bias
    # initialised from the prior probability.
    score_conv = keras.layers.Conv2D(
        filters=num_classes * num_anchors,
        kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
        bias_initializer=PriorProbability(probability=prior_probability),
        name='pyramid_classification',
        **conv_options
    )
    tensor = score_conv(tensor)

    # Put channels last before flattening to (anchors, num_classes).
    if channels_first:
        tensor = keras.layers.Permute((2, 3, 1), name='pyramid_classification_permute')(tensor)
    tensor = keras.layers.Reshape((-1, num_classes), name='pyramid_classification_reshape')(tensor)
    tensor = keras.layers.Activation('sigmoid', name='pyramid_classification_sigmoid')(tensor)

    return keras.models.Model(inputs=inputs, outputs=tensor, name=name)
def default_regression_model(num_values, num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):
    """ Creates the default regression submodel (shared across pyramid levels).
    Args
        num_values              : Number of values to regress.
        num_anchors             : Number of anchors to regress for each feature level.
        pyramid_feature_size    : The number of filters to expect from the feature pyramid levels.
        regression_feature_size : The number of filters to use in the layers in the regression submodel.
        name                    : The name of the submodel.
    Returns
        A keras.models.Model that predicts regression values for each anchor.
    """
    channels_first = keras.backend.image_data_format() == 'channels_first'

    # Every convolution in this submodel, including the final one, uses a
    # Gaussian(0, 0.01) weight fill and zero bias.
    conv_options = dict(
        kernel_size=3,
        strides=1,
        padding='same',
        kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
        bias_initializer='zeros',
    )

    # Height and width are left unspecified so the same submodel can be
    # applied to feature maps of any spatial size.
    if channels_first:
        input_shape = (pyramid_feature_size, None, None)
    else:
        input_shape = (None, None, pyramid_feature_size)
    inputs = keras.layers.Input(shape=input_shape)

    tensor = inputs
    for index in range(4):
        hidden_conv = keras.layers.Conv2D(
            filters=regression_feature_size,
            activation='relu',
            name='pyramid_regression_{}'.format(index),
            **conv_options
        )
        tensor = hidden_conv(tensor)

    # Final convolution: num_values outputs per anchor, no activation.
    tensor = keras.layers.Conv2D(num_anchors * num_values, name='pyramid_regression', **conv_options)(tensor)

    # Put channels last before flattening to (anchors, num_values).
    if channels_first:
        tensor = keras.layers.Permute((2, 3, 1), name='pyramid_regression_permute')(tensor)
    tensor = keras.layers.Reshape((-1, num_values), name='pyramid_regression_reshape')(tensor)

    return keras.models.Model(inputs=inputs, outputs=tensor, name=name)
def default_submodels(num_classes=46, num_anchors=9):
    """ Create a list of default submodels used for object detection.
    The default submodels are a regression submodel (4 values per anchor)
    followed by a classification submodel.
    Args
        num_classes : Number of classes to use.
        num_anchors : Number of base anchors.
    Returns
        A list of (name, submodel) tuples: first 'regression', then 'classification'.
    """
    regression = default_regression_model(4, num_anchors)
    classification = default_classification_model(num_classes, num_anchors)
    return [
        ('regression', regression),
        ('classification', classification),
    ]
def __build_model_pyramid(name, model, features):
    """ Applies a single submodel to each FPN level.
    Args
        name     : Name of the submodel; used as the name of the concatenation layer.
        model    : The submodel to evaluate.
        features : The FPN features.
    Returns
        A tensor containing the responses of the submodel on all FPN features, concatenated along axis 1.
    """
    merge = keras.layers.Concatenate(axis=1, name=name)
    per_level_outputs = []
    for feature in features:
        per_level_outputs.append(model(feature))
    return merge(per_level_outputs)
def my_retinanet(num_classes=45, num_anchors=5, input_shape=(1024, 1024, 3)):
    """ Build a RetinaNet-style detector on top of a small custom convolutional backbone.
    The backbone feeds three feature maps (C3, C4, C5) into a feature pyramid
    (P3-P7); the shared regression and classification submodels are then applied
    to every pyramid level and their outputs concatenated.
    The backbone hard-codes the channel axis as 3, i.e. it is written for the
    'channels_last' image data format.
    Args
        num_classes : Number of classes scored by the classification submodel.
        num_anchors : Number of anchors per feature-map location.
        input_shape : Shape of one input image, without the batch dimension.
    Returns
        A tuple (model, predictor_sizes). model is a keras Model whose outputs are
        [regression, classification]; predictor_sizes is a numpy array with one
        (height, width) row for each of P3, P4, P5, P6, P7.
    """
    feature_size = 256

    x = Input(shape=input_shape)
    # Convert the input feature range from [0, 255] to [-1, 1].
    normed = Lambda(lambda z: z / 127.5 - 1.,
                    output_shape=input_shape,
                    name='lambda1')(x)

    # Blocks 1-3: conv, conv, 2x2 max-pool, then batch norm over the channel axis.
    conv1_1 = Conv2D(32, (3, 3), name='conv1_1', strides=(1, 1), padding="same")(normed)
    conv1_2 = Conv2D(32, (3, 3), name='conv1_2', strides=(1, 1), padding="same", activation='relu')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1_2)
    bn1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(pool1)

    conv2_1 = Conv2D(64, (3, 3), name='conv2_1', strides=(1, 1), padding="same")(bn1)
    conv2_2 = Conv2D(64, (3, 3), name='conv2_2', strides=(1, 1), padding="same", activation='relu')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2_2)
    bn2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(pool2)

    conv3_1 = Conv2D(128, (1, 1), name='conv3_1', strides=(1, 1), padding="same")(bn2)
    conv3_2 = Conv2D(256, (3, 3), name='conv3_2', strides=(1, 1), padding="same", activation='relu')(conv3_1)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3_2)
    bn3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(pool3)

    # Blocks 4-6: batch norm comes before pooling, so bn4/bn5/bn6 can be tapped
    # as C3/C4/C5 at their un-pooled resolution.
    conv4_1 = Conv2D(128, (1, 1), name='conv4_1', strides=(1, 1), padding="same")(bn3)
    conv4_2 = Conv2D(256, (3, 3), name='conv4_2', strides=(1, 1), padding="same", activation='relu')(conv4_1)
    bn4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(bn4)

    conv5_1 = Conv2D(128, (1, 1), name='conv5_1', strides=(1, 1), padding="same")(pool4)
    conv5_2 = Conv2D(256, (3, 3), name='conv5_2', strides=(1, 1), padding="same", activation='relu')(conv5_1)
    bn5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(bn5)

    conv6_1 = Conv2D(128, (1, 1), name='conv6_1', strides=(1, 1), padding="same")(pool5)
    conv6_2 = Conv2D(256, (3, 3), name='conv6_2', strides=(1, 1), padding="same", activation='relu')(conv6_1)
    bn6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6_2)

    C3, C4, C5 = bn4, bn5, bn6

    # Top-down pathway: reduce C5 with a 1x1 conv, upsample to C4's size for the merge.
    P5 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C5_reduced')(C5)
    P5_upsampled = UpsampleLike(name='P5_upsampled')([P5, C4])
    P5 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P5')(P5)

    # add P5 elementwise to C4
    P4 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C4_reduced')(C4)
    P4 = keras.layers.Add(name='P4_merged')([P5_upsampled, P4])
    P4_upsampled = UpsampleLike(name='P4_upsampled')([P4, C3])
    P4 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P4')(P4)

    # add P4 elementwise to C3
    P3 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C3_reduced')(C3)
    P3 = keras.layers.Add(name='P3_merged')([P4_upsampled, P3])
    P3 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P3')(P3)

    # "P6 is obtained via a 3x3 stride-2 conv on C5"
    P6 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P6')(C5)

    # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
    P7 = keras.layers.Activation('relu', name='C6_relu')(P6)
    P7 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P7')(P7)

    features = [P3, P4, P5, P6, P7]

    # Apply the shared regression / classification heads to every pyramid level.
    submodels = default_submodels(num_classes=num_classes, num_anchors=num_anchors)
    outputs = [__build_model_pyramid(n, m, features) for n, m in submodels]
    model = Model(x, outputs)

    # Static (height, width) of each pyramid level, read through the public
    # backend API instead of the private `_keras_shape` attribute.
    predictor_sizes = np.array([keras.backend.int_shape(level)[1:3] for level in features])
    return model, predictor_sizes
if __name__ == '__main__':
    # Build the network, print its layer summary and the spatial size of each
    # pyramid level.
    model, predictor_size = my_retinanet()
    model.summary()
    # Function-call form of print: valid on both Python 2 and Python 3
    # (the bare `print x` statement is a SyntaxError on Python 3).
    print(predictor_size)