本章继续分析EfficientDet模型,本章内容为Box回归网络与Class分类网络部分。
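Box与Class的实现代码基本一样:默认从min_level=3到max_level=7共5个特征层,这5个特征层共用同一组卷积层,但每个特征层连接各自单独的BN,然后接激活函数swish;drop_connect方法用于drop部分卷积的输出。Box与Class的主要区别在于最后一层的通道数不同:Box的通道数为num_anchors*4,Class的通道数为num_anchors*num_classes(注意:从下面的代码可以看到,最后一层的通道数是num_classes * num_anchors,并没有额外加1作为背景类)。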
结构图如下:
代码如下:
class ClassNet(tf.keras.layers.Layer):
    """Class prediction head applied to every feature level.

    One stack of ``repeats`` convolutions is shared by all levels, while every
    (layer, level) pair owns its own batch-norm layer. A final convolution with
    ``num_classes * num_anchors`` filters produces the per-level class output.
    """

    def __init__(self,
                 num_classes=90,
                 num_anchors=9,
                 num_filters=32,
                 min_level=3,
                 max_level=7,
                 is_training_bn=False,
                 act_type='swish',
                 repeats=4,
                 separable_conv=True,
                 survival_prob=None,
                 strategy=None,
                 data_format='channels_last',
                 name='class_net',
                 **kwargs):
        """Create the layers of the class head.

        Args:
          num_classes: number of classes.
          num_anchors: number of anchors.
          num_filters: number of filters of each intermediate convolution.
          min_level: lowest feature level handled by the head.
          max_level: highest feature level handled by the head (inclusive).
          is_training_bn: forwarded to `util_keras.build_batch_norm`.
          act_type: name of the activation passed to `utils.activation_fn`;
            a falsy value disables the activation.
          repeats: number of intermediate convolution layers.
          separable_conv: if True use SeparableConv2D, otherwise Conv2D.
          survival_prob: if truthy, drop connect is applied to every
            intermediate layer except the first one.
          strategy: forwarded to `util_keras.build_batch_norm`.
          data_format: 'channels_first' or 'channels_last'.
          name: name of this layer.
          **kwargs: extra arguments for `tf.keras.layers.Layer`.
        """
        super().__init__(name=name, **kwargs)

        self.num_classes = num_classes
        self.num_anchors = num_anchors
        self.num_filters = num_filters
        self.min_level = min_level
        self.max_level = max_level
        self.repeats = repeats
        self.separable_conv = separable_conv
        self.is_training_bn = is_training_bn
        self.survival_prob = survival_prob
        self.act_type = act_type
        self.strategy = strategy
        self.data_format = data_format
        self.conv_ops = []
        self.bns = []

        # Pick the convolution flavour once; the resulting partial (and the
        # initializer objects bound into it) is reused for every conv below.
        if separable_conv:
            layer_cls = tf.keras.layers.SeparableConv2D
            layer_kwargs = dict(
                depth_multiplier=1,
                data_format=data_format,
                pointwise_initializer=tf.initializers.VarianceScaling(),
                depthwise_initializer=tf.initializers.VarianceScaling())
        else:
            layer_cls = tf.keras.layers.Conv2D
            layer_kwargs = dict(
                data_format=data_format,
                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
        conv2d_layer = functools.partial(layer_cls, **layer_kwargs)

        for layer_idx in range(self.repeats):
            # One convolution per intermediate layer, shared by all levels.
            self.conv_ops.append(
                conv2d_layer(
                    self.num_filters,
                    kernel_size=3,
                    bias_initializer=tf.zeros_initializer(),
                    activation=None,
                    padding='same',
                    name='class-%d' % layer_idx))
            # One batch norm per level for this intermediate layer.
            self.bns.append([
                util_keras.build_batch_norm(
                    is_training_bn=self.is_training_bn,
                    strategy=self.strategy,
                    data_format=self.data_format,
                    name='class-%d-bn-%d' % (layer_idx, level),
                )
                for level in range(self.min_level, self.max_level + 1)
            ])

        # The bias constant is -log((1 - 0.01) / 0.01), i.e. the logit of 0.01.
        self.classes = conv2d_layer(
            num_classes * num_anchors,
            kernel_size=3,
            bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
            padding='same',
            name='class-predict')

    def call(self, inputs, training, **kwargs):
        """Run the class head on each feature level.

        Args:
          inputs: indexable collection of per-level features; entries
            0 .. (max_level - min_level) are read.
          training: forwarded to the batch-norm layers and to drop connect.
          **kwargs: accepted but not used.

        Returns:
          A list with one class output per level, in level order.
        """
        num_levels = self.max_level - self.min_level + 1
        class_outputs = []
        for level_id in range(num_levels):
            feat = inputs[level_id]
            for i in range(self.repeats):
                # Keep the layer input for the skip connection.
                shortcut = feat
                # Every level goes through the same convolution ...
                feat = self.conv_ops[i](feat)
                # ... but uses its own batch norm.
                feat = self.bns[i][level_id](feat, training=training)
                if self.act_type:
                    feat = utils.activation_fn(feat, self.act_type)
                if i > 0 and self.survival_prob:
                    # From the second layer on, drop connect is applied.
                    feat = utils.drop_connect(feat, training, self.survival_prob)
                    # NOTE(review): the pasted source lost its indentation; the
                    # residual add is kept inside this branch, as in the
                    # reference EfficientDet Keras code -- confirm.
                    feat = feat + shortcut
            # Final prediction for this level.
            class_outputs.append(self.classes(feat))
        return class_outputs
class BoxNet(tf.keras.layers.Layer):
    """Box regression head applied to every feature level.

    One stack of ``repeats`` convolutions is shared by all levels, while every
    (layer, level) pair owns its own batch-norm layer. A final convolution with
    ``4 * num_anchors`` filters produces the per-level box output.
    """

    def __init__(self,
                 num_anchors=9,
                 num_filters=32,
                 min_level=3,
                 max_level=7,
                 is_training_bn=False,
                 act_type='swish',
                 repeats=4,
                 separable_conv=True,
                 survival_prob=None,
                 strategy=None,
                 data_format='channels_last',
                 name='box_net',
                 **kwargs):
        """Initialize BoxNet.

        Args:
          num_anchors: number of anchors used.
          num_filters: number of filters of each intermediate convolution.
          min_level: lowest feature level handled by the head.
          max_level: highest feature level handled by the head (inclusive).
          is_training_bn: forwarded to `util_keras.build_batch_norm`.
          act_type: name of the activation passed to `utils.activation_fn`;
            a falsy value disables the activation.
          repeats: number of intermediate convolution layers.
          separable_conv: if True use SeparableConv2D, otherwise Conv2D.
          survival_prob: if truthy, drop connect is applied to every
            intermediate layer except the first one.
          strategy: forwarded to `util_keras.build_batch_norm`.
          data_format: 'channels_first' or 'channels_last'.
          name: name of the layer.
          **kwargs: extra arguments for `tf.keras.layers.Layer`.
        """
        super().__init__(name=name, **kwargs)

        self.num_anchors = num_anchors
        self.num_filters = num_filters
        self.min_level = min_level
        self.max_level = max_level
        self.repeats = repeats
        self.separable_conv = separable_conv
        self.is_training_bn = is_training_bn
        self.survival_prob = survival_prob
        self.act_type = act_type
        self.strategy = strategy
        self.data_format = data_format
        self.conv_ops = []
        self.bns = []

        def make_conv(filters, layer_name):
            """Build one 3x3 'same' convolution with fresh initializers."""
            # Arguments common to both convolution flavours.
            common = dict(
                filters=filters,
                data_format=self.data_format,
                kernel_size=3,
                activation=None,
                bias_initializer=tf.zeros_initializer(),
                padding='same',
                name=layer_name)
            if self.separable_conv:
                return tf.keras.layers.SeparableConv2D(
                    depth_multiplier=1,
                    pointwise_initializer=tf.initializers.VarianceScaling(),
                    depthwise_initializer=tf.initializers.VarianceScaling(),
                    **common)
            return tf.keras.layers.Conv2D(
                kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                **common)

        for i in range(self.repeats):
            # One convolution per intermediate layer, shared by all levels.
            self.conv_ops.append(make_conv(self.num_filters, 'box-%d' % i))
            # One batch norm per level for this intermediate layer.
            self.bns.append([
                util_keras.build_batch_norm(
                    is_training_bn=self.is_training_bn,
                    strategy=self.strategy,
                    data_format=self.data_format,
                    name='box-%d-bn-%d' % (i, level))
                for level in range(self.min_level, self.max_level + 1)
            ])

        # Final prediction layer: 4 values per anchor.
        self.boxes = make_conv(4 * self.num_anchors, 'box-predict')

    def call(self, inputs, training, **kwargs):
        """Run the box head on each feature level.

        The flow mirrors `ClassNet.call`; `**kwargs` is accepted (and ignored)
        so that both heads can be invoked with the same arguments.

        Args:
          inputs: indexable collection of per-level features; entries
            0 .. (max_level - min_level) are read.
          training: forwarded to the batch-norm layers and to drop connect.
          **kwargs: accepted but not used.

        Returns:
          A list with one box output per level, in level order.
        """
        box_outputs = []
        for level_id in range(0, self.max_level - self.min_level + 1):
            image = inputs[level_id]
            for i in range(self.repeats):
                # Keep the layer input for the skip connection.
                original_image = image
                # Every level goes through the same convolution ...
                image = self.conv_ops[i](image)
                # ... but uses its own batch norm.
                image = self.bns[i][level_id](image, training=training)
                if self.act_type:
                    image = utils.activation_fn(image, self.act_type)
                if i > 0 and self.survival_prob:
                    # From the second layer on, drop connect is applied.
                    image = utils.drop_connect(image, training, self.survival_prob)
                    # NOTE(review): the pasted source lost its indentation; the
                    # residual add is kept inside this branch, as in the
                    # reference EfficientDet Keras code -- confirm.
                    image = image + original_image
            box_outputs.append(self.boxes(image))
        return box_outputs
到此本章内容结束。下一章将分析Loss计算的细节,敬请关注!