This chapter continues the analysis of the EfficientDet source code, focusing on the loss computation.
The loss computation is a key part of the whole training process: computing it correctly makes training converge faster and improves model accuracy.
The training process consists of:
- Data preprocessing: convert the labels into the same shape as the model outputs.
- Forward pass: feed the images through the model to produce predictions.
- Loss computation: compare the predictions against the ground truth.
- Backpropagation: propagate the loss gradients back and update the weights of every layer.
To compute the loss we first need label preprocessing. The label processing code is implemented in the InputReader class in efficientdet\dataloader.py. The official code looks complicated because of its many parameters, but the underlying logic is simple and easy to reproduce, so it is described here in plain text.
The key points of preprocessing are:
- The preprocessed labels have the same dimensions as the model outputs: the class output has shape [batch, height, width, anchor*(classes+1)] and the box output has shape [batch, height, width, anchor*4]. The last class dimension, anchor*(classes+1), is the number of anchors times (number of classes + background); the background class_id defaults to 0.
- Anchors are a set of predefined boxes with different sizes and aspect ratios; this model uses 9 per feature point, whereas YOLO uses 3.
- Every feature point of a feature level serves as an anchor center, and the IoU between its anchors and the ground-truth boxes is computed. At feature points where the IoU is greater than 0.5, the object's class ID is filled in.
- If a feature point has two or more ground-truth boxes with IoU greater than 0.5, the one with the largest IoU is chosen.
- Object class IDs run from 1 to N; the background is 0.
- Once all feature points have been matched to class IDs, the box targets are filled with the [y1, x1, y2, x2] boxes converted to [ty, tx, th, tw] (a simplified matching sketch follows this list), and label preprocessing is complete.
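The matching logic above can be sketched roughly as follows. This is a minimal, simplified illustration rather than the InputReader code itself: the function names and the single positive threshold of 0.5 are assumptions for clarity, and the real implementation works per feature level and also keeps an ignore range for intermediate IoU values.

import numpy as np

def pairwise_iou(anchors, gt_boxes):
    """IoU between every anchor [A, 4] and every ground-truth box [G, 4], both [y1, x1, y2, x2]."""
    ya1, xa1, ya2, xa2 = np.split(anchors, 4, axis=1)
    yg1, xg1, yg2, xg2 = np.split(gt_boxes, 4, axis=1)
    inter_h = np.maximum(0.0, np.minimum(ya2, yg2.T) - np.maximum(ya1, yg1.T))
    inter_w = np.maximum(0.0, np.minimum(xa2, xg2.T) - np.maximum(xa1, xg1.T))
    inter = inter_h * inter_w                              # [A, G]
    area_a = (ya2 - ya1) * (xa2 - xa1)                     # [A, 1]
    area_g = (yg2 - yg1) * (xg2 - xg1)                     # [G, 1]
    return inter / (area_a + area_g.T - inter)

def assign_class_targets(anchors, gt_boxes, gt_classes, pos_iou=0.5):
    """For each anchor, keep the ground-truth box with the highest IoU.
    Anchors whose best IoU >= pos_iou receive that box's class ID (1..N); the rest stay background (0)."""
    ious = pairwise_iou(anchors, gt_boxes)                 # [A, G]
    best_gt = ious.argmax(axis=1)                          # index of best-matching ground-truth box
    best_iou = ious.max(axis=1)
    cls_targets = np.zeros(len(anchors), dtype=np.int32)   # 0 = background
    positive = best_iou >= pos_iou
    cls_targets[positive] = gt_classes[best_gt[positive]]
    return cls_targets, best_gt, positive

For the positive anchors, the matched ground-truth boxes are then converted from [y1, x1, y2, x2] to the [ty, tx, th, tw] regression targets by the _encode method shown next.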
The key code for converting box labels from [y1, x1, y2, x2] to [ty, tx, th, tw] is as follows:
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw].
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
# Avoid NaN in division and log below.
# Avoid division by zero (EPSILON is a small constant defined at module level in the box coder file).
ha = tf.maximum(EPSILON, ha)
wa = tf.maximum(EPSILON, wa)
h = tf.maximum(EPSILON, h)
w = tf.maximum(EPSILON, w)
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.math.log(w / wa)
th = tf.math.log(h / ha)
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
return tf.transpose(tf.stack([ty, tx, th, tw]))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
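As a quick sanity check of the coder, encoding a box against an anchor and then decoding the result should recover the original coordinates. Below is a small usage sketch, assuming the efficientdet directory is on PYTHONPATH so the repo's object_detection package (BoxList and FasterRcnnBoxCoder) can be imported; the box, anchor, and scale-factor values are made up for illustration.

import tensorflow as tf
from object_detection import box_list
from object_detection import faster_rcnn_box_coder

# One ground-truth box and one anchor, both in [y1, x1, y2, x2] coordinates.
boxes = box_list.BoxList(tf.constant([[10., 10., 110., 210.]]))
anchors = box_list.BoxList(tf.constant([[0., 0., 128., 128.]]))

# scale_factors are optional; these are illustrative values only.
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[10., 10., 5., 5.])

rel_codes = coder.encode(boxes, anchors)    # [ty, tx, th, tw] regression targets
decoded = coder.decode(rel_codes, anchors)  # back to a BoxList of [y1, x1, y2, x2]
print(rel_codes.numpy())
print(decoded.get().numpy())                # approximately [[10., 10., 110., 210.]]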
Next comes the loss computation itself.
The loss is only used during training. The training entry point is efficientdet\keras\train.py; although four losses are registered in compile, the model only actually uses box_loss and class_loss. The code is as follows:
model = train_lib.EfficientDetNetTrain(params['model_name'], config)
height, width = utils.parse_image_size(params['image_size'])
model.build((params['batch_size'], height, width, 3))
model.compile(
optimizer=train_lib.get_optimizer(params),
loss={
'box_loss':
train_lib.BoxLoss(
params['delta'], reduction=tf.keras.losses.Reduction.NONE),
'box_iou_loss':
train_lib.BoxIouLoss(
params['iou_loss_type'],
params['min_level'],
params['max_level'],
params['num_scales'],
params['aspect_ratios'],
params['anchor_scale'],
params['image_size'],
reduction=tf.keras.losses.Reduction.NONE),
'class_loss':
train_lib.FocalLoss(
params['alpha'],
params['gamma'],
label_smoothing=params['label_smoothing'],
reduction=tf.keras.losses.Reduction.NONE),
'seg_loss':
tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
})
The box_loss implementation is as follows:
class BoxLoss(tf.keras.losses.Loss):
"""L2 box regression loss."""
def __init__(self, delta=0.1, **kwargs):
"""Initialize box loss.
Args:
delta: `float`, the point where the huber loss function changes from a
quadratic to linear. It is typically around the mean value of regression
target. For instances, the regression targets of 512x512 input with 6
anchors on P3-P7 pyramid is about [0.1, 0.1, 0.2, 0.2].
**kwargs: other params.
"""
super().__init__(**kwargs)
self.huber = tf.keras.losses.Huber(
delta, reduction=tf.keras.losses.Reduction.NONE)
@tf.autograph.experimental.do_not_convert
def call(self, y_true, box_outputs):
num_positives, box_targets = y_true
normalizer = num_positives * 4.0
mask = tf.cast(box_targets != 0.0, tf.float32)
box_targets = tf.expand_dims(box_targets, axis=-1)
box_outputs = tf.expand_dims(box_outputs, axis=-1)
box_loss = self.huber(box_targets, box_outputs) * mask
box_loss = tf.reduce_sum(box_loss)
box_loss /= normalizer
return box_loss
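A minimal usage sketch of the BoxLoss class defined above, with made-up shapes (in real training the targets come from the dataloader and the outputs from the box head): because reduction is NONE, the Huber loss is computed per element, the mask zeroes out background positions where the target is 0, and the sum is divided by 4 times the number of positive anchors.

import tensorflow as tf

num_positives = tf.constant(2.0)                    # num_positives_sum in real training
box_targets = tf.random.uniform([1, 4, 4, 36])      # [batch, H, W, anchors*4]; real targets are mostly 0 (background)
box_outputs = tf.random.uniform([1, 4, 4, 36])      # raw box head outputs, same shape

box_loss_fn = BoxLoss(delta=0.1, reduction=tf.keras.losses.Reduction.NONE)
# y_true is packed as [num_positives, box_targets], exactly as _detection_loss does later.
loss = box_loss_fn([num_positives, box_targets], box_outputs)
print(float(loss))                                  # scalar: sum of masked Huber losses / (num_positives * 4)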
class_loss uses focal loss, which addresses the extreme imbalance between positive and negative samples. The implementation is as follows:
class FocalLoss(tf.keras.losses.Loss):
"""Compute the focal loss between `logits` and the golden `target` values.
Focal loss = -(1-pt)^gamma * log(pt)
where pt is the probability of being classified to the true class.
"""
def __init__(self, alpha, gamma, label_smoothing=0.0, **kwargs):
"""Initialize focal loss.
Args:
alpha: A float32 scalar multiplying alpha to the loss from positive
examples and (1-alpha) to the loss from negative examples.
gamma: A float32 scalar modulating loss from hard and easy examples.
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
**kwargs: other params.
"""
super().__init__(**kwargs)
self.alpha = alpha
self.gamma = gamma
self.label_smoothing = label_smoothing
@tf.autograph.experimental.do_not_convert
def call(self, y, y_pred):
"""Compute focal loss for y and y_pred.
Args:
y: A tuple of (normalizer, y_true), where y_true is the target class.
y_pred: A float32 tensor [batch, height_in, width_in, num_predictions].
Returns:
the focal loss.
"""
normalizer, y_true = y
alpha = tf.convert_to_tensor(self.alpha, dtype=y_pred.dtype)
gamma = tf.convert_to_tensor(self.gamma, dtype=y_pred.dtype)
# compute focal loss multipliers before label smoothing, such that it will
# not blow up the loss.
pred_prob = tf.sigmoid(y_pred)
p_t = (y_true * pred_prob) + ((1 - y_true) * (1 - pred_prob))
alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
modulating_factor = (1.0 - p_t)**gamma
# apply label smoothing for cross_entropy for each entry.
y_true = y_true * (1.0 - self.label_smoothing) + 0.5 * self.label_smoothing
ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred)
# compute the final loss and return
return alpha_factor * modulating_factor * ce / normalizer
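A small sketch of how the focal terms behave, using the FocalLoss class above with made-up logits (alpha=0.25 and gamma=1.5 are typical EfficientDet settings): for a well-classified entry p_t is close to 1, so the modulating factor (1 - p_t)^gamma shrinks its contribution, while a hard entry keeps almost the full cross-entropy; alpha re-weights positives against negatives.

import tensorflow as tf

focal_loss_fn = FocalLoss(alpha=0.25, gamma=1.5,
                          label_smoothing=0.0,
                          reduction=tf.keras.losses.Reduction.NONE)

normalizer = tf.constant(1.0)               # num_positives_sum in real training
y_true = tf.constant([[1.0, 0.0, 1.0]])     # one-hot class targets
logits = tf.constant([[4.0, -4.0, -1.0]])   # easy positive, easy negative, hard positive

per_element = focal_loss_fn([normalizer, y_true], logits)
print(per_element.numpy())                  # the hard positive (last entry) dominates the loss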
The full loss computation lives in efficientdet\keras\train_lib.py, in the _detection_loss method of the EfficientDetNetTrain class. The code is as follows:
class EfficientDetNetTrain(efficientdet_keras.EfficientDetNet):
"""A customized trainer for EfficientDet.
see https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit
"""
def _freeze_vars(self):
'''Return trainable variables, excluding those matched by var_freeze_expr.'''
# var_freeze_expr: None by default, so nothing is frozen.
if self.config.var_freeze_expr:
return [
v for v in self.trainable_variables
if not re.match(self.config.var_freeze_expr, v.name)
]
return self.trainable_variables
def _reg_l2_loss(self, weight_decay, regex=r'.*(kernel|weight):0$'):
"""Return regularization l2 loss loss."""
var_match = re.compile(regex)
return weight_decay * tf.add_n([
tf.nn.l2_loss(v)
for v in self.trainable_variables
if var_match.match(v.name)
])
def _detection_loss(self, cls_outputs, box_outputs, labels, loss_vals):
"""Computes total detection loss.
Computes total detection loss including box and class loss from all levels.
Args:
cls_outputs: an OrderDict with keys representing levels and values
representing logits in [batch_size, height, width, num_anchors].
box_outputs: an OrderDict with keys representing levels and values
representing box regression targets in [batch_size, height, width,
num_anchors * 4].
labels: the dictionary that returned from dataloader that includes
groundtruth targets.
loss_vals: A dict of loss values.
Returns:
total_loss: an integer tensor representing total loss reducing from
class and box losses from all levels.
cls_loss: an integer tensor representing total class loss.
box_loss: an integer tensor representing total box regression loss.
box_iou_loss: an integer tensor representing total box iou loss.
"""
# convert to float32 for loss computing.
cls_outputs = [tf.cast(i, tf.float32) for i in cls_outputs]
box_outputs = [tf.cast(i, tf.float32) for i in box_outputs]
# Sum all positives in a batch for normalization and avoid zero
# num_positives_sum, which would lead to inf loss during training
# mean_num_positives: batch mean of num_positives.
# num_positives_sum: total number of non-background (positive) feature points in the batch.
num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
levels = range(len(cls_outputs))
cls_losses = []
box_losses = []
for level in levels:
# Onehot encoding for classification labels.
cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % (level + 3)],
self.config.num_classes)
if self.config.data_format == 'channels_first':
bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list()
cls_targets_at_level = tf.reshape(cls_targets_at_level,
[bs, -1, width, height])
else:
bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list()
cls_targets_at_level = tf.reshape(cls_targets_at_level,
[bs, width, height, -1])
box_targets_at_level = labels['box_targets_%d' % (level + 3)]
# Classification loss.
class_loss_layer = self.loss.get('class_loss', None)
if class_loss_layer:
cls_loss = class_loss_layer([num_positives_sum, cls_targets_at_level],
cls_outputs[level])
if self.config.data_format == 'channels_first':
cls_loss = tf.reshape(
cls_loss, [bs, -1, width, height, self.config.num_classes])
else:
cls_loss = tf.reshape(
cls_loss, [bs, width, height, -1, self.config.num_classes])
# Only count loss where the class target is not the ignore label (-2).
cls_loss *= tf.cast(
tf.expand_dims(
tf.not_equal(labels['cls_targets_%d' % (level + 3)], -2), -1),
tf.float32)
cls_losses.append(tf.reduce_sum(cls_loss))
# Box regression loss.
# box_loss_weight: 50 by default.
if self.config.box_loss_weight and self.loss.get('box_loss', None):
box_loss_layer = self.loss['box_loss']
box_losses.append(
box_loss_layer([num_positives_sum, box_targets_at_level],
box_outputs[level]))
# iou_loss_type: None by default, so the IoU loss branch is skipped.
if self.config.iou_loss_type:
box_outputs = tf.concat([tf.reshape(v, [-1, 4]) for v in box_outputs],
axis=0)
box_targets = tf.concat([
tf.reshape(labels['box_targets_%d' % (level + 3)], [-1, 4])
for level in levels
],
axis=0)
box_iou_loss_layer = self.loss['box_iou_loss']
box_iou_loss = box_iou_loss_layer([num_positives_sum, box_targets],
box_outputs)
loss_vals['box_iou_loss'] = box_iou_loss
else:
box_iou_loss = 0
cls_loss = tf.add_n(cls_losses) if cls_losses else 0
box_loss = tf.add_n(box_losses) if box_losses else 0
total_loss = (
cls_loss + self.config.box_loss_weight * box_loss +
self.config.iou_loss_weight * box_iou_loss)
loss_vals['det_loss'] = total_loss
loss_vals['cls_loss'] = cls_loss
loss_vals['box_loss'] = box_loss
return total_loss
def train_step(self, data):
"""Train step.
Args:
data: Tuple of (images, labels). Image tensor with shape [batch_size,
height, width, 3]. The height and width are fixed and equal.Input labels
in a dictionary. The labels include class targets and box targets which
are dense label maps. The labels are generated from get_input_fn
function in data/dataloader.py.
Returns:
A dict record loss info.
"""
images, labels = data
with tf.GradientTape() as tape:
if len(self.config.heads) == 2:
cls_outputs, box_outputs, seg_outputs = self(images, training=True)
elif 'object_detection' in self.config.heads:
# Object detection head.
cls_outputs, box_outputs = self(images, training=True)
elif 'segmentation' in self.config.heads:
seg_outputs, = self(images, training=True)
reg_l2loss = self._reg_l2_loss(self.config.weight_decay)
total_loss = reg_l2loss
loss_vals = {}
if 'object_detection' in self.config.heads:
# Detection loss.
det_loss = self._detection_loss(cls_outputs, box_outputs, labels,
loss_vals)
total_loss += det_loss
if 'segmentation' in self.config.heads:
seg_loss_layer = self.loss['seg_loss']
seg_loss = seg_loss_layer(labels['image_masks'], seg_outputs)
total_loss += seg_loss
loss_vals['seg_loss'] = seg_loss
if isinstance(self.optimizer,
tf.keras.mixed_precision.experimental.LossScaleOptimizer):
scaled_loss = self.optimizer.get_scaled_loss(total_loss)
else:
scaled_loss = total_loss
loss_vals['loss'] = total_loss
trainable_vars = self._freeze_vars()
scaled_gradients = tape.gradient(scaled_loss, trainable_vars)
if isinstance(self.optimizer,
tf.keras.mixed_precision.experimental.LossScaleOptimizer):
gradients = self.optimizer.get_unscaled_gradients(scaled_gradients)
else:
gradients = scaled_gradients
if self.config.clip_gradients_norm > 0:
# tf.clip_by_global_norm caps the global gradient norm at clip_gradients_norm (10.0 here) to prevent exploding gradients when the loss misbehaves.
gradients, gnorm = tf.clip_by_global_norm(gradients,
self.config.clip_gradients_norm)
loss_vals['gnorm'] = gnorm
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
return loss_vals
def test_step(self, data):
"""Test step.
Args:
data: Tuple of (images, labels). Image tensor with shape [batch_size,
height, width, 3]. The height and width are fixed and equal.Input labels
in a dictionary. The labels include class targets and box targets which
are dense label maps. The labels are generated from get_input_fn
function in data/dataloader.py.
Returns:
A dict record loss info.
"""
images, labels = data
if len(self.config.heads) == 2:
cls_outputs, box_outputs, seg_outputs = self(images, training=True)
elif 'object_detection' in self.config.heads:
cls_outputs, box_outputs = self(images, training=True)
elif 'segmentation' in self.config.heads:
seg_outputs, = self(images, training=True)
reg_l2loss = self._reg_l2_loss(self.config.weight_decay)
total_loss = reg_l2loss
loss_vals = {}
if 'object_detection' in self.config.heads:
det_loss = self._detection_loss(cls_outputs, box_outputs, labels,
loss_vals)
total_loss += det_loss
if 'segmentation' in self.config.heads:
seg_loss_layer = self.loss['seg_loss']
seg_loss = seg_loss_layer(labels['image_masks'], seg_outputs)
total_loss += seg_loss
loss_vals['seg_loss'] = seg_loss
loss_vals['loss'] = total_loss
return loss_vals
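To make the gradient-clipping step in train_step concrete: tf.clip_by_global_norm rescales the whole list of gradients so that their combined L2 norm does not exceed clip_gradients_norm. A standalone sketch with made-up gradient values:

import tensorflow as tf

# Two fake gradient tensors whose combined L2 norm is sqrt(30^2 + 40^2) = 50.
grads = [tf.constant([30.0, 40.0]), tf.constant([0.0])]

clipped, gnorm = tf.clip_by_global_norm(grads, clip_norm=10.0)
print(gnorm.numpy())        # 50.0 -- the global norm before clipping
print(clipped[0].numpy())   # [6.0, 8.0] -- every gradient scaled by 10 / 50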
That concludes this chapter. The next chapter will analyze the optimizer and gradient computation. Stay tuned!