1. Introduction
In object detection, Focal Loss is commonly used in the classification branch to handle the class-imbalance problem, while the regression branch usually uses IoU Loss; to round out the losses used by FCOS, this post also introduces another commonly used loss function, CrossEntropyLoss. The loss functions in MMDetection are defined at this link.
2. Focal Loss
Source code: link. Focal Loss comes from RetinaNet; it was proposed to alleviate the imbalance between the numbers of foreground and background samples in object detection. We skip the derivation and only give the final result:

$$L_{FL}=\frac{1}{N}\sum_i-\left[\alpha_1\cdot y_i\cdot(1-p_i)^{\gamma}\cdot\log(p_i)+\alpha_2\cdot(1-y_i)\cdot p_i^{\gamma}\cdot\log(1-p_i)\right]$$

Here $i$ indexes the samples that participate in the loss computation and $N$ is the total number of samples. The expression can be written compactly as:

$${\rm FL}(p_t)=-\alpha_t(1-p_t)^{\gamma}\log(p_t)\tag{1}$$
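Before looking at the MMDetection code, equation (1) can be checked with a few lines of plain PyTorch. The following is a minimal sketch of my own (the function name and shapes are assumptions, not MMDetection API):

import torch

def focal_loss_reference(logits, targets, gamma=2.0, alpha=0.25):
    # logits: (N,) raw scores; targets: (N,) in {0, 1}
    p = torch.sigmoid(logits)
    # p_t = p for positives, 1 - p for negatives
    p_t = p * targets + (1 - p) * (1 - targets)
    alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
    # Eq. (1); clamp keeps log() away from zero
    loss = -alpha_t * (1 - p_t) ** gamma * torch.log(p_t.clamp(min=1e-12))
    return loss.mean()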
Equation (1) is the final form of Focal Loss; its implementation in MMDetection is shown below (the underlying computation is implemented in C++ and CUDA):
def sigmoid_focal_loss(pred, target, weight=None, gamma=2.0, alpha=0.25, reduction='mean', avg_factor=None):
    # pred.shape=(N,C), target.shape=(N,)
    # weight: per-sample weight of this loss term
    # gamma and alpha are the focusing and balancing parameters of Focal Loss
    # reduction: how the loss is reduced to a scalar ('none', 'sum' or 'mean')
    # avg_factor: normalization factor
    # _sigmoid_focal_loss itself is implemented in C++ and CUDA
    loss = _sigmoid_focal_loss(pred, target, gamma, alpha, None, 'none')
    if weight is not None:
        if weight.shape != loss.shape:
            if weight.size(0) == loss.size(0):
                # weight.shape=(num_priors,) => (num_priors,1)
                weight = weight.view(-1, 1)
            else:
                assert weight.numel() == loss.numel()
                weight = weight.view(loss.size(0), -1)
        assert weight.ndim == loss.ndim
    # apply the weight and reduce the loss
    loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
    return loss
@LOSSES.register_module()
class FocalLoss(nn.Module):
    def __init__(self, use_sigmoid=True, gamma=2.0, alpha=0.25, reduction='mean', loss_weight=1.0):
        # use_sigmoid: whether to apply a sigmoid to the predictions, i.e. map them into [0,1]
        super(FocalLoss, self).__init__()
        assert use_sigmoid is True, 'Only sigmoid focal loss supported now.'
        self.use_sigmoid = use_sigmoid
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self, pred, target, weight=None, avg_factor=None, reduction_override=None):
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (reduction_override if reduction_override else self.reduction)
        if self.use_sigmoid:
            loss_cls = self.loss_weight * sigmoid_focal_loss(
                pred,
                target,
                weight,
                gamma=self.gamma,
                alpha=self.alpha,
                reduction=reduction,
                avg_factor=avg_factor)
        else:
            raise NotImplementedError
        # return the classification loss
        return loss_cls
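For reference, the class is normally instantiated from a config file through the LOSSES registry; a typical RetinaNet-style classification-loss entry looks like this (the values are simply the defaults above):

# typical usage in an MMDetection config (RetinaNet-style head)
loss_cls = dict(
    type='FocalLoss',
    use_sigmoid=True,
    gamma=2.0,
    alpha=0.25,
    loss_weight=1.0)

Both sigmoid_focal_loss above and the other losses below rely on the helper weight_reduce_loss: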
def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
    # apply an element-wise weight, then reduce the loss
    if weight is not None:
        loss = loss * weight
    if avg_factor is None:
        loss = reduce_loss(loss, reduction)
    else:
        if reduction == 'mean':
            loss = loss.sum() / avg_factor
        elif reduction != 'none':
            raise ValueError('avg_factor can not be used with reduction="sum"')
    return loss
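Note that weight_reduce_loss calls a reduce_loss helper that is not shown in this excerpt; a minimal sketch of its behavior (my paraphrase, not the verbatim MMDetection helper):

def reduce_loss(loss, reduction):
    # reduce an element-wise loss to the requested form
    if reduction == 'none':
        return loss
    elif reduction == 'mean':
        return loss.mean()
    elif reduction == 'sum':
        return loss.sum()
    raise ValueError(f'invalid reduction: {reduction}')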
3. IoU Loss
Source code: link. Object detectors once optimized each predicted box by regressing the distances between its coordinates and those of the ground-truth box, but the four coordinates of a box are correlated as a whole, so simply measuring per-edge or per-point distances does not reflect the quality of the prediction. This motivated IoU Loss, defined as:

$$L_{IoU}=-\ln\frac{{\rm Intersection}(A,B)}{{\rm Union}(A,B)}\tag{2}$$
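For intuition, equation (2) can be written out directly for aligned boxes in (x1, y1, x2, y2) form. This is a minimal sketch of my own, not MMDetection's bbox_overlaps:

import torch

def iou_loss_reference(pred, target, eps=1e-6):
    # intersection rectangle of each aligned pair of boxes
    lt = torch.max(pred[:, :2], target[:, :2])  # top-left corner
    rb = torch.min(pred[:, 2:], target[:, 2:])  # bottom-right corner
    wh = (rb - lt).clamp(min=0)                 # zero if the boxes do not overlap
    inter = wh[:, 0] * wh[:, 1]
    area_pred = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    area_target = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
    union = area_pred + area_target - inter
    ious = (inter / union).clamp(min=eps)       # eps avoids log(0)
    return -ious.log()                          # Eq. (2)

MMDetection's implementation instead builds on its bbox_overlaps helper: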
@weighted_loss  # the weighted_loss decorator lets the caller pass a weight for this loss term
def iou_loss(pred, target, eps=1e-6):
    # compute the IoU; clamp(min=eps) keeps the argument of the logarithm away from zero
    ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)
    # IoU Loss following Eq. (2)
    loss = -ious.log()
    return loss
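The weighted_loss decorator is also not shown in this excerpt; conceptually, it wraps an element-wise loss so that callers can pass weight, reduction and avg_factor. A sketch of the idea (my paraphrase of the MMDetection decorator):

import functools

def weighted_loss(loss_func):
    # wrap an element-wise loss with weighting and reduction handling
    @functools.wraps(loss_func)
    def wrapper(pred, target, weight=None, reduction='mean', avg_factor=None, **kwargs):
        loss = loss_func(pred, target, **kwargs)
        return weight_reduce_loss(loss, weight, reduction, avg_factor)
    return wrapper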
@LOSSES.register_module()
class IoULoss(nn.Module):
    def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):
        super(IoULoss, self).__init__()
        self.eps = eps
        self.reduction = reduction  # how the loss is reduced to a scalar
        self.loss_weight = loss_weight  # weight coefficient of this loss term

    def forward(self, pred, target, weight=None, avg_factor=None, reduction_override=None, **kwargs):
        # pred: predicted boxes, shape=(N,4)
        # target: regression targets, shape=(N,4)
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (reduction_override if reduction_override else self.reduction)
        # if all weights are zero, return a zero loss that keeps the graph connected
        if (weight is not None) and (not torch.any(weight > 0)) and (reduction != 'none'):
            return (pred * weight).sum()
        if weight is not None and weight.dim() > 1:
            # reduce a per-coordinate weight (N,4) to a per-box weight (N,)
            assert weight.shape == pred.shape
            weight = weight.mean(-1)
        # delegate to the decorated iou_loss
        loss = self.loss_weight * iou_loss(
            pred,
            target,
            weight,
            eps=self.eps,
            reduction=reduction,
            avg_factor=avg_factor,
            **kwargs)
        # return the loss value
        return loss
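A typical config entry for this loss (e.g. the regression branch of the FCOS head) would be:

# typical usage in an MMDetection config (FCOS regression branch)
loss_bbox = dict(type='IoULoss', loss_weight=1.0)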
4. CrossEntropyLoss
Source code: link. Cross entropy measures how close the actual output is to the expected output: the smaller the cross entropy, the closer the two. Skipping the derivation, its (binary) form is:

$$L=-\sum_x\left(p(x)\log q(x)+(1-p(x))\log(1-q(x))\right)\tag{3}$$
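As a quick check, the per-sample form of equation (3) matches PyTorch's built-in binary cross entropy; a minimal sketch (the tensor values are made up):

import torch
import torch.nn.functional as F

p = torch.tensor([1.0, 0.0, 1.0])  # targets p(x)
q = torch.tensor([0.9, 0.2, 0.6])  # predicted probabilities q(x)
manual = -(p * q.log() + (1 - p) * (1 - q).log())
builtin = F.binary_cross_entropy(q, p, reduction='none')
assert torch.allclose(manual, builtin)

MMDetection wraps the PyTorch cross-entropy functions in three variants: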
def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None, class_weight=None):
    # pred.shape=(N,C), label.shape=(N,)
    # use PyTorch's built-in cross-entropy loss
    loss = F.cross_entropy(pred, label, weight=class_weight, reduction='none')
    if weight is not None:
        weight = weight.float()
    loss = weight_reduce_loss(loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
    # return the loss value
    return loss
def binary_cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None, class_weight=None):
    # expand index labels (N,) to one-hot form (N,C) if needed
    if pred.dim() != label.dim():
        label, weight = _expand_onehot_labels(label, weight, pred.size(-1))
    if weight is not None:
        weight = weight.float()
    loss = F.binary_cross_entropy_with_logits(pred, label.float(), pos_weight=class_weight, reduction='none')
    # apply the weight and reduce to the requested form
    loss = weight_reduce_loss(loss, weight, reduction=reduction, avg_factor=avg_factor)
    return loss
def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None, class_weight=None):
    # TODO: handle these two reserved arguments
    assert reduction == 'mean' and avg_factor is None
    num_rois = pred.size()[0]
    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
    # pick the mask prediction of each RoI's ground-truth class
    pred_slice = pred[inds, label].squeeze(1)
    return F.binary_cross_entropy_with_logits(
        pred_slice, target, weight=class_weight, reduction='mean')[None]
@LOSSES.register_module()
class CrossEntropyLoss(nn.Module):
    def __init__(self, use_sigmoid=False, use_mask=False, reduction='mean', class_weight=None, loss_weight=1.0):
        super(CrossEntropyLoss, self).__init__()
        assert (use_sigmoid is False) or (use_mask is False)
        self.use_sigmoid = use_sigmoid  # whether to use the sigmoid (binary) form
        self.use_mask = use_mask  # whether to use the mask variant, essentially a binary cross entropy
        self.reduction = reduction  # reduction mode
        self.loss_weight = loss_weight  # weight of this loss term
        self.class_weight = class_weight  # per-class weights
        # binary cross-entropy loss
        if self.use_sigmoid:
            self.cls_criterion = binary_cross_entropy
        # mask cross-entropy loss
        elif self.use_mask:
            self.cls_criterion = mask_cross_entropy
        # multi-class cross-entropy loss
        else:
            self.cls_criterion = cross_entropy

    def forward(self, cls_score, label, weight=None, avg_factor=None, reduction_override=None, **kwargs):
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (reduction_override if reduction_override else self.reduction)
        # build the per-class weight tensor on the same device as the scores
        if self.class_weight is not None:
            class_weight = cls_score.new_tensor(self.class_weight)
        else:
            class_weight = None
        loss_cls = self.loss_weight * self.cls_criterion(
            cls_score,
            label,
            weight,
            class_weight=class_weight,
            reduction=reduction,
            avg_factor=avg_factor,
            **kwargs)
        # return the loss value
        return loss_cls
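Tying back to the introduction: in the standard FCOS config, this class supplies the centerness loss in its binary (sigmoid) form, e.g.:

# typical usage in an MMDetection config (FCOS centerness branch)
loss_centerness = dict(
    type='CrossEntropyLoss',
    use_sigmoid=True,
    loss_weight=1.0)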
References
- https://github.com/open-mmlab/mmdetection