The RetinaNet loss differs from the usual softmax cross-entropy for multi-class classification: the classification head ends in a sigmoid activation, so each (anchor, class) score is an independent binary prediction.
import torch
import torch.nn as nn

class ClassificationModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, num_classes=80, prior=0.01, feature_size=256):
        super(ClassificationModel, self).__init__()
        self.num_classes = num_classes
        self.num_anchors = num_anchors
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        # one sigmoid score per (anchor, class) pair; no background channel
        self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()
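The forward pass is omitted above; a minimal sketch (my reconstruction, following the usual pytorch-retinanet layout) of how the head's B x (A*K) x H x W output gets reshaped into per-anchor class scores:

    def forward(self, x):
        out = self.act1(self.conv1(x))
        out = self.act2(self.conv2(out))
        out = self.act3(self.conv3(out))
        out = self.act4(self.conv4(out))
        out = self.output_act(self.output(out))       # B x (A*K) x H x W, scores in (0, 1)
        out = out.permute(0, 2, 3, 1).contiguous()    # B x H x W x (A*K)
        b, h, w, _ = out.shape
        out = out.view(b, h, w, self.num_anchors, self.num_classes)
        return out.view(b, -1, self.num_classes)      # B x (H*W*A) x num_classes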
Suppose there are 5 classes. The output for one image is then [num_anchors, 5]: background gets no class slot, and each of the 5 scores per anchor is scored independently with a binary cross-entropy.
For example, for predictions [0.6, 0.8, 0.6] against one-hot targets [0, 1, 0], with alpha = 0.25 and gamma = 2, the loss is
-0.75*(0.6**2)*log(0.4) - 0.25*(0.2**2)*log(0.8) - 0.75*(0.6**2)*log(0.4) ≈ 0.497
(negatives get weight 1 - alpha = 0.75 and focusing term p**gamma; the positive gets alpha = 0.25 and (1 - p)**gamma).
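A quick numeric check of that arithmetic (plain Python, my own addition, not from the repo):

import math

alpha, gamma = 0.25, 2.0
preds, targets = [0.6, 0.8, 0.6], [0, 1, 0]

loss = sum(
    (alpha if t == 1 else 1 - alpha)          # alpha for positives, 1 - alpha for negatives
    * ((1 - p) if t == 1 else p) ** gamma     # focusing term
    * -math.log(p if t == 1 else 1 - p)       # binary cross-entropy
    for p, t in zip(preds, targets)
)
print(round(loss, 4))  # 0.497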
class FocalLoss(nn.Module):
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        # anchors are shared across the batch; convert x1y1x2y2 to center/size form
        anchor = anchors[0, :, :]
        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights
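        # e.g. (assumed numbers) an anchor [10, 10, 50, 90] gives
        # width 40, height 80, center (30, 50)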
        for j in range(batch_size):
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            # drop padded annotations (class id -1 marks padding)
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                # image has no GT boxes: contribute zero loss
                regression_losses.append(torch.tensor(0).float().cuda())
                classification_losses.append(torch.tensor(0).float().cuda())
                continue

            # clamp so log() below stays finite
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])  # num_anchors x num_annotations
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors
            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1  # -1 = ignore
            targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0       # IoU < 0.4: negative, all classes set to 0
            positive_indices = torch.ge(IoU_max, 0.5)    # IoU >= 0.5: positive
            num_positive_anchors = positive_indices.sum()
            assigned_annotations = bbox_annotation[IoU_argmax, :]  # best-matching GT box per anchor

            # anchors in [0.4, 0.5) keep -1 and are ignored; positives get a one-hot row
            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
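            # toy illustration of the rule above (assumed IoU values, 3 classes):
            #   IoU_max = [0.30, 0.45, 0.55, 0.80], best-GT classes = [2, 2, 0, 1]
            #   -> target rows: [0, 0, 0] (negative), [-1, -1, -1] (ignored),
            #      [1, 0, 0] (positive, class 0), [0, 1, 0] (positive, class 1)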
            # alpha for positives, 1 - alpha for negatives
            alpha_factor = torch.ones(targets.shape).cuda() * alpha
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)

            # focusing term: (1 - p)^gamma for positives, p^gamma for negatives
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            # zero out ignored anchors, then normalize by the number of positives
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
            # compute the loss for regression
            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths/heights to at least 1 so the log below stays finite
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                # standard box encoding: center offsets normalized by anchor size, log size ratios
                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()
                targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()  # variance scaling
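                # worked example (assumed numbers): anchor [10, 10, 50, 90], GT [20, 20, 60, 100];
                # both are 40 x 80 with centers (30, 50) and (40, 60), so
                # dx = 10/40 = 0.25, dy = 10/80 = 0.125, dw = dh = log(1) = 0
                # -> after variance scaling: [2.5, 1.25, 0.0, 0.0]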
                negative_indices = ~positive_indices  # kept from the original; unused below

                # smooth L1 with beta = 1/9: quadratic for small errors, linear beyond
                regression_diff = torch.abs(targets - regression[positive_indices, :])
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                regression_losses.append(torch.tensor(0).float().cuda())

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True)
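The regression branch is a smooth L1 loss with beta = 1/9; a minimal standalone rewrite (my own, not from the repo) makes the two regimes easy to see:

import torch

def smooth_l1(diff, beta=1.0 / 9.0):
    # 0.5 * diff**2 / beta below beta (identical to 0.5 * 9 * diff**2 above), linear beyond
    return torch.where(diff <= beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)

d = torch.tensor([0.05, 1.0 / 9.0, 0.5])
print(smooth_l1(d))  # quadratic region, the boundary (both branches give beta/2), linear region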
Testing the classification part by hand:
import numpy as np
import torch

alpha = 0.25
gamma = 2.0

# 4 anchors x 4 classes; the second row is all -1, i.e. an ignored anchor
a = np.array([[0, 0, 0, 1], [-1, -1, -1, -1], [0, 0, 0, 0], [0, 1, 0, 0]], np.float32)
print(a)
b = np.array([[0.2, 0.5, 0.5, 0.6], [0.4, 0.4, 0.5, 0.2], [0.4, 0.2, 0.5, 0.7], [0.2, 0.2, 0.1, 0.5]], np.float32)
print(b)

targets = torch.from_numpy(a)
classification = torch.from_numpy(b)

alpha_factor = (torch.ones(targets.shape) * alpha).float()
alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
focal_weight = alpha_factor * torch.pow(focal_weight, gamma)
# print(alpha_factor)
# print(focal_weight)

part_1 = -(targets * torch.log(classification))                 # positive-target term of the BCE
part_2 = -((1.0 - targets) * torch.log(1.0 - classification))   # negative-target term of the BCE
print('--- ')
print(part_1)
print(part_2)

bce = part_1 + part_2  # same as -(t * log(p) + (1 - t) * log(1 - p))
# cls_loss = focal_weight * torch.pow(bce, gamma)
cls_loss = focal_weight * bce
cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape))
# classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

Output:
[[ 0. 0. 0. 1.]
[-1. -1. -1. -1.]
[ 0. 0. 0. 0.]
[ 0. 1. 0. 0.]]
[[ 0.2 0.5 0.5 0.60000002]
[ 0.40000001 0.40000001 0.5 0.2 ]
[ 0.40000001 0.2 0.5 0.69999999]
[ 0.2 0.2 0.1 0.5 ]]
---
tensor([[ 0.0000, 0.0000, 0.0000, 0.5108],
[-0.9163, -0.9163, -0.6931, -1.6094],
[ 0.0000, 0.0000, 0.0000, 0.0000],
[ 0.0000, 1.6094, 0.0000, 0.0000]])
tensor([[ 0.2231, 0.6931, 0.6931, 0.0000],
[ 1.0217, 1.0217, 1.3863, 0.4463],
[ 0.5108, 0.2231, 0.6931, 1.2040],
[ 0.2231, 0.0000, 0.1054, 0.6931]])
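Reading part_1 against the focal weights confirms the hand formula: row 0, class 3 has target 1 and p = 0.6, so its contribution to cls_loss is 0.25 * 0.4**2 * 0.5108 ≈ 0.0204, while the all -1 row is zeroed out by the final torch.where. A two-line check (my own addition):

print(cls_loss[0, 3])  # ~0.0204 = alpha * (1 - p)**gamma * -log(p) with p = 0.6
print(cls_loss[1])     # all zeros: ignored anchors contribute nothing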