def forward_single(self, x: Tensor) -> Tuple[Tensor]:
    """Forward feature map of a single FPN level.

    Returns ``(cls_out, pts_out_init, pts_out_refine)`` in training mode,
    otherwise ``(cls_out, decoded_bboxes)``.
    """
    dcn_base_offset = self.dcn_base_offset.type_as(x)
    # Initial reppoints come either from the location center (center_init)
    # or from a regular grid placed on a pre-defined bbox (use_grid_points).
    if self.use_grid_points or not self.center_init:
        # Grid/bbox initialization (not taken under the default config).
        half_scale = self.point_base_scale / 2
        points_init = dcn_base_offset / dcn_base_offset.max() * half_scale
        bbox_init = x.new_tensor(
            [-half_scale, -half_scale, half_scale,
             half_scale]).view(1, 4, 1, 1)
    else:
        # Default config: zero initial offset from the center.
        points_init = 0

    cls_feat, pts_feat = x, x
    for conv in self.cls_convs:
        cls_feat = conv(cls_feat)
    for conv in self.reg_convs:
        pts_feat = conv(pts_feat)

    # First-stage (init) point offsets.
    pts_out_init = self.reppoints_pts_init_out(
        self.relu(self.reppoints_pts_init_conv(pts_feat)))
    if self.use_grid_points:
        # Not taken under the default config.
        pts_out_init, bbox_out_init = self.gen_grid_from_reg(
            pts_out_init, bbox_init.detach())
    else:
        # Default config: points_init is 0, so this is a no-op.
        pts_out_init = pts_out_init + points_init

    # Scale the gradient flowing back through the init-stage offsets.
    pts_out_init_grad_mul = (
        (1 - self.gradient_mul) * pts_out_init.detach() +
        self.gradient_mul * pts_out_init)
    # Subtract the DCN base offset so that a zero predicted offset places
    # every deformable sampling point at the location center. DeformConv
    # treats a zero offset as the vanilla kernel grid (e.g. +-1 for a 3x3
    # kernel), so the base grid must be cancelled out here to adapt the
    # predicted absolute offsets to the operator's convention.
    dcn_offset = pts_out_init_grad_mul - dcn_base_offset
    cls_out = self.reppoints_cls_out(
        self.relu(self.reppoints_cls_conv(cls_feat, dcn_offset)))
    pts_out_refine = self.reppoints_pts_refine_out(
        self.relu(self.reppoints_pts_refine_conv(pts_feat, dcn_offset)))
    if self.use_grid_points:
        # Not taken under the default config.
        pts_out_refine, bbox_out_refine = self.gen_grid_from_reg(
            pts_out_refine, bbox_out_init.detach())
    else:
        # Refinement is predicted relative to the (detached) init points.
        pts_out_refine = pts_out_refine + pts_out_init.detach()

    if self.training:
        return cls_out, pts_out_init, pts_out_refine
    return cls_out, self.points2bbox(pts_out_refine)
def loss_by_feat_single(self, cls_score: Tensor, pts_pred_init: Tensor,
                        pts_pred_refine: Tensor, labels: Tensor,
                        label_weights, bbox_gt_init: Tensor,
                        bbox_weights_init: Tensor, bbox_gt_refine: Tensor,
                        bbox_weights_refine: Tensor, stride: int,
                        avg_factor_init: int,
                        avg_factor_refine: int) -> Tuple[Tensor]:
    """Calculate the loss of a single scale level based on the features
    extracted by the detection head.

    Args:
        cls_score (Tensor): Box scores for each scale level
            Has shape (N, num_classes, h_i, w_i).
        pts_pred_init (Tensor): Points of shape
            (batch_size, h_i * w_i, num_points * 2).
        pts_pred_refine (Tensor): Points refined of shape
            (batch_size, h_i * w_i, num_points * 2).
        labels (Tensor): Ground truth class indices with shape
            (batch_size, h_i * w_i).
        label_weights (Tensor): Label weights of shape
            (batch_size, h_i * w_i).
        bbox_gt_init (Tensor): BBox regression targets in the init stage
            of shape (batch_size, h_i * w_i, 4).
        bbox_weights_init (Tensor): BBox regression loss weights in the
            init stage of shape (batch_size, h_i * w_i, 4).
        bbox_gt_refine (Tensor): BBox regression targets in the refine
            stage of shape (batch_size, h_i * w_i, 4).
        bbox_weights_refine (Tensor): BBox regression loss weights in the
            refine stage of shape (batch_size, h_i * w_i, 4).
        stride (int): Point stride.
        avg_factor_init (int): Average factor that is used to average
            the loss in the init stage.
        avg_factor_refine (int): Average factor that is used to average
            the loss in the refine stage.

    Returns:
        Tuple[Tensor]: loss components.
    """
    # ---- classification loss ----
    cls_score = cls_score.permute(
        0, 2, 3, 1).reshape(-1, self.cls_out_channels).contiguous()
    loss_cls = self.loss_cls(
        cls_score,
        labels.reshape(-1),
        label_weights.reshape(-1),
        avg_factor=avg_factor_refine)

    # ---- points (regression) losses ----
    # Both stages are normalized by the same scale-dependent term.
    normalize_term = self.point_base_scale * stride

    def _stage_loss(loss_fn, pts_pred, bbox_gt, bbox_weights, avg_factor):
        # Decode predicted points into a bbox, then compare against the
        # stage's bbox targets in normalized coordinates.
        bbox_pred = self.points2bbox(
            pts_pred.reshape(-1, 2 * self.num_points), y_first=False)
        return loss_fn(
            bbox_pred / normalize_term,
            bbox_gt.reshape(-1, 4) / normalize_term,
            bbox_weights.reshape(-1, 4),
            avg_factor=avg_factor)

    # Default config uses SmoothL1 with weight 0.5 (init) / 1.0 (refine).
    loss_pts_init = _stage_loss(self.loss_bbox_init, pts_pred_init,
                                bbox_gt_init, bbox_weights_init,
                                avg_factor_init)
    loss_pts_refine = _stage_loss(self.loss_bbox_refine, pts_pred_refine,
                                  bbox_gt_refine, bbox_weights_refine,
                                  avg_factor_refine)
    return loss_cls, loss_pts_init, loss_pts_refine
def loss_by_feat(
        self,
        cls_scores: List[Tensor],
        pts_preds_init: List[Tensor],
        pts_preds_refine: List[Tensor],
        batch_gt_instances: InstanceList,
        batch_img_metas: List[dict],
        batch_gt_instances_ignore: OptInstanceList = None
) -> Dict[str, Tensor]:
    """Calculate the loss based on the features extracted by the detection
    head.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level,
            each is a 4D-tensor, of shape (batch_size, num_classes, h, w).
        pts_preds_init (list[Tensor]): Points for each scale level, each is
            a 3D-tensor, of shape (batch_size, h_i * w_i, num_points * 2).
        pts_preds_refine (list[Tensor]): Points refined for each scale
            level, each is a 3D-tensor, of shape
            (batch_size, h_i * w_i, num_points * 2).
        batch_gt_instances (list[:obj:`InstanceData`]): Batch of
            gt_instance. It usually includes ``bboxes`` and ``labels``
            attributes.
        batch_img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
            Batch of gt_instances_ignore. It includes ``bboxes`` attribute
            data that is ignored during training and testing.
            Defaults to None.

    Returns:
        dict[str, Tensor]: A dictionary of loss components.
    """
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    device = cls_scores[0].device

    # target for initial stage
    center_list, valid_flag_list = self.get_points(featmap_sizes,
                                                   batch_img_metas, device)
    pts_coordinate_preds_init = self.offset_to_pts(center_list,
                                                   pts_preds_init)
    if self.train_cfg['init']['assigner']['type'] == 'PointAssigner':
        # Assign target for center list
        candidate_list = center_list
    else:
        # transform center list to bbox list and
        # assign target for bbox list
        candidate_list = self.centers_to_bboxes(center_list)
    cls_reg_targets_init = self.get_targets(
        proposals_list=candidate_list,
        valid_flag_list=valid_flag_list,
        batch_gt_instances=batch_gt_instances,
        batch_img_metas=batch_img_metas,
        batch_gt_instances_ignore=batch_gt_instances_ignore,
        stage='init',
        return_sampling_results=False)
    (*_, bbox_gt_list_init, candidate_list_init, bbox_weights_list_init,
     avg_factor_init) = cls_reg_targets_init

    # target for refinement stage
    center_list, valid_flag_list = self.get_points(featmap_sizes,
                                                   batch_img_metas, device)
    pts_coordinate_preds_refine = self.offset_to_pts(
        center_list, pts_preds_refine)
    # Decode the (detached) init-stage predictions once per level: the
    # result does not depend on the image index, so it is hoisted out of
    # the per-image loop below instead of being recomputed for every image.
    bbox_shift_list = [
        self.points2bbox(pts_preds_init[i_lvl].detach()) *
        self.point_strides[i_lvl]
        for i_lvl in range(len(pts_preds_refine))
    ]
    bbox_list = []
    for i_img, center in enumerate(center_list):
        bbox = []
        for i_lvl, bbox_shift in enumerate(bbox_shift_list):
            # Proposals for the refine assigner: per-location center
            # repeated as (x, y, x, y), shifted by the decoded init bbox.
            bbox_center = torch.cat(
                [center[i_lvl][:, :2], center[i_lvl][:, :2]], dim=1)
            bbox.append(bbox_center +
                        bbox_shift[i_img].permute(1, 2, 0).reshape(-1, 4))
        bbox_list.append(bbox)
    cls_reg_targets_refine = self.get_targets(
        proposals_list=bbox_list,
        valid_flag_list=valid_flag_list,
        batch_gt_instances=batch_gt_instances,
        batch_img_metas=batch_img_metas,
        batch_gt_instances_ignore=batch_gt_instances_ignore,
        stage='refine',
        return_sampling_results=False)
    (labels_list, label_weights_list, bbox_gt_list_refine,
     candidate_list_refine, bbox_weights_list_refine,
     avg_factor_refine) = cls_reg_targets_refine

    # compute loss
    losses_cls, losses_pts_init, losses_pts_refine = multi_apply(
        self.loss_by_feat_single,
        cls_scores,
        pts_coordinate_preds_init,
        pts_coordinate_preds_refine,
        labels_list,  # NOTE: labels/label_weights come from the refine stage.
        label_weights_list,
        bbox_gt_list_init,
        bbox_weights_list_init,
        bbox_gt_list_refine,
        bbox_weights_list_refine,
        self.point_strides,
        avg_factor_init=avg_factor_init,
        avg_factor_refine=avg_factor_refine)
    loss_dict_all = {
        'loss_cls': losses_cls,
        'loss_pts_init': losses_pts_init,
        'loss_pts_refine': losses_pts_refine
    }
    return loss_dict_all
Meta-architecture note (my guess, unverified): the tuple returned by
``forward`` is passed as ``x`` into ``BaseDenseHead.loss``, which then
calls ``loss_by_feat``. So changing what ``forward`` returns in training
mode only requires a matching change to the ``loss_by_feat`` interface.