2021SC@SDUSC
FCOS核心实现
目前FCOS官方代码已经开源在 AdelaiDet,代码基于detectron2框架。这里主要关注FCOS最核心的部分,就是正负样本的定义策略,代码如下:
def compute_targets_for_locations(self, locations, targets, size_ranges, num_loc_list):
    """Assign a classification label and a box-regression target to every FPN location.

    Args:
        locations: [N, 2] tensor of (x, y) positions, all FPN levels concatenated.
        targets: per-image ground truth in detectron2 format
            (exposes ``gt_boxes`` and ``gt_classes``).
        size_ranges: [N, 2] tensor, the (min, max) regression size allowed at
            each location's FPN level.
        num_loc_list: list with the number of locations per FPN level.

    Returns:
        (labels, reg_targets): two lists with one entry per image —
        ``labels[i]`` is [N] (value ``self.num_classes`` marks a negative
        sample) and ``reg_targets[i]`` is [N, 4] holding (l, t, r, b).
    """
    labels = []       # per-image classification targets
    reg_targets = []  # per-image regression targets
    xs, ys = locations[:, 0], locations[:, 1]  # location x / y coordinates
    # Process each image independently.
    for im_i in range(len(targets)):
        targets_per_im = targets[im_i]
        bboxes = targets_per_im.gt_boxes.tensor  # [M, 4]
        labels_per_im = targets_per_im.gt_classes  # [M]
        # No GT in this image: every location becomes a negative sample.
        if bboxes.numel() == 0:
            labels.append(labels_per_im.new_zeros(locations.size(0)) + self.num_classes)
            reg_targets.append(locations.new_zeros((locations.size(0), 4)))
            continue
        area = targets_per_im.gt_boxes.area()  # [M]
        # (l, t, r, b) distances from every location to every GT box.
        l = xs[:, None] - bboxes[:, 0][None]  # [N, M]
        t = ys[:, None] - bboxes[:, 1][None]  # [N, M]
        r = bboxes[:, 2][None] - xs[:, None]  # [N, M]
        b = bboxes[:, 3][None] - ys[:, None]  # [N, M]
        reg_targets_per_im = torch.stack([l, t, r, b], dim=2)  # [N, M, 4]
        if self.center_sample:
            # Center sampling: positives restricted to the GT center region.
            is_in_boxes = self.get_sample_region(
                bboxes, self.strides, num_loc_list, xs, ys,
                bitmasks=None, radius=self.radius
            )  # [N, M]
        else:
            # Full sampling: any location strictly inside the GT box.
            is_in_boxes = reg_targets_per_im.min(dim=2)[0] > 0  # [N, M]
        max_reg_targets_per_im = reg_targets_per_im.max(dim=2)[0]  # [N, M]
        # FPN level constraint: the largest of (l, t, r, b) must fall inside
        # the size range of the location's level.
        is_cared_in_the_level = \
            (max_reg_targets_per_im >= size_ranges[:, [0]]) & \
            (max_reg_targets_per_im <= size_ranges[:, [1]])  # [N, M]
        # Area of each GT, broadcast to every location.
        locations_to_gt_area = area[None].repeat(len(locations), 1)  # [N, M]
        # Exclusion 1: location is NOT inside the GT box / its center region.
        locations_to_gt_area[is_in_boxes == 0] = INF
        # Exclusion 2: violates the FPN level size range.
        locations_to_gt_area[is_cared_in_the_level == 0] = INF
        # A location may match several GTs; it regresses the smallest-area one.
        # locations_to_gt_inds: [N], the GT index each location regresses to.
        locations_to_min_area, locations_to_gt_inds = locations_to_gt_area.min(dim=1)
        # Regression target: pick the matched GT's (l, t, r, b) per location.
        reg_targets_per_im = reg_targets_per_im[range(len(locations)), locations_to_gt_inds]
        # Classification target.
        labels_per_im = labels_per_im[locations_to_gt_inds]
        # Negatives (no feasible GT remained) are labelled self.num_classes.
        labels_per_im[locations_to_min_area == INF] = self.num_classes
        labels.append(labels_per_im)
        reg_targets.append(reg_targets_per_im)
    # Bug fix: the original built the target lists but never returned them.
    return labels, reg_targets
总体上看,代码逻辑比RetinaNet的IoU策略更复杂一些,这里只需要计算分类和回归的target即可,因为center-ness分支的target可以用回归target计算得到:
def compute_ctrness_targets(reg_targets):
    """Derive center-ness targets from (l, t, r, b) regression targets.

    Center-ness is sqrt((min(l, r) / max(l, r)) * (min(t, b) / max(t, b))):
    1.0 at the box center, decaying towards the borders.

    Args:
        reg_targets: [N, 4] tensor of (l, t, r, b) distances.

    Returns:
        [N] tensor of center-ness values.
    """
    if len(reg_targets) == 0:
        # Nothing to score: hand back an empty tensor of matching type.
        return reg_targets.new_zeros(len(reg_targets))
    horiz = reg_targets[:, [0, 2]]  # (l, r) pairs
    vert = reg_targets[:, [1, 3]]   # (t, b) pairs
    ratio_h = horiz.min(dim=-1)[0] / horiz.max(dim=-1)[0]
    ratio_v = vert.min(dim=-1)[0] / vert.max(dim=-1)[0]
    return (ratio_h * ratio_v).sqrt()
至于上面的中心采样策略,其实现代码如下,注意中心区域要限制在GT内:
def get_sample_region(self, boxes, strides, num_loc_list, loc_xs, loc_ys, bitmasks=None, radius=1):
    """Mark which locations fall inside each GT's center region.

    The center region at an FPN level is a square of half-size
    ``stride * radius`` around the GT center, clipped so it never
    extends beyond the GT box itself.

    Args:
        boxes: [M, 4] GT boxes (x1, y1, x2, y2).
        strides: per-level strides.
        num_loc_list: number of locations per FPN level.
        loc_xs, loc_ys: [N] location coordinates, levels concatenated.
        bitmasks: unused here (kept for interface compatibility).
        radius: center-region radius in units of the level stride.

    Returns:
        [N, M] bool mask, True where the location lies strictly inside
        the clipped center region of the GT.
    """
    cx = (boxes[..., 0] + boxes[..., 2]) * 0.5  # [M]
    cy = (boxes[..., 1] + boxes[..., 3]) * 0.5  # [M]
    num_gts = boxes.shape[0]
    K = len(loc_xs)
    boxes = boxes[None].expand(K, num_gts, 4)  # [N, M, 4]
    cx = cx[None].expand(K, num_gts)           # [N, M]
    cy = cy[None].expand(K, num_gts)           # [N, M]
    center_gt = boxes.new_zeros(boxes.shape)   # clipped center regions
    # No GT (the sum()==0 early-out mirrors the upstream behavior).
    if cx.numel() == 0 or cx[..., 0].sum() == 0:
        return loc_xs.new_zeros(loc_xs.shape, dtype=torch.uint8)
    start = 0
    # Each FPN level uses its own stride for the center-region extent.
    for lvl, n_loc in enumerate(num_loc_list):
        stop = start + n_loc
        reach = strides[lvl] * radius
        # Clip the square center region so it stays inside the GT box.
        center_gt[start:stop, :, 0] = torch.maximum(cx[start:stop] - reach, boxes[start:stop, :, 0])
        center_gt[start:stop, :, 1] = torch.maximum(cy[start:stop] - reach, boxes[start:stop, :, 1])
        center_gt[start:stop, :, 2] = torch.minimum(cx[start:stop] + reach, boxes[start:stop, :, 2])
        center_gt[start:stop, :, 3] = torch.minimum(cy[start:stop] + reach, boxes[start:stop, :, 3])
        start = stop
    # Signed distances from each location to the clipped region's sides;
    # all four strictly positive means the location is inside.
    dist_l = loc_xs[:, None] - center_gt[..., 0]
    dist_r = center_gt[..., 2] - loc_xs[:, None]
    dist_t = loc_ys[:, None] - center_gt[..., 1]
    dist_b = center_gt[..., 3] - loc_ys[:, None]
    offsets = torch.stack((dist_l, dist_t, dist_r, dist_b), -1)
    return offsets.min(-1)[0] > 0
总结
FCOS是一个很特别的anchor-free检测模型:它不是基于关键点进行检测,而是让每个位置直接回归目标框。从另一方面看,FCOS虽然没有anchor,但整体结构与RetinaNet非常相似。FCOS依然是一项很有价值的工作,它让我们重新思考:密集铺设anchor其实并非必要,只要采用更好的正负样本定义策略就能达到同样甚至更好的效果;后续的ATSS进一步验证并强化了这一结论。