This layer is not used at test time; it only takes effect during training, where it produces the targets used to back-propagate the cls and reg losses.
1. bbox_inside_weights
Sets the weight of the regression loss for each positive sample, 1 by default (0 for negative samples), so whether a sample contributes to the regression loss is what distinguishes positives from negatives.
In the original paper, only positive samples contribute to the regression loss; negatives do not (their regression loss can be regarded as 0).
2. bbox_outside_weights
Weights used to balance the RPN classification loss against the regression loss (see the sketch below).
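To make the role of the two weight tensors concrete, here is a minimal sketch (not the repository's exact implementation) of how they typically enter the RPN smooth-L1 regression loss: bbox_inside_weights zeroes out the error of non-positive anchors element-wise, and bbox_outside_weights rescales the summed loss so it stays balanced against the classification loss.

import torch

def rpn_reg_loss_sketch(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=3.0):
    # bbox_inside_weights is 1 for positive anchors and 0 elsewhere,
    # so only positives produce a non-zero regression error
    diff = bbox_inside_weights * (bbox_pred - bbox_targets)
    abs_diff = torch.abs(diff)
    sigma2 = sigma ** 2
    flag = (abs_diff < 1.0 / sigma2).float()
    loss = flag * 0.5 * sigma2 * diff ** 2 + (1.0 - flag) * (abs_diff - 0.5 / sigma2)
    # bbox_outside_weights normalizes the sum (e.g. 1/num_examples per element),
    # which balances the regression term against the classification term
    return (bbox_outside_weights * loss).sum()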
class _AnchorTargetLayer(nn.Module):
"""
Assign anchors to ground-truth targets. Produces anchor classification
labels and bounding-box regression targets.
"""
def __init__(self, feat_stride, scales, ratios):
super(_AnchorTargetLayer, self).__init__()
        self._feat_stride = feat_stride  # e.g. 16: an arbitrary P*Q image is resized to M*N, and the feature map is (M/16)*(N/16)
self._scales = scales
anchor_scales = scales
self._anchors = torch.from_numpy(generate_anchors(scales=np.array(anchor_scales), ratios=np.array(ratios))).float()
self._num_anchors = self._anchors.size(0)
# allow boxes to sit over the edge by a small amount
self._allowed_border = 0 # default is 0
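        # For reference: with the usual defaults (base_size=16, ratios=[0.5, 1, 2],
        # scales=[8, 16, 32]) generate_anchors returns 9 base (x1, y1, x2, y2) boxes
        # centred on the top-left 16x16 cell, spanning roughly 184x96, 368x192,
        # 736x384 (ratio 0.5), 128x128, 256x256, 512x512 (ratio 1) and 88x176,
        # 176x352, 352x704 (ratio 2); forward() below shifts them to every
        # feature-map location.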
def forward(self, input):
# Algorithm:
#
# for each (H, W) location i
# generate 9 anchor boxes centered on cell i
# apply predicted bbox deltas at cell i to each of the 9 anchors
# filter out-of-image anchors
        rpn_cls_score = input[0]  # shape: [batch_size, 2*A, H, W], i.e. 18 channels for A=9
        gt_boxes = input[1]       # shape: [batch_size, num_gt, 5] -> (x1, y1, x2, y2, class)
        im_info = input[2]        # per-image (height, width, scale)
        num_boxes = input[3]
# map of shape (..., H, W)
height, width = rpn_cls_score.size(2), rpn_cls_score.size(3)
batch_size = gt_boxes.size(0)
feat_height, feat_width = rpn_cls_score.size(2), rpn_cls_score.size(3)
shift_x = np.arange(0, feat_width) * self._feat_stride
shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # build the grid of (x, y) offsets, one per feature-map cell
shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose())
shifts = shifts.contiguous().type_as(rpn_cls_score).float()
        A = self._num_anchors  # A = 9 anchors per location
        K = shifts.size(0)     # K = feat_height * feat_width, the number of feature-map locations
"""
add A anchors (1, A, 4) to
cell K shifts (K, 1, 4) to get
shift anchors (K, A, 4)
reshape to (K*A, 4) shifted anchors
"""
self._anchors = self._anchors.type_as(gt_boxes) # move to specific gpu.
all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
all_anchors = all_anchors.view(K * A, 4)
total_anchors = int(K * A)
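        # Worked example of the broadcast above (illustrative numbers, stride 16):
        # a base anchor (-84, -40, 99, 55) placed at the cell with shift
        # (32, 16, 32, 16) becomes (-52, -24, 131, 71); repeating this for all K
        # cells and all A base anchors gives the K*A rows of all_anchors.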
"""
Cut the anchor whose size exceeds the image, inds_inside is the index array of
the anchor inside the image
"""
keep = ((all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
                (all_anchors[:, 2] < int(im_info[0][1]) + self._allowed_border) &
                (all_anchors[:, 3] < int(im_info[0][0]) + self._allowed_border))
inds_inside = torch.nonzero(keep).view(-1)
# keep only inside anchors
anchors = all_anchors[inds_inside, :]
        # label: 1 is positive, 0 is negative, -1 is "don't care"
        # initialize labels to -1 and both weight tensors to zero;
        # they are filled in below once anchors have been labelled
labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
"""
这里overlaps是计算所有anchor与ground-truth的重合度,它是一个len(anchors) x
len(gt_boxes)的二维数组,每个元素是各个anchor和gt_boxes的overlap值,这个overlap值的
计算是这样的:
overlap = (重合部分面积) / (anchor面积 + gt_boxes面积 - 重合部分面积)
· argmax_overlaps是每个anchor对应最大overlap的gt_boxes的下标
· max_overlaps是每个anchor对应最大的overlap值
相对应的
· gt_argmax_overlaps是每个gt_boxes对应最大overlap的anchor的下标
· gt_max_overlaps是每个gt_boxes对应最大的overlap值
"""
overlaps = bbox_overlaps_batch(anchors, gt_boxes) # calculate iou
max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
gt_max_overlaps, _ = torch.max(overlaps, 1)
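        # Quick sanity check of the IoU (continuous coordinates, ignoring the +1
        # pixel convention): anchor (0, 0, 10, 10) vs. gt (5, 5, 15, 15) overlap on
        # a 5x5 region, so IoU = 25 / (100 + 100 - 25) ~= 0.14, well below the
        # negative threshold, so such an anchor would be labelled background.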
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
gt_max_overlaps[gt_max_overlaps==0] = 1e-5
keep = torch.sum(overlaps.eq(gt_max_overlaps.view(batch_size,1,-1).expand_as(overlaps)), 2)
if torch.sum(keep) > 0:
labels[keep>0] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
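        # Summary of the labelling rules (with the usual defaults
        # RPN_POSITIVE_OVERLAP=0.7, RPN_NEGATIVE_OVERLAP=0.3):
        #   * IoU >= 0.7 with some gt box          -> label 1 (foreground)
        #   * highest-IoU anchor of each gt box    -> label 1 (so every gt gets a match)
        #   * IoU < 0.3 with every gt box          -> label 0 (background)
        #   * anything in between                  -> label -1, ignored by the loss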
        # Subsample so that at most RPN_FG_FRACTION of the RPN_BATCHSIZE anchors are
        # positive (128 of 256 with the default config); negatives fill the rest of the batch.
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
sum_fg = torch.sum((labels == 1).int(), 1)
sum_bg = torch.sum((labels == 0).int(), 1)
for i in range(batch_size):
# subsample positive labels if we have too many
if sum_fg[i] > num_fg:
fg_inds = torch.nonzero(labels[i] == 1).view(-1)
                # torch.randperm seems to have a bug in the multi-GPU setting that causes a segfault.
# See https://github.com/pytorch/pytorch/issues/1868 for more details.
# use numpy instead.
#rand_num = torch.randperm(fg_inds.size(0)).type_as(gt_boxes).long()
rand_num = torch.from_numpy(np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long()
disable_inds = fg_inds[rand_num[:fg_inds.size(0)-num_fg]]
labels[i][disable_inds] = -1
# num_bg = cfg.TRAIN.RPN_BATCHSIZE - sum_fg[i]
num_bg = cfg.TRAIN.RPN_BATCHSIZE - torch.sum((labels == 1).int(), 1)[i]
# subsample negative labels if we have too many
if sum_bg[i] > num_bg:
bg_inds = torch.nonzero(labels[i] == 0).view(-1)
#rand_num = torch.randperm(bg_inds.size(0)).type_as(gt_boxes).long()
rand_num = torch.from_numpy(np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
disable_inds = bg_inds[rand_num[:bg_inds.size(0)-num_bg]]
labels[i][disable_inds] = -1
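        # Example with the default RPN_BATCHSIZE=256 and RPN_FG_FRACTION=0.5:
        # num_fg = 128. If an image has 200 positive anchors, 72 are randomly reset
        # to -1; if it only has 30 positives, all 30 are kept and
        # num_bg = 256 - 30 = 226 negatives are sampled instead.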
offset = torch.arange(0, batch_size)*gt_boxes.size(1)
argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
"""
计算所有的positive的dx,dy,dw,dh,但只有128个positive最后参与loss的回传
"""
bbox_targets = _compute_targets_batch(anchors, gt_boxes.view(-1,5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))
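        # bbox_transform_batch encodes each (anchor, matched gt) pair with the standard
        # Faster R-CNN parameterization:
        #     dx = (gx - ax) / aw,   dy = (gy - ay) / ah,
        #     dw = log(gw / aw),     dh = log(gh / ah)
        # where (ax, ay, aw, ah) and (gx, gy, gw, gh) are the centre coordinates and
        # sizes of the anchor and of its matched ground-truth box.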
# use a single value instead of 4 values for easy index.
bbox_inside_weights[labels==1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting: every sampled example (positive or negative) gets
            # 1/num_examples, e.g. 1/256 with the default RPN_BATCHSIZE, so the
            # regression loss is averaged over the RPN batch (note: num_examples
            # reuses the loop index i, i.e. the count from the last image in the batch)
            num_examples = torch.sum(labels[i] >= 0)
            positive_weights = 1.0 / num_examples.item()
            negative_weights = 1.0 / num_examples.item()
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            # weight positives and negatives separately, as in the original py-faster-rcnn
            positive_weights = cfg.TRAIN.RPN_POSITIVE_WEIGHT / torch.sum(labels == 1).item()
            negative_weights = (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / torch.sum(labels == 0).item()
        bbox_outside_weights[labels == 1] = positive_weights
        bbox_outside_weights[labels == 0] = negative_weights
labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)
outputs = []
labels = labels.view(batch_size, height, width, A).permute(0,3,1,2).contiguous()
labels = labels.view(batch_size, 1, A * height, width)
outputs.append(labels)
bbox_targets = bbox_targets.view(batch_size, height, width, A*4).permute(0,3,1,2).contiguous()
outputs.append(bbox_targets)
anchors_count = bbox_inside_weights.size(1)
bbox_inside_weights = bbox_inside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)
bbox_inside_weights = bbox_inside_weights.contiguous().view(batch_size, height, width, 4*A)\
.permute(0,3,1,2).contiguous()
outputs.append(bbox_inside_weights)
bbox_outside_weights = bbox_outside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)
bbox_outside_weights = bbox_outside_weights.contiguous().view(batch_size, height, width, 4*A)\
.permute(0,3,1,2).contiguous()
outputs.append(bbox_outside_weights)
"""
outputs=[labels,bbox_targets,bbox_inside_weights,bbox_outside_weights]
"""
return outputs
def backward(self, top, propagate_down, bottom):
"""This layer does not propagate gradients."""
pass
def reshape(self, bottom, top):
"""Reshaping happens during the call to forward."""
pass
def _unmap(data, count, inds, batch_size, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 2:
ret = torch.Tensor(batch_size, count).fill_(fill).type_as(data)
ret[:, inds] = data
else:
ret = torch.Tensor(batch_size, count, data.size(2)).fill_(fill).type_as(data)
ret[:, inds,:] = data
return ret
def _compute_targets_batch(ex_rois, gt_rois):
"""Compute bounding-box regression targets for an image."""
return bbox_transform_batch(ex_rois, gt_rois[:, :, :4])
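For orientation, here is a minimal usage sketch of the layer (shapes and dummy values are illustrative assumptions; cfg, generate_anchors, bbox_overlaps_batch, bbox_transform_batch and the helpers above must be importable from the surrounding project for it to run):

import torch

# Illustrative smoke test: a 600x800 image, stride-16 backbone, batch size 1.
feat_h, feat_w = 38, 50                                      # ~600/16, 800/16
rpn_cls_score = torch.zeros(1, 18, feat_h, feat_w)           # 2 scores x 9 anchors per cell
gt_boxes = torch.tensor([[[100., 100., 300., 250., 1.]]])    # one gt box: (x1, y1, x2, y2, class)
im_info = torch.tensor([[600., 800., 1.]])                   # (height, width, scale)
num_boxes = torch.tensor([1])

layer = _AnchorTargetLayer(feat_stride=16, scales=[8, 16, 32], ratios=[0.5, 1, 2])
labels, bbox_targets, inside_w, outside_w = layer((rpn_cls_score, gt_boxes, im_info, num_boxes))
# labels:       (1, 1, 9*feat_h, feat_w), values in {1, 0, -1}
# bbox_targets: (1, 36, feat_h, feat_w), four regression targets per anchor
# inside_w / outside_w have the same shape as bbox_targets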