前面的博客介绍了DataSet与Transform的生成。但在做目标检测时，每张图片中物体出现的个数不一样，所以label的数量是不固定的；我们需要把label转换成与anchor一一对应的训练目标，这一步使用的是target_generator函数(即下面的SSDTargetGenerator)，输入的box形状是(1,N,4)，label形状是(1,N,1)。
整体函数:
class SSDTargetGenerator(Block):
    """Build per-anchor training targets for single-shot object detection.

    Parameters
    ----------
    iou_thresh : float
        IOU overlap threshold passed to the maximum matcher, default is 0.5.
    neg_thresh : float
        IOU overlap threshold passed to the negative-mining sampler,
        default is 0.5.
    negative_mining_ratio : float
        Ratio of hard negatives vs positives for negative mining. When it is
        not greater than 0, a NaiveSampler is used instead of an OHEMSampler.
    stds : array-like of size 4, default is (0.1, 0.1, 0.2, 0.2)
        Std values the encoded box values are divided by.
    """

    def __init__(self, iou_thresh=0.5, neg_thresh=0.5, negative_mining_ratio=3,
                 stds=(0.1, 0.1, 0.2, 0.2), **kwargs):
        super(SSDTargetGenerator, self).__init__(**kwargs)
        # Bipartite matching first, then the thresholded maximum matcher.
        bipartite = BipartiteMatcher(share_max=False)
        maximum = MaximumMatcher(iou_thresh)
        self._matcher = CompositeMatcher([bipartite, maximum])
        # Hard-negative mining is only enabled for a positive ratio.
        if negative_mining_ratio > 0:
            sampler, mining = OHEMSampler(negative_mining_ratio, thresh=neg_thresh), True
        else:
            sampler, mining = NaiveSampler(), False
        self._sampler = sampler
        self._use_negative_sampling = mining
        self._cls_encoder = MultiClassEncoder()
        self._box_encoder = NormalizedBoxCenterEncoder(stds=stds)
        self._center_to_corner = BBoxCenterToCorner(split=False)

    # pylint: disable=arguments-differ
    def forward(self, anchors, cls_preds, gt_boxes, gt_ids):
        """Generate training targets.

        Returns the tuple ``(cls_targets, box_targets, box_masks)``.
        """
        # Flatten anchors to rows of 4 values and convert center -> corner.
        corner_anchors = self._center_to_corner(anchors.reshape((-1, 4)))
        # Swap the first two axes of the pairwise IOU before matching.
        pairwise = nd.contrib.box_iou(corner_anchors, gt_boxes)
        ious = nd.transpose(pairwise, (1, 0, 2))
        matches = self._matcher(ious)
        # The mining sampler additionally needs the class predictions and IOUs.
        if self._use_negative_sampling:
            sampler_inputs = (matches, cls_preds, ious)
        else:
            sampler_inputs = (matches,)
        samples = self._sampler(*sampler_inputs)
        cls_targets = self._cls_encoder(samples, matches, gt_ids)
        box_targets, box_masks = self._box_encoder(
            samples, matches, corner_anchors, gt_boxes)
        return cls_targets, box_targets, box_masks
center_to_corner(anchors(shape为(1,N,4)))
class BBoxCenterToCorner(gluon.HybridBlock):
    """Convert boxes from (x, y, w, h) center format to corner format.

    Parameters
    ----------
    axis : int, default is -1
        Axis along which the 4 box values are stored.
    split : bool, default is False
        When True the four results (xmin, ymin, xmax, ymax) are returned
        separately; otherwise they are concatenated back along ``axis``.
    """

    def __init__(self, axis=-1, split=False):
        super(BBoxCenterToCorner, self).__init__()
        self._axis = axis
        self._split = split

    def hybrid_forward(self, F, x):
        """Hybrid forward"""
        center_x, center_y, width, height = F.split(
            x, axis=self._axis, num_outputs=4)
        half_w = width / 2
        half_h = height / 2
        # Order is xmin, ymin, xmax, ymax.
        corners = (center_x - half_w, center_y - half_h,
                   center_x + half_w, center_y + half_h)
        if self._split:
            return corners
        return F.concat(*corners, dim=self._axis)
bbox_iou(anchors(shape为(N,4)),bbox(shape为(1,M,4)))(注:上面forward中实际调用的是nd.contrib.box_iou;下面给出的numpy版本bbox_iou要求两个输入都是二维数组,即(N,4)与(M,4))
def bbox_iou(bbox_a, bbox_b, offset=0):
    """Calculate the pairwise Intersection-Over-Union (IOU) of two box sets.

    Parameters
    ----------
    bbox_a : numpy.ndarray
        2-D array with at least 4 columns; columns 0-3 are read as
        xmin, ymin, xmax, ymax.
    bbox_b : numpy.ndarray
        2-D array with at least 4 columns, same column layout as ``bbox_a``.
    offset : float or int, default is 0
        Value added to every width and height before areas are computed
        (e.g. 1 for pixel-inclusive integer coordinates).

    Returns
    -------
    numpy.ndarray
        Array of shape (len(bbox_a), len(bbox_b)) holding the IOU of every
        box in ``bbox_a`` with every box in ``bbox_b``.

    Raises
    ------
    IndexError
        If either input has fewer than 4 columns on axis 1.
    """
    if bbox_a.shape[1] < 4 or bbox_b.shape[1] < 4:
        raise IndexError("Bounding boxes axis 1 must have at least length 4")

    # Broadcast bbox_a against bbox_b: results have shape (N, M, 2).
    top_left = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    bottom_right = np.minimum(bbox_a[:, None, 2:4], bbox_b[:, 2:4])

    # Decide "do they overlap" on the offset-adjusted width/height, so the
    # mask agrees with how the areas themselves are measured. For offset=0
    # this is the same as ``top_left < bottom_right``; for offset=1 boxes that
    # share a single pixel row/column are correctly counted as overlapping.
    inter_wh = bottom_right - top_left + offset
    area_i = np.prod(inter_wh, axis=2) * (inter_wh > 0).all(axis=2)
    area_a = np.prod(bbox_a[:, 2:4] - bbox_a[:, :2] + offset, axis=1)
    area_b = np.prod(bbox_b[:, 2:4] - bbox_b[:, :2] + offset, axis=1)
    return area_i / (area_a[:, None] + area_b - area_i)
Matcher(ious)
class BipartiteMatcher(gluon.HybridBlock):
    """Bipartite matching with a fallback for rows left unmatched.

    Parameters
    ----------
    threshold : float, default is 1e-12
        Forwarded to ``bipartite_matching`` as ``threshold``.
    is_ascend : bool, default is False
        Forwarded to ``bipartite_matching`` as ``is_ascend``.
    eps : float, default is 1e-12
        Tolerance added to scores before they are compared with the
        per-column maximum.
    share_max : bool, default is True
        Selects how the fallback mask is built: from every column of a row
        (True) or only from the row's own best column (False).
    """

    def __init__(self, threshold=1e-12, is_ascend=False, eps=1e-12, share_max=True):
        super(BipartiteMatcher, self).__init__()
        self._share_max = share_max
        self._eps = eps
        self._is_ascend = is_ascend
        self._threshold = threshold

    def hybrid_forward(self, F, x):
        """Hybrid forward"""
        bipartite = F.contrib.bipartite_matching(
            x, threshold=self._threshold, is_ascend=self._is_ascend)
        # If iou(a, y) == iou(b, y), b should also count as a good match,
        # otherwise positive/negative samples become confusing.
        # Candidate argmax per anchor and max per ground truth:
        best_gt = x.argmax(axis=-1, keepdims=True)  # (B, num_anchor, 1)
        col_max = x.max(axis=-2, keepdims=True)  # (B, 1, num_gt)
        if self._share_max:
            hits = F.broadcast_greater_equal(x + self._eps, col_max)  # (B, num_anchor, num_gt)
            hits = F.sum(hits, axis=-1, keepdims=True)  # (B, num_anchor, 1)
        else:
            row_best = F.pick(x, best_gt, axis=-1, keepdims=True)  # (B, num_anchor, 1)
            hits = F.broadcast_greater_equal(row_best + self._eps, col_max)  # (B, num_anchor, num_gt)
            hits = F.pick(hits, best_gt, axis=-1, keepdims=True)  # (B, num_anchor, 1)
        fallback = F.where(hits > 0, best_gt, F.ones_like(best_gt) * -1)
        # Keep the bipartite result where it exists; otherwise use the fallback.
        row_match = bipartite[0]
        return F.where(row_match < 0, fallback.squeeze(axis=-1), row_match)
class MaximumMatcher(gluon.HybridBlock):
    """Match each row to its highest-scoring column when the score is large enough.

    Parameters
    ----------
    threshold : float
        Minimum score (inclusive) for the best column to count as a match;
        rows below it are marked with -1.
    """

    def __init__(self, threshold):
        super(MaximumMatcher, self).__init__()
        self._threshold = threshold

    def hybrid_forward(self, F, x):
        """Hybrid forward"""
        best = F.argmax(x, axis=-1)
        best_score = F.pick(x, best, axis=-1)
        unmatched = F.ones_like(best) * -1
        return F.where(best_score >= self._threshold, best, unmatched)
Sampler(matcher)
class NaiveSampler(gluon.HybridBlock):
    """Label every matched entry +1 and every unmatched entry -1."""

    def __init__(self):
        super(NaiveSampler, self).__init__()

    def hybrid_forward(self, F, x):
        """Hybrid forward"""
        positive = F.ones_like(x)
        negative = positive * -1
        # Entries >= 0 are match indices, i.e. positives.
        return F.where(x >= 0, positive, negative)
class OHEMSampler(gluon.Block):
    """Sample positives plus the hardest negatives (online hard example mining).

    Parameters
    ----------
    ratio : float
        Number of negatives to keep per positive sample.
    min_samples : int, default is 0
        Lower bound for the number of negatives per batch item.
        NOTE(review): the default of 0 (no lower bound) follows the upstream
        GluonCV signature; confirm against the library version in use.
    thresh : float, default is 0.5
        Anchors whose largest IOU reaches this value are never used as
        negatives.
    """

    def __init__(self, ratio, min_samples=0, thresh=0.5):
        super(OHEMSampler, self).__init__()
        # forward() reads all three attributes, so they must be set here.
        self._ratio = ratio
        self._min_samples = min_samples
        self._thresh = thresh

    def forward(self, x, logits, ious):
        """Forward

        Parameters
        ----------
        x : NDArray
            Match result per anchor; values > -1 are treated as positives.
        logits : NDArray
            Class predictions; index 0 on axis 2 is used as background.
        ious : NDArray
            IOUs per anchor; if 3-D, the maximum over axis 2 is used.

        Returns
        -------
        NDArray
            Same shape as ``x``: +1 positive, -1 selected negative, 0 ignored.
        """
        F = nd
        num_positive = F.sum(x > -1, axis=1)
        num_negative = self._ratio * num_positive
        num_total = x.shape[1]  # scalar
        # Clamp to [min_samples, number of non-positive anchors].
        num_negative = F.minimum(F.maximum(self._min_samples, num_negative),
                                 num_total - num_positive)
        # end=None keeps every foreground class. end=-1 would drop the last
        # class and yield an empty slice when there is only one foreground
        # class. The max is only a stabilising shift, so scores are unchanged.
        positive = logits.slice_axis(axis=2, begin=1, end=None)
        background = logits.slice_axis(axis=2, begin=0, end=1).reshape((0, -1))
        maxval = positive.max(axis=2)
        esum = F.exp(logits - maxval.reshape((0, 0, 1))).sum(axis=2)
        # Negative log of the background softmax probability: large values
        # mean the anchor is confidently predicted as foreground.
        score = -F.log(F.exp(background - maxval) / esum)
        mask = F.ones_like(score) * -1
        score = F.where(x < 0, score, mask)  # mask out positive samples
        if len(ious.shape) == 3:
            ious = F.max(ious, axis=2)
        score = F.where(ious < self._thresh, score, mask)  # mask out if iou is large
        argmaxs = F.argsort(score, axis=1, is_ascend=False)

        # The number of negatives differs per batch item, so the selection is
        # done with dynamic numpy operations.
        y = np.zeros(x.shape)
        y[np.where(x.asnumpy() >= 0)] = 1  # assign positive samples
        argmaxs = argmaxs.asnumpy()
        neg_counts = num_negative.asnumpy().astype(np.int32)
        for i, num_neg in enumerate(neg_counts):
            indices = argmaxs[i, :num_neg]
            y[i, indices.astype(np.int32)] = -1  # assign negative samples
        return F.array(y, ctx=x.context)
cls_encoder(samples,matches,gt_ids)
class MultiClassEncoder(gluon.HybridBlock):
    """Encode per-anchor classification targets from samples and matches.

    Parameters
    ----------
    ignore_label : float, default is -1
        Target value written for samples that are neither positive nor
        negative.
    """

    def __init__(self, ignore_label=-1):
        super(MultiClassEncoder, self).__init__()
        self._ignore_label = ignore_label

    def hybrid_forward(self, F, samples, matches, refs):
        """Hybrid forward

        Parameters
        ----------
        samples : (B, N)
            +1 positive, -1 negative, 0 ignore.
        matches : (B, N)
            Matched ground-truth index per anchor, value range [0, M).
        refs : (B, M)
            Ground-truth class ids.

        Returns
        -------
        (B, N) targets: class id + 1 for positives, 0 for negatives and
        ``ignore_label`` for everything else.
        """
        # NOTE(review): `matches.shape[1]` is read directly, which presumably
        # still requires NDArray inputs with a concrete shape - confirm.
        # reshape refs (B, M) -> (B, 1, M) -> (B, N, M)
        refs = F.repeat(refs.reshape((0, 1, -1)), axis=1, repeats=matches.shape[1])
        # ids (B, N, M) -> (B, N), value [0, M + 1), 0 reserved for background class
        target_ids = F.pick(refs, matches, axis=2) + 1
        # Use the backend `F` that was passed in (not a hard-coded `nd`) so
        # the helper ops match the type of the inputs.
        # samples 0: set ignore samples to ignore_label
        targets = F.where(samples > 0.5, target_ids,
                          F.ones_like(target_ids) * self._ignore_label)
        # samples -1: set negative samples to 0
        targets = F.where(samples < -0.5, F.zeros_like(targets), targets)
        return targets
bbox_encoder(samples, matches, anchors, bboxs)
class NormalizedBoxCenterEncoder(gluon.Block):
    """Encode matched ground-truth boxes as normalized offsets from anchors.

    Parameters
    ----------
    stds : array-like of size 4, default is (0.1, 0.1, 0.2, 0.2)
        Std values the encoded offsets are divided by.
    means : array-like of size 4, default is (0., 0., 0., 0.)
        Mean values subtracted from the encoded offsets.
    """

    def __init__(self, stds=(0.1, 0.1, 0.2, 0.2), means=(0., 0., 0., 0.)):
        super(NormalizedBoxCenterEncoder, self).__init__()
        assert len(stds) == 4, "Box Encoder requires 4 std values."
        self._stds = stds
        self._means = means
        with self.name_scope():
            self.corner_to_center = BBoxCornerToCenter(split=True)

    def forward(self, samples, matches, anchors, refs):
        """Return ``(targets, masks)``; both are zero wherever samples <= 0.5."""
        F = nd
        means, stds = self._means, self._stds
        # TODO(zhreshold): batch_pick, take multiple elements?
        # refs [B, M, 4], anchors [B, N, 4], samples [B, N], matches [B, N]
        # refs [B, M, 4] -> reshape [B, 1, M, 4] -> repeat [B, N, M, 4]
        num_anchors = matches.shape[1]
        tiled = F.repeat(refs.reshape((0, 1, -1, 4)), axis=1, repeats=num_anchors)
        # [B, N, M, 4] -> 4 * [B, N, M]
        coords = F.split(tiled, axis=-1, num_outputs=4, squeeze_axis=True)
        # 4 * [B, N, M] -> pick by matches -> 4 * [B, N, 1] -> concat [B, N, 4]
        picked = []
        for i in range(4):
            picked.append(F.pick(coords[i], matches, axis=2).reshape((0, -1, 1)))
        matched = F.concat(*picked, dim=2)
        # Transform based on x, y, w, h (BBoxCornerToCenter is defined
        # elsewhere; its four outputs are assumed to be in that order).
        gt = self.corner_to_center(matched)
        anc = self.corner_to_center(anchors)
        gt_x, gt_y, gt_w, gt_h = gt[0], gt[1], gt[2], gt[3]
        anc_x, anc_y, anc_w, anc_h = anc[0], anc[1], anc[2], anc[3]
        # Center offsets are scaled by the anchor size; sizes use a log ratio.
        t_x = ((gt_x - anc_x) / anc_w - means[0]) / stds[0]
        t_y = ((gt_y - anc_y) / anc_h - means[1]) / stds[1]
        t_w = (F.log(gt_w / anc_w) - means[2]) / stds[2]
        t_h = (F.log(gt_h / anc_h) - means[3]) / stds[3]
        codecs = F.concat(t_x, t_y, t_w, t_h, dim=2)
        # samples [B, N] -> [B, N, 1] -> [B, N, 4] -> boolean
        is_positive = F.tile(samples.reshape((0, -1, 1)), reps=(1, 1, 4)) > 0.5
        # fill targets and masks [B, N, 4]
        targets = F.where(is_positive, codecs, F.zeros_like(codecs))
        masks = F.where(is_positive, F.ones_like(is_positive),
                        F.zeros_like(is_positive))
        return targets, masks