使用Mxnet进行重构SSD(三)

最新推荐文章于 2022-11-19 15:34:05 发布

米小凡

最新推荐文章于 2022-11-19 15:34:05 发布

阅读量570

点赞数

本文链接：https://blog.csdn.net/xiaomifanhxx/article/details/89353478

版权

前面博客介绍了DataSet与Transform的生成，但是在做目标检测的时候我们的label是不固定的，由于每张图片物体出现的个数是不一样的，我们要将label生成anchor形状的，使用的是target_generator函数，输入的box是(1,N,4),label是(1,N,1)。

整体函数：

class SSDTargetGenerator(Block):
    """Training targets generator for Single-shot Object Detection.
    Parameters
    ----------
    iou_thresh : float
        IOU overlap threshold for maximum matching, default is 0.5.
    neg_thresh : float
        IOU overlap threshold for negative mining, default is 0.5.
    negative_mining_ratio : float
        Ratio of hard vs positive for negative mining.
    stds : array-like of size 4, default is (0.1, 0.1, 0.2, 0.2)
        Std value to be divided from encoded values.
    """
    def __init__(self, iou_thresh=0.5, neg_thresh=0.5, negative_mining_ratio=3,
                 stds=(0.1, 0.1, 0.2, 0.2), **kwargs):
        super(SSDTargetGenerator, self).__init__(**kwargs)
        self._matcher = CompositeMatcher(
            [BipartiteMatcher(share_max=False), MaximumMatcher(iou_thresh)])
        if negative_mining_ratio > 0:
            self._sampler = OHEMSampler(negative_mining_ratio, thresh=neg_thresh)
            self._use_negative_sampling = True
        else:
            self._sampler = NaiveSampler()
            self._use_negative_sampling = False
        self._cls_encoder = MultiClassEncoder()
        self._box_encoder = NormalizedBoxCenterEncoder(stds=stds)
        self._center_to_corner = BBoxCenterToCorner(split=False)

    # pylint: disable=arguments-differ
    def forward(self, anchors, cls_preds, gt_boxes, gt_ids):
        """Generate training targets."""
        anchors = self._center_to_corner(anchors.reshape((-1, 4)))
        ious = nd.transpose(nd.contrib.box_iou(anchors, gt_boxes), (1, 0, 2))
        matches = self._matcher(ious)
        if self._use_negative_sampling:
            samples = self._sampler(matches, cls_preds, ious)
        else:
            samples = self._sampler(matches)
        cls_targets = self._cls_encoder(samples, matches, gt_ids)
        box_targets, box_masks = self._box_encoder(samples, matches, anchors, gt_boxes)
        return cls_targets, box_targets, box_masks

center_to_corner(anchors(shape为(1,N,4)))

class BBoxCenterToCorner(gluon.HybridBlock):
    def __init__(self, axis=-1, split=False):
        super(BBoxCenterToCorner, self).__init__()
        self._split = split
        self._axis = axis

    def hybrid_forward(self, F, x):
        """Hybrid forward"""
        x, y, w, h = F.split(x, axis=self._axis, num_outputs=4)
        hw = w / 2
        hh = h / 2
        xmin = x - hw
        ymin = y - hh
        xmax = x + hw
        ymax = y + hh
        if not self._split:
            return F.concat(xmin, ymin, xmax, ymax, dim=self._axis)
        else:
            return xmin, ymin, xmax, ymax

bbox_iou(anchors(shape为(N,4)),bbox(shape为(1,M,4)))

def bbox_iou(bbox_a, bbox_b, offset=0):
    if bbox_a.shape[1] < 4 or bbox_b.shape[1] < 4:
        raise IndexError("Bounding boxes axis 1 must have at least length 4")

    tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    br = np.minimum(bbox_a[:, None, 2:4], bbox_b[:, 2:4])

    area_i = np.prod(br - tl + offset, axis=2) * (tl < br).all(axis=2)
    area_a = np.prod(bbox_a[:, 2:4] - bbox_a[:, :2] + offset, axis=1)
    area_b = np.prod(bbox_b[:, 2:4] - bbox_b[:, :2] + offset, axis=1)
    return area_i / (area_a[:, None] + area_b - area_i)

Matcher（ious）

class BipartiteMatcher(gluon.HybridBlock):
    def __init__(self, threshold=1e-12, is_ascend=False, eps=1e-12, share_max=True):
        super(BipartiteMatcher, self).__init__()
        self._threshold = threshold
        self._is_ascend = is_ascend
        self._eps = eps
        self._share_max = share_max

    def hybrid_forward(self, F, x):
        match = F.contrib.bipartite_matching(x, threshold=self._threshold,
                                             is_ascend=self._is_ascend)
        # make sure if iou(a, y) == iou(b, y), then b should also be a good match
        # otherwise positive/negative samples are confusing
        # potential argmax and max
        pargmax = x.argmax(axis=-1, keepdims=True)  # (B, num_anchor, 1)
        maxs = x.max(axis=-2, keepdims=True)  # (B, 1, num_gt)
        if self._share_max:
            mask = F.broadcast_greater_equal(x + self._eps, maxs)  # (B, num_anchor, num_gt)
            mask = F.sum(mask, axis=-1, keepdims=True)  # (B, num_anchor, 1)
        else:
            pmax = F.pick(x, pargmax, axis=-1, keepdims=True)   # (B, num_anchor, 1)
            mask = F.broadcast_greater_equal(pmax + self._eps, maxs)  # (B, num_anchor, num_gt)
            mask = F.pick(mask, pargmax, axis=-1, keepdims=True)  # (B, num_anchor, 1)
        new_match = F.where(mask > 0, pargmax, F.ones_like(pargmax) * -1)
        result = F.where(match[0] < 0, new_match.squeeze(axis=-1), match[0])
        return result

class MaximumMatcher(gluon.HybridBlock):
    def __init__(self, threshold):
        super(MaximumMatcher, self).__init__()
        self._threshold = threshold

    def hybrid_forward(self, F, x):
        argmax = F.argmax(x, axis=-1)
        match = F.where(F.pick(x, argmax, axis=-1) >= self._threshold, argmax,
                        F.ones_like(argmax) * -1)
        return match

Sampler（matcher）

class NaiveSampler(gluon.HybridBlock):
    def __init__(self):
        super(NaiveSampler, self).__init__()

    def hybrid_forward(self, F, x):
        """Hybrid forward"""
        marker = F.ones_like(x)
        y = F.where(x >= 0, marker, marker * -1)
        return y

class OHEMSampler(gluon.Block):
    def forward(self, x, logits, ious):
        """Forward"""
        F = nd
        num_positive = F.sum(x > -1, axis=1)
        num_negative = self._ratio * num_positive
        num_total = x.shape[1]  # scalar
        num_negative = F.minimum(F.maximum(self._min_samples, num_negative),
                                 num_total - num_positive)
        positive = logits.slice_axis(axis=2, begin=1, end=-1)
        background = logits.slice_axis(axis=2, begin=0, end=1).reshape((0, -1))
        maxval = positive.max(axis=2)
        esum = F.exp(logits - maxval.reshape((0, 0, 1))).sum(axis=2)
        score = -F.log(F.exp(background - maxval) / esum)
        mask = F.ones_like(score) * -1
        score = F.where(x < 0, score, mask)  # mask out positive samples
        if len(ious.shape) == 3:
            ious = F.max(ious, axis=2)
        score = F.where(ious < self._thresh, score, mask)  # mask out if iou is large
        argmaxs = F.argsort(score, axis=1, is_ascend=False)

        # neg number is different in each batch, using dynamic numpy operations.
        y = np.zeros(x.shape)
        y[np.where(x.asnumpy() >= 0)] = 1  # assign positive samples
        argmaxs = argmaxs.asnumpy()
        for i, num_neg in zip(range(x.shape[0]), num_negative.asnumpy().astype(np.int32)):
            indices = argmaxs[i, :num_neg]
            y[i, indices.astype(np.int32)] = -1  # assign negative samples
        return F.array(y, ctx=x.context)

cls_encoder（samples,matches,gt_ids）

class MultiClassEncoder(gluon.HybridBlock):
    def __init__(self, ignore_label=-1):
        super(MultiClassEncoder, self).__init__()
        self._ignore_label = ignore_label

    def hybrid_forward(self, F, samples, matches, refs):
        # samples (B, N) (+1, -1, 0: ignore), matches (B, N) [0, M), refs (B, M)
        # reshape refs (B, M) -> (B, 1, M) -> (B, N, M)
        refs = F.repeat(refs.reshape((0, 1, -1)), axis=1, repeats=matches.shape[1])
        # ids (B, N, M) -> (B, N), value [0, M + 1), 0 reserved for background class
        target_ids = F.pick(refs, matches, axis=2) + 1
        # samples 0: set ignore samples to ignore_label
        targets = F.where(samples > 0.5, target_ids, nd.ones_like(target_ids) * self._ignore_label)
        # samples -1: set negative samples to 0
        targets = F.where(samples < -0.5, nd.zeros_like(targets), targets)
        return targets

bbox_encoder（samples, matches, anchors, bboxs）

class NormalizedBoxCenterEncoder(gluon.Block):
    def __init__(self, stds=(0.1, 0.1, 0.2, 0.2), means=(0., 0., 0., 0.)):
        super(NormalizedBoxCenterEncoder, self).__init__()
        assert len(stds) == 4, "Box Encoder requires 4 std values."
        self._stds = stds
        self._means = means
        with self.name_scope():
            self.corner_to_center = BBoxCornerToCenter(split=True)

    def forward(self, samples, matches, anchors, refs):
        F = nd
        # TODO(zhreshold): batch_pick, take multiple elements?
        # refs [B, M, 4], anchors [B, N, 4], samples [B, N], matches [B, N]
        # refs [B, M, 4] -> reshape [B, 1, M, 4] -> repeat [B, N, M, 4]
        ref_boxes = F.repeat(refs.reshape((0, 1, -1, 4)), axis=1, repeats=matches.shape[1])
        # refs [B, N, M, 4] -> 4 * [B, N, M]
        ref_boxes = F.split(ref_boxes, axis=-1, num_outputs=4, squeeze_axis=True)
        # refs 4 * [B, N, M] -> pick from matches [B, N, 1] -> concat to [B, N, 4]
        ref_boxes = F.concat(*[F.pick(ref_boxes[i], matches, axis=2).reshape((0, -1, 1)) \
                               for i in range(4)], dim=2)
        # transform based on x, y, w, h
        # g [B, N, 4], a [B, N, 4] -> codecs [B, N, 4]
        g = self.corner_to_center(ref_boxes)
        a = self.corner_to_center(anchors)
        t0 = ((g[0] - a[0]) / a[2] - self._means[0]) / self._stds[0]
        t1 = ((g[1] - a[1]) / a[3] - self._means[1]) / self._stds[1]
        t2 = (F.log(g[2] / a[2]) - self._means[2]) / self._stds[2]
        t3 = (F.log(g[3] / a[3]) - self._means[3]) / self._stds[3]
        codecs = F.concat(t0, t1, t2, t3, dim=2)
        # samples [B, N] -> [B, N, 1] -> [B, N, 4] -> boolean
        temp = F.tile(samples.reshape((0, -1, 1)), reps=(1, 1, 4)) > 0.5
        # fill targets and masks [B, N, 4]
        targets = F.where(temp, codecs, F.zeros_like(codecs))
        masks = F.where(temp, F.ones_like(temp), F.zeros_like(temp))
        return targets, masks