Faster Rcnn 源码解析（二）—— proposals_layers.py

最新推荐文章于 2023-05-09 16:46:17 发布

傲娇的程序猿

最新推荐文章于 2023-05-09 16:46:17 发布

阅读量543

点赞数

分类专栏：深度学习文章标签：深度学习 FasterRcnn 源码 proposals_layers.py

本文链接：https://blog.csdn.net/qq_23126625/article/details/80337703

版权

深度学习专栏收录该内容

10 篇文章 0 订阅

订阅专栏

ProposalLayer层

功能：利用NMS提取前topN proposals

输入：

ProposalLayer有三个输入
bottom[0]：每个anchor属于背景/前景的得分，大小为：(batch_size, 2*A, H, W)，A表示每个位置上anchor的数量；前A个通道是背景(bg)概率，后A个通道是前景(fg)概率
bottom[1]：每个anchor的坐标偏移量，大小为：(batch_size, 4*A, H, W)
bottom[2]：'im_info'，图片的h, w, scale

输出：

top[0]：M行5列，M表示proposals的个数，每行为（batch_ind, x1, y1, x2, y2）；batch_ind表示该proposal属于batch中的哪一张图片，由于只支持单张图片输入，这里都是0
top[1]：M行1列（可选输出），每个proposal的score，即该proposal属于前景(fg)的概率。

源码：

import caffe
import numpy as np
import yaml
from fast_rcnn.config import cfg
from generate_anchors import generate_anchors
from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
from fast_rcnn.nms_wrapper import nms

# When True, ProposalLayer prints the feature stride, the base anchors, the
# image size/scale and the score-map shape while it runs.
DEBUG = False

class ProposalLayer(caffe.Layer):
    """
    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Bottom blobs (as read by forward()):
        bottom[0]: RPN class scores; only the channels from index
            num_anchors onward (the foreground scores) are used.
        bottom[1]: predicted bbox deltas, 4 channels per anchor.
        bottom[2]: im_info; entries [0:2] are the image size used for
            clipping and entry [2] is the image scale.

    Top blobs:
        top[0]: (R, 5) rois, each row (batch_ind, x1, y1, x2, y2); batch_ind
            is always 0 because only single-image batches are supported.
        top[1] (optional): the scores of the R kept proposals.
    """

    def setup(self, bottom, top):
        """Parse the layer parameters, build the base anchors, shape the tops.

        Reads 'feat_stride' (required) and 'scales' (optional, defaults to
        (8, 16, 32)) from the YAML layer parameter string.
        """
        # parse the layer parameter string, which must be valid YAML.
        # safe_load is used on purpose: yaml.load() without an explicit Loader
        # can construct arbitrary Python objects and is rejected by recent
        # PyYAML releases; the parameters here are plain scalars and lists.
        layer_params = yaml.safe_load(self.param_str_)

        self._feat_stride = layer_params['feat_stride']
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]

        if DEBUG:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('feat_stride: {}'.format(self._feat_stride))
            print('anchors:')
            print(self._anchors)

        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[0].reshape(1, 5)

        # scores blob: holds scores for R regions of interest
        if len(top) > 1:
            top[1].reshape(1, 1, 1, 1)

    def forward(self, bottom, top):
        """Turn RPN scores and bbox deltas into the final list of proposals.

        Writes the kept rois into top[0] and, when a second top exists, their
        scores into top[1]. Both tops are reshaped to fit the number of
        proposals that survive NMS.
        """
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        # number of top-scoring proposals kept before NMS
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        # number of proposals kept after NMS
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        # overlap threshold used by NMS
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        # minimum proposal side length; scaled by im_info[2] before use below
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        # (i.e. keep channels [A:] of the 2*A score channels)
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        # predicted box regression deltas for every anchor
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]  # spatial size of the feature map

        if DEBUG:
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts:
        # map every feature-map cell back to its offset in the input image
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        # one row per anchor at every cell; the (x, y, x, y) shifts imply the
        # columns are corner coordinates (x1, y1, x2, y2)
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        # (equivalent to storing the values from a nested h, w, a loop)
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations:
        # applying the deltas to the anchors yields the proposals,
        # shape (1 * H * W * A, 4)
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        # `order` holds row indices sorted by descending score
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

def _filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep

傲娇的程序猿

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Faster Rcnn 源码解析（二）—— proposals_layers.py

输入：ProposalLayer有三个输入bottom[0]：是每一个anchor是否是前景的概率，大小为：(batch_size,2*A,w,h)，A表示anchor的数量bottom[1]: 每个anchor的坐标偏移量，大小为：(batch_size,4*A,w,h)bottom[2]: 'im_info',图片w,h,scale输出：top[0]:M行5列，M表示proposals的...
复制链接

扫一扫