The proposal_layer in FPN (the network's "proposal" stage)

The NMS implementation appears at the end and is worth a look.

layer {
  name: 'proposal'
  type: 'Python'
  bottom: 'im_info'
  bottom: 'rpn_bbox_pred/p2'
  bottom: 'rpn_bbox_pred/p3'
  bottom: 'rpn_bbox_pred/p4'
  bottom: 'rpn_bbox_pred/p5'
  bottom: 'rpn_bbox_pred/p6'    # rpn_bbox_pred/* are the box offsets (proposal vs. anchor) produced by conv layers; see rpn_data for details
  bottom: 'fpn_out_reshape/p2'
  bottom: 'fpn_out_reshape/p3'
  bottom: 'fpn_out_reshape/p4'
  bottom: 'fpn_out_reshape/p5'
  bottom: 'fpn_out_reshape/p6'  # fpn_out_reshape/* are the cls scores produced by the softmax layer fpn_out
  top: 'rpn_rois'
  python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    param_str: "'feat_stride': 4,8,16,32,64"
  }
}
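Note how param_str reaches the layer: setup() below parses it as YAML, which here
yields the plain string '4,8,16,32,64' that then gets split on commas. A minimal
sketch of that round trip, run standalone outside Caffe (assuming the py2-era
PyYAML this codebase uses):

import yaml

params = yaml.load("'feat_stride': 4,8,16,32,64")
# -> {'feat_stride': '4,8,16,32,64'} (the value parses as a plain string)
strides = [int(s) for s in params['feat_stride'].split(',')]
# -> [4, 8, 16, 32, 64]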



# --------------------------------------------------------

# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------


import caffe
import numpy as np
import yaml
from fast_rcnn.config import cfg
from generate_anchors import generate_anchors
from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
from fast_rcnn.nms_wrapper import nms
import numpy.random as npr


DEBUG = False


def vis_all_detection(im_array, detections, class_names, scale):
    """
    visualize all detections in one image
    :param im_array: [b=1 c h w] in rgb
    :param detections: numpy.ndarray, one [x1 y1 x2 y2 score] row per box
    :param class_names: list of names in imdb
    :param scale: visualize the scaled image
    :return:
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.pyplot import savefig
    import random
    # per-channel pixel means that were subtracted during preprocessing
    a = np.array([103.06, 115.9, 123.15])
    # transform_inverse (defined elsewhere in this codebase) undoes the mean
    # subtraction and channel layout so the image can be displayed
    im = transform_inverse(im_array, a)
    plt.imshow(im)
    for j in range(detections.shape[0]):
        color = (random.random(), random.random(), random.random())  # generate a random color
        det = detections[j]
        bbox = det[:4]
        score = det[4]
        rect = plt.Rectangle((bbox[0], bbox[1]),
                             bbox[2] - bbox[0],
                             bbox[3] - bbox[1], fill=False,
                             edgecolor=color, linewidth=3.5)
        plt.gca().add_patch(rect)
        # plt.gca().text(bbox[0], bbox[1] - 2,
        #                '{:s} {:.3f}'.format(str(class_names[j]), score),
        #                bbox=dict(facecolor=color, alpha=0.5), fontsize=12, color='white')
    plt.show()  # no-op under the Agg backend; savefig below writes the file
    # the image mean doubles as a quick-and-dirty unique file name
    name = np.mean(im)
    savefig('vis/' + str(name) + '.png')
    plt.clf()
    plt.cla()
    plt.close()

class ProposalLayer(caffe.Layer):
    """
    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").
    """


    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str_)

        self._feat_stride = [int(i) for i in layer_params['feat_stride'].split(',')]
        self._scales = cfg.FPNRSCALES    # default: 2 ** np.arange(4, 6)
        self._ratios = cfg.FPNRATIOS     # default: [0.5, 1, 2]
        self._min_sizes = 16
        self._num_anchors = len(self._scales) * len(self._ratios)
        self._output_score = False






        if DEBUG:
            print 'feat_stride: {}'.format(self._feat_stride)
            # anchors are generated per level in forward(), so print the
            # scales/ratios they will be built from
            print 'scales: {}, ratios: {}'.format(self._scales, self._ratios)


        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[0].reshape(1, 5)    # output boxes


        # scores blob: holds scores for R regions of interest
        if len(top) > 1:
            top[1].reshape(1, 1, 1, 1)    # output scores


    def forward(self, bottom, top):
        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # default 12000 (train) / 6000 (test)
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # default 2000 (train) / 1000 (test)
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # default 0.7
        min_size = self._min_sizes
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want

        # 'im_info' comes from roi_data_layer's minibatch.py:
        # blobs['im_info'] = np.array(
        #     [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        #     dtype=np.float32)
        im_info = bottom[0].data[0, :]

        batch_size = bottom[1].data.shape[0]
        if batch_size > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")


        # fpn_out_reshape/* hold the cls scores produced by the softmax layer fpn_out
        cls_prob_dict = {
            'stride64': bottom[10].data,  # 'fpn_out_reshape/p6'
            'stride32': bottom[9].data,   # 'fpn_out_reshape/p5'
            'stride16': bottom[8].data,   # 'fpn_out_reshape/p4'
            'stride8': bottom[7].data,    # 'fpn_out_reshape/p3'
            'stride4': bottom[6].data,    # 'fpn_out_reshape/p2'
        }
        # per-level box offsets predicted by rpn_bbox_pred/*; used below to
        # turn anchors into proposals
        bbox_pred_dict = {
            'stride64': bottom[5].data,   # 'rpn_bbox_pred/p6'
            'stride32': bottom[4].data,   # 'rpn_bbox_pred/p5'
            'stride16': bottom[3].data,   # 'rpn_bbox_pred/p4'
            'stride8': bottom[2].data,    # 'rpn_bbox_pred/p3'
            'stride4': bottom[1].data,    # 'rpn_bbox_pred/p2'
        }
      
        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)

            # the anchors centered on the first position of the original image
            # (len(scales) * len(ratios) of them per position)
            sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)

            scores = cls_prob_dict['stride' + str(s)][:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)]

            # bbox_deltas are offsets (center + width/height) determined by the
            # weights of the rpn_bbox_pred layer. bbox_transform_inv below combines
            # them with the anchors on the original image to produce the predicted
            # boxes (proposals); training then updates the rpn_bbox_pred weights.
            # See the worked example after bbox_transform_inv at the bottom of the file.

            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)  # image size / stride = current feature-map size


            # Enumerate all shifts needed to map feature-map cells back to the
            # original image; these are used to place the anchors
            shift_x = np.arange(0, width) * stride  # cell index * stride = position in the original image
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
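            # e.g. (illustrative) with stride=4 and a 2x2 feature map:
            # shifts = [[0, 0, 0, 0],
            #           [4, 0, 4, 0],
            #           [0, 4, 0, 4],
            #           [4, 4, 4, 4]]
            # one row per cell, with the (x, y) shift duplicated so it applies
            # to both corners (x1, y1) and (x2, y2) of an anchor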


            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]

            # generate all anchors over the original image
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))
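            # broadcasting: (1, A, 4) + (K, 1, 4) -> (K, A, 4), then reshaped to
            # (K * A, 4); row k * A + a is anchor a translated to cell k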


            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = _clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))


            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = _clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))


            # Convert anchors into proposals via bbox transformations

            # offsets + anchors = predicted boxes (proposals)

            proposals = bbox_transform_inv(anchors, bbox_deltas)


            # 2. clip predicted boxes to the image boundary
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]


            proposal_list.append(proposals)
            score_list.append(scores)


        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]


        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det, nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]

        # pad or trim so the output size stays fixed at post_nms_topN
        if len(keep) < post_nms_topN:
            try:
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
            except ValueError:
                # NMS kept nothing: fall back to dummy 16x16 boxes at the origin
                proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
                proposals[:, 2] = 16
                proposals[:, 3] = 16
                batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
                # the first column (the image batch index) is all zeros
                blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
                top[0].reshape(*(blob.shape))
                top[0].data[...] = blob
                return
            keep = np.hstack((keep, pad))
           
        proposals = proposals[keep, :]
        scores = scores[keep]


        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob
            


    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass


    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass




def _filter_boxes(boxes, min_size):
    """ Remove all boxes with any side smaller than min_size """
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep


def _clip_pad(tensor, pad_shape):
    """
    Clip boxes of the pad area.
    :param tensor: [n, c, H, W]
    :param pad_shape: [h, w]
    :return: [n, c, h, w]
    """
    H, W = tensor.shape[2:]
    h, w = pad_shape


    if h < H or w < W:
        tensor = tensor[:, :, :h, :w].copy()


    return tensor


def bbox_transform_inv(boxes, deltas):
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)


    boxes = boxes.astype(deltas.dtype, copy=False)


    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights


    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]


    
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h


    return pred_boxes
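

A quick sanity check of the decoding, with made-up numbers: an anchor
[0, 0, 15, 15] has w = h = 16 and, by this code's convention (x1 + 0.5 * w),
center (8.0, 8.0). Deltas [0.1, 0.2, 0.0, 0.0] shift the center by 0.1 * 16
and 0.2 * 16 while exp(0) = 1 keeps the size, giving a 16x16 box centered at
(9.6, 11.2):

anchor = np.array([[0., 0., 15., 15.]])
delta = np.array([[0.1, 0.2, 0.0, 0.0]])
print bbox_transform_inv(anchor, delta)  # -> approximately [[ 1.6  3.2 17.6 19.2]]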



def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]


    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]


    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])  # compare box i with every remaining box
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])


        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        # inds indexes into order[1:], whose position 0 is order[1] (the box
        # with the next-highest score), so add 1 to map back into order
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]


    return keep
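
A toy run of py_cpu_nms (made-up boxes) shows the suppression at work: the
second box overlaps the first with IoU of roughly 0.83 > 0.7 and is dropped,
while the disjoint third box survives:

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)
print py_cpu_nms(dets, thresh=0.7)  # -> [0, 2]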
