"""
#注意在这个位置就生成了预置框了_anchors= generate_anchors(scales=np.array(anchor_scales))
_num_anchors= _anchors.shape[0]im_info= im_info[0]
assertrpn_cls_prob_reshape.shape[0] == 1,'Only single item batches are supported'# cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'# cfg_key = 'TEST'pre_nms_topN= cfg[cfg_key].RPN_PRE_NMS_TOP_Npost_nms_topN= cfg[cfg_key].RPN_POST_NMS_TOP_N
#这个阈值很重要nms_thresh= cfg[cfg_key].RPN_NMS_THRESH
min_size= cfg[cfg_key].RPN_MIN_SIZE
# the first set of _num_anchors channels are bg probs# the second set are the fg probs, which we wantscores= rpn_cls_prob_reshape[:, _num_anchors:, :, :]bbox_deltas= rpn_bbox_pred# im_info = bottom[2].data[0, :]
# 1. Generate proposals from bbox deltas and shifted anchorsheight,width = scores.shape[-2:]
# Enumerate all shiftsshift_x= np.arange(0, width) * _feat_strideshift_y= np.arange(0, height) * _feat_strideshift_x,shift_y = np.meshgrid(shift_x, shift_y)shifts= np.vstack((shift_x.ravel, shift_y.ravel,shift_x.ravel,shift_y.ravel)).transpose
# Enumerate all shifted anchors:## add A anchors (1, A, 4) to# cell K shifts (K, 1, 4) to get# shift anchors (K, A, 4)# reshape to (K*A, 4) shifted anchorsA= _num_anchorsK= shifts.shape[0]anchors= _anchors.reshape((1, A, 4)) +shifts.reshape((1, K, 4)).transpose((1, 0, 2))anchors= anchors.reshape((K * A, 4))
# Transpose and reshape predicted bbox transformations to get them# into the same order as the anchors:## bbox deltas will be (1, 4 * A, H, W) format# transpose to (1, H, W, 4 * A)# reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)# in slowest to fastest orderbbox_deltas= bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
# Same story for the scores:## scores are (1, A, H, W) format# transpose to (1, H, W, A)# reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)scores= scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
# Convert anchors into proposals via bbox transformationsproposals= bbox_transform_inv(anchors, bbox_deltas)
# 2. clip predicted boxes to imageproposals= clip_boxes(proposals, im_info[:2])
# 3. remove predicted boxes with either height or width < threshold# (NOTE:convert min_size to input image scale stored in im_info[2])keep= _filter_boxes(proposals, min_size * im_info[2])proposals= proposals[keep, :]scores= scores[keep]
# # remove irregular boxes, too fat too tall# keep = _filter_irregular_boxes(proposals)# proposals = proposals[keep, :]# scores = scores[keep]
# 4. sort all (proposal, score) pairs by score from highest to lowest# 5. take top pre_nms_topN (e.g. 6000)order= scores.ravel.argsort[::-1]ifpre_nms_topN > 0:order= order[:pre_nms_topN]proposals= proposals[order, :]scores= scores[order]
# 6. apply nms (e.g. threshold = 0.7)# 7. take after_nms_topN (e.g. 300)# 8. return the top proposals (-> RoIs top)keep= nms(np.hstack((proposals, scores)), nms_thresh)
ifpost_nms_topN > 0:keep= keep[:post_nms_topN]proposals= proposals[keep, :]scores= scores[keep]
# Output rois blob# Our RPN implementation only supports a single input image, so all# batch inds are 0batch_inds= np.zeros((proposals.shape[0], 1), dtype=np.float32)blob= np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))returnblob# top[0].reshape(*(blob.shape))# top[0].data[...] = blob
# [Optional] output scores blob# if len(top) > 1:# top[1].reshape(*(scores.shape))# top[1].data[...] = scores
Generate Anchors
在上一步的 proposal layer 中我们会发现,anchor 的生成过程处于 region proposal 流程的最前端,那 generate 到底干了些什么呢?首先,从向 BackBone 输入影像到输出 feature map 的过程中,由于卷积时一些不带 padding 的操作,数据层的尺寸会越来越小;典型地,800×800 的输入经过 VGG 下采样后尺寸变为 50×50。这个阶段我们先用简单的代码来研究其中的原理。
我们使用锚点缩放参数 8、16、32,长宽比 0.5、1、2,下采样倍数为 16。现在 feature map 上的每个像素都映射了原图中 16×16 像素的区域,如上图所示。1. 我们首先需要在这个 16×16 像素区域的顶端生成锚点框,然后沿着 x、y 轴去生成所有锚点框。
import numpy as np

# Down-sampling factor: each feature-map pixel maps to a
# sub_sample x sub_sample patch of the input image.
sub_sample = 16
# Aspect ratios (height / width) and scales of the anchors built per location.
ratios = [0.5, 1, 2]
anchor_scales = [8, 16, 32]

# One row per (ratio, scale) pair, laid out as (y1, x1, y2, x2).
anchor_base = np.zeros((len(ratios) * len(anchor_scales), 4), dtype=np.float32)

# Centre of the patch covered by the first feature-map pixel.
ctr_y = sub_sample / 2.
ctr_x = sub_sample / 2.
print(ctr_y, ctr_x)

for i, aspect in enumerate(ratios):
    for j, scale in enumerate(anchor_scales):
        # h / w == aspect, while h * w == (sub_sample * scale) ** 2.
        h = sub_sample * scale * np.sqrt(aspect)
        w = sub_sample * scale * np.sqrt(1. / aspect)

        # Rows are ordered ratio-major, scale-minor.
        index = i * len(anchor_scales) + j
        anchor_base[index, 0] = ctr_y - h / 2.
        anchor_base[index, 1] = ctr_x - w / 2.
        anchor_base[index, 2] = ctr_y + h / 2.
        anchor_base[index, 3] = ctr_x + w / 2.
以上得到的 anchor_base 即为 feature map 上第一个像素位置的 anchor,
我们必须依照这个流程生成所有像素位置上的 anchor:
2.在 feature map 上的每个像素位置,我们需要生成