Faster R-CNN 在经过 VGG16 卷积得到特征图之后,使用 RPN 网络产生候选框。首先在特征图的每个点上产生9个框(anchor),然后用512个3×3的卷积核将特征图卷积到512维,特征图长宽尺度不变,再分别进行分类和边框回归。
分类分支用1×1的卷积核将特征图卷积到18维,特征图长宽尺度依旧不变,特征图每个点拥有18个通道,对应该点产生的9个框各自的前景和背景得分,一共9×2=18个score。边框回归分支用1×1的卷积核卷积到36维,每个点有36个通道,对应9个边框各自的[x,y,width,height]四个回归量,即9×4=36。
# Fragment of a method body (the enclosing `def` is not shown here): builds the
# RPN head on top of the feature map `net` and returns its classification and
# box-regression tensors.

# Build anchor component
# NOTE(review): `_anchor_component` is defined elsewhere; presumably it sets up
# the anchors (and `self._num_anchors`) used below -- confirm against the class.
self._anchor_component()
# Create RPN Layer
# 3x3 convolution with 512 output channels applied to `net`. No `padding` or
# `activation_fn` is passed, so slim.conv2d's defaults apply (presumably 'SAME'
# padding and ReLU -- confirm against the TF-Slim version in use).
rpn = slim.conv2d(net, 512, [3, 3], trainable=is_training, weights_initializer=initializer, scope="rpn_conv/3x3")
# Record the RPN feature tensor in the list of activations kept for summaries.
self._act_summaries.append(rpn)
# Classification branch: 1x1 convolution producing `self._num_anchors * 2`
# channels (two scores per anchor). `activation_fn=None` keeps the output as
# raw, un-activated scores.
rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_cls_score')
# Change it so that the score has 2 as its channel size
rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
# Softmax over the reshaped scores.
# NOTE(review): presumably normalises over the 2-channel axis, i.e. per-anchor
# two-class probabilities -- verify in `_softmax_layer`.
rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
# Reshape the probabilities back to `self._num_anchors * 2` channels, the same
# channel count as `rpn_cls_score`.
rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
# Box-regression branch: 1x1 convolution producing `self._num_anchors * 4`
# channels (four regression values per anchor), with no activation.
rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
# Return the probabilities, the box predictions, and both the raw and the
# 2-channel-reshaped classification scores.
return rpn_cls_prob, rpn_bbox_pred, rpn_cls_score, rpn_cls_score_reshape