目录
參考自:
详细的Faster R-CNN源码解析之proposal_layer和proposal_target_layer源码解析
详细的Faster R-CNN源码解析之RPN源码解析
Tensorflow 版本 Faster RCNN 代码解读
Faster R-CNN 源码解析(Tensorflow版)
_build_network
里面包含了 _region_proposal、_crop_pool_layer、_region_classification 三个主要的方法,分别对应整个方法最主要的流程走向:生成推荐区域、crop(池化)、再判定(分类)。
先上源码及注释:
def _build_network(self, is_training=True):
    """Assemble the detection graph and return its main outputs.

    The flow follows three main steps: generate region proposals
    (``_region_proposal``), crop/pool them (``_crop_pool_layer``), and
    classify them (``_region_classification``).

    :param is_training: forwarded to the head, RPN, tail and
        classification builders.
    :return: tuple ``(rois, cls_prob, bbox_pred)``.
    :raises NotImplementedError: if ``cfg.POOLING_MODE`` is not ``'crop'``.
    """
    # Select the initializer family once. The truncated variant draws from a
    # normal distribution and re-draws any value lying more than two standard
    # deviations away from the mean.
    if cfg.TRAIN.TRUNCATED:
        make_initializer = tf.truncated_normal_initializer
    else:
        make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    net_conv = self._image_to_head(is_training)

    # variable_scope opens a scope for the variables created below.
    with tf.variable_scope(self._scope, self._scope):
        # Build the anchors for the image.
        self._anchor_component()

        # Region proposal network.
        rois = self._region_proposal(net_conv, is_training, initializer)

        # Region-of-interest pooling over the generated rois; 'crop' is the
        # only pooling mode handled here.
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    fc7 = self._head_to_tail(pool5, is_training)

    with tf.variable_scope(self._scope, self._scope):
        # Region classification.
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
_anchor_component
_anchor_component====>generate_anchors_pre_tf=====>generate_anchors
最终的目的是先根据基准框 (0, 0, 15, 15) 生成一组基础 anchor(共 9 个,如下所示),然后对它们进行平移,得到最后的全部 anchor:
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
画出来:
_region_proposal
def _region_proposal(self, net_conv, is_training, initializer):
    '''
    Build the RPN layers on top of the given feature map.

    :param net_conv: feature map the first RPN convolution is applied to
    :param is_training: passed as ``trainable`` to every RPN convolution
    :param initializer: weights initializer used by every RPN convolution
    :return: the selected rois (chosen by the remainder of this method)
    '''
    # Options shared by the two 1x1 sibling heads (scores and box deltas).
    head_options = dict(trainable=is_training,
                        weights_initializer=initializer,
                        padding='VALID',
                        activation_fn=None)

    # First layer: 3x3 convolution. The original notes give its shape as
    # (1, ?, ?, 512) - that depends on cfg.RPN_CHANNELS; TODO confirm.
    rpn_features = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3],
                               trainable=is_training,
                               weights_initializer=initializer,
                               scope="rpn_conv/3x3")
    self._act_summaries.append(rpn_features)

    # Two scores per anchor; with 9 anchors the original notes give
    # shape (1, ?, ?, 18).
    rpn_cls_score = slim.conv2d(rpn_features, self._num_anchors * 2, [1, 1],
                                scope='rpn_cls_score', **head_options)

    # Change it so that the score has 2 as its channel size.
    rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2,
                                                'rpn_cls_score_reshape')

    # Softmax over the 2-channel scores, used to judge whether a box
    # contains an object.
    cls_prob_two_channel = self._softmax_layer(rpn_cls_score_reshape,
                                               "rpn_cls_prob_reshape")

    # Index of the larger score per row: object / no object.
    flat_scores = tf.reshape(rpn_cls_score_reshape, [-1, 2])
    rpn_cls_pred = tf.argmax(flat_scores, axis=1, name="rpn_cls_pred")

    # Reshape the probabilities with num_anchors * 2 as the requested channel
    # size - presumably back to the layout of rpn_cls_score; _reshape_layer
    # is defined elsewhere, TODO confirm.
    rpn_cls_prob = self._reshape_layer(cls_prob_two_channel,
                                       self._num_anchors * 2, "rpn_cls_prob")

    # Predicted box coordinates: (height, width, 9 * 4) per the original notes.
    rpn_bbox_pred = slim.conv2d(rpn_features, self._num_anchors * 4, [1, 1],
                                scope='rpn_bbox_pred', **head_options)
上面这一部分搭建了rpn的结构:
下面的部分主要是两件事:根据RPN输出的前景分数选择出roi,以及为选择出的roi设置ground truth类别和坐标变换(回归目标)。
首先是如何选择出合适的rois,对应的函数是_proposal_layer;
其次是如何为选择出的rois找到训练所需的ground truth类别和坐标变换信息,该代码文件是proposal_target_layer.py
先放出_region_proposal中剩余部分的代码,然后再对这两部分进行讲解。
if is_training:
rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
# rois是bbox的角点坐标,size=(?,5),roi_scores是bbox对应的分数
# 处理anchor,裁剪,nms,筛选
rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
# # rpn_labels是标签值,1,0,-1
# Try to have a deterministic order for the computing graph, for reproducibility
with tf.control_dependencies([rpn_labels]):
rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
else:
if cfg.TEST.MODE == 'nms':
rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
elif cfg.TEST.MODE == 'top':
rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
else:
raise NotImplementedError
self._predictions["rpn_cls_score"] = rpn_cls_score
self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
self._predictions["rpn_cls_prob"] = rpn_cls_prob
self._predictions["rpn_cls_pred"] = rpn_cls_pred
self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
self._predictions["rois"] = rois
return rois
proposal_layer的代码如下
- 使用 bbox_transform_inv 函数对 anchor 进行坐标变换,从 ctr_x, ctr_y, w, h 变换成 anchor 的左下角和右上角;
- 使用 clip_boxes 将改变坐标信息后超过图像边界的框的边框裁剪一下,使之在图像边界之内;
- pre_nms_topN、post_nms_topN、nms_thresh 是用来筛选 anchors 的三个指标:
  - pre_nms_topN 取的 6000:将 scores 降序排序,取前 pre_nms_topN 个;
  - 用 nms 函数进行非极大值抑制,把 IoU 大于 nms_thresh 的删掉;
  - 再次按照 nms 后的 foreground softmax 由大到小排列,提取前 post_nms_topN(300)个结果作为 proposals 的输出。
proposal_layer
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
'''
A simplified version compared to fast/er RCNN
For details please see the technical report
:param rpn_cls_prob:
:param rpn_bbox_pred:
:param im_info: [M,N,scale_factor]保存了将任意图像缩放到M×N的所有信息
:param cfg_key:
:param _feat_stride:feat_stride=16用于计算anchor的偏移量
:param anchors:
:param num_anchors:
:return:
'''
if type(cfg_key) == bytes:
cfg_key = cfg_key.decode(