Faster R-CNN tf代码解析
训练阶段
_region_proposal函数
_region_proposal流程图如下:
各个变量的含义及维度如下表所示:
Name | Info | Dim |
---|---|---|
_num_anchors | 多尺度多ratio anchors的数目 | 9 |
rpn | RPN特征层 | n*h*w*cfg.RPN_CHANNELS |
rpn_cls_score | anchors分类特征 | n*h*w*(9*2) |
rpn_cls_score_reshape | rpn_cls_score形变 | n*(h*9)*w*2 |
rpn_cls_prob_reshape | anchors的类别概率 | n*(h*9)*w*2 |
rpn_cls_pred | anchors的类别 | (n*h*9*w)*1 |
rpn_cls_prob | anchors的类别概率 | n*h*w*(9*2) |
rpn_bbox_pred | anchors的回归位置偏移 | n*h*w*(9*4) |
rois | 候选ROI区域(类别+坐标) | n*256*5 |
roi_scores | 初选(post_nms_topN)rois在rpn_cls_score中对应值 | n*post_nms_topN*1 |
def _region_proposal(self, net_conv, is_training, initializer):
    """Build the RPN head on top of ``net_conv`` and return the resulting RoIs.

    A 3x3 convolution produces the shared RPN feature map. Two sibling 1x1
    convolutions then emit ``2 * self._num_anchors`` classification channels
    and ``4 * self._num_anchors`` box-regression channels. The intermediate
    tensors are stored in ``self._predictions`` under their own names.

    Args:
        net_conv: Feature map the RPN is built on.
        is_training: Selects the training or the test proposal path; also
            forwarded as ``trainable`` to every convolution created here.
        initializer: Weights initializer used by all three convolutions.

    Returns:
        The ``rois`` tensor produced by the proposal layer(s).

    Raises:
        NotImplementedError: When not training and ``cfg.TEST.MODE`` is
            neither ``'nms'`` nor ``'top'``.
    """
    rpn = slim.conv2d(
        net_conv, cfg.RPN_CHANNELS, [3, 3],
        trainable=is_training,
        weights_initializer=initializer,
        scope="rpn_conv/3x3"
    )
    self._act_summaries.append(rpn)

    # Keyword arguments shared by the two 1x1 sibling heads below.
    head_kwargs = dict(
        trainable=is_training,
        weights_initializer=initializer,
        padding='VALID',
        activation_fn=None
    )

    # Anchor classification: two scores per anchor.
    rpn_cls_score = slim.conv2d(
        rpn, self._num_anchors * 2, [1, 1],
        scope='rpn_cls_score', **head_kwargs
    )
    # Regroup so the last axis holds exactly the 2 class scores, softmax over
    # that axis, then regroup back to 2 * num_anchors channels.
    rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
    rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
    flat_scores = tf.reshape(rpn_cls_score_reshape, [-1, 2])
    rpn_cls_pred = tf.argmax(flat_scores, axis=1, name="rpn_cls_pred")
    rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")

    # Bounding-box regression: four offsets per anchor.
    rpn_bbox_pred = slim.conv2d(
        rpn, self._num_anchors * 4, [1, 1],
        scope='rpn_bbox_pred', **head_kwargs
    )

    if is_training:
        # Proposal layer output: box positions plus their foreground scores.
        # NOTE(review): the author's notes say this keeps post_nms_topN=2000
        # anchors with an all-zero batch_inds column -- cfg-dependent, confirm.
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        # rpn_labels: per the author's notes, marks each feature-map position
        # as positive, negative, or ignored -- confirm in _anchor_target_layer.
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
            # NOTE(review): the author's notes say this samples 256 rois and
            # overwrites the first column with each roi's class -- confirm.
            rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
    elif cfg.TEST.MODE == 'nms':
        # NOTE(review): the author's notes say post_nms_topN=300 at test time
        # -- cfg-dependent, confirm.
        rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
    elif cfg.TEST.MODE == 'top':
        rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
    else:
        raise NotImplementedError

    # Expose the RPN tensors to the rest of the network under fixed keys.
    for key, tensor in (
        ("rpn_cls_score", rpn_cls_score),
        ("rpn_cls_score_reshape", rpn_cls_score_reshape),
        ("rpn_cls_prob", rpn_cls_prob),
        ("rpn_cls_pred", rpn_cls_pred),
        ("rpn_bbox_pred", rpn_bbox_pred),
        ("rois", rois),
    ):
        self._predictions[key] = tensor

    # NOTE(review): per the author's notes each roi row is one leading column
    # (class when training, all zeros at test time) followed by four box
    # coordinates -- confirm against the proposal layers.
    return rois
def _reshape_layer(self, bottom, num_dim, name):
    """Regroup the channels of ``bottom`` so the last axis has size ``num_dim``.

    The tensor is transposed to channels-first order, reshaped so that its
    second axis is ``num_dim`` (the leftover channel factor folds into the
    following axis), and transposed back to channels-last order.

    Assumes ``bottom`` is a 4-D NHWC tensor -- TODO confirm with callers.
    The leading dimension of the result is hard-coded to 1.

    Args:
        bottom: Input tensor.
        num_dim: Size of the last axis of the returned tensor.
        name: Variable-scope name under which the ops are created.

    Returns:
        The regrouped tensor, back in channels-last order.
    """
    input_shape = tf.shape(bottom)
    with tf.variable_scope(name):
        # NHWC (TF layout) -> NCHW (Caffe layout).
        to_caffe = tf.transpose(bottom, [0, 3, 1, 2])
        # Target shape [1, num_dim, -1, W]: axis 2 of the input keeps its
        # size, and -1 absorbs everything that is left.
        target_shape = tf.concat(axis=0, values=[[1, num_dim, -1], [input_shape[2]]])
        reshaped = tf.reshape(to_caffe, target_shape)
        # NCHW (Caffe layout) -> NHWC (TF layout), putting num_dim last.
        return tf.transpose(reshaped, [0, 2, 3, 1])
def _softmax_layer(self, bottom, name):
    """Apply softmax to ``bottom``, created under the op name ``name``.

    When ``name`` starts with ``'rpn_cls_prob_reshape'`` the input is first
    flattened to 2-D (all leading axes merged, last axis kept), softmaxed,
    and then restored to its original shape. For any other name the softmax
    is applied to ``bottom`` directly.

    Args:
        bottom: Input score tensor.
        name: Op name for the softmax; its prefix also selects the code path.

    Returns:
        A tensor of probabilities with the same shape as ``bottom``.
    """
    if not name.startswith('rpn_cls_prob_reshape'):
        return tf.nn.softmax(bottom, name=name)

    original_shape = tf.shape(bottom)
    # Keep only the last axis and merge all the others into rows.
    flat_scores = tf.reshape(bottom, [-1, original_shape[-1]])
    flat_probs = tf.nn.softmax(flat_scores, name=name)
    # Restore the shape the caller passed in.
    return tf.reshape(flat_probs, original_shape)
参考网址:
https://www.cnblogs.com/darkknightzh/p/10043864.html
论文:https://arxiv.org/abs/1506.01497
tf的第三方faster rcnn:https://github.com/endernewton/tf-faster-rcnn