【tensorflow + Faster RCNN】anchor_target_layer、proposal_target_layer、proposal_layer

最新推荐文章于 2020-12-27 22:51:12 发布

Mr_health

最新推荐文章于 2020-12-27 22:51:12 发布

阅读量4k

点赞数 10

本文链接：https://blog.csdn.net/mr_health/article/details/84952190

版权

tensorflow 同时被 2 个专栏收录

19 篇文章 1 订阅

订阅专栏

faster rcnn

5 篇文章 1 订阅

订阅专栏

接在tensorflow+faster rcnn代码理解（一）：构建vgg前端和RPN网络之后，对于每张输入图像（600×800）RPN会产生17100个anchor，构建RPN后会输出4个tensor，维度如下：

rpn_cls_prob：（1,38,50,18）
rpn_bbox_pred：（1,38,50,36）
rpn_cls_score：（1,38,50,18）
rpn_cls_score_reshape：（1,342,50,2）

先放出来总的结构图：

1.构建proposal（build_proposals函数）

    def build_proposals(self, is_training, rpn_cls_prob, rpn_bbox_pred, rpn_cls_score):
        """Build the proposal part of the graph and return the RoIs.

        In training, proposals are generated from the RPN outputs, the RPN
        anchor targets are built, and the proposals are then passed through
        the proposal-target layer. In testing, only proposals are generated,
        using the strategy named by ``cfg.FLAGS.test_mode``.

        Raises:
            NotImplementedError: in test mode, when ``cfg.FLAGS.test_mode`` is
                neither ``'nms'`` nor ``'top'``.
        """
        if not is_training:
            # Inference: pick the proposal strategy from the configuration.
            if cfg.FLAGS.test_mode == 'nms':
                make_proposals = self._proposal_layer
            elif cfg.FLAGS.test_mode == 'top':
                make_proposals = self._proposal_top_layer
            else:
                raise NotImplementedError
            test_rois, _ = make_proposals(rpn_cls_prob, rpn_bbox_pred, "rois")
            return test_rois

        # Select candidate RoIs from the anchor probabilities (rpn_cls_prob)
        # and the predicted box offsets (rpn_bbox_pred).
        candidate_rois, candidate_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        # Build the anchor labels that are used to train the RPN.
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")

        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
            # Sample, from the candidates, the RoIs used to train the Fast R-CNN head.
            sampled_rois, _ = self._proposal_target_layer(candidate_rois, candidate_scores, "rpn_rois")
        return sampled_rois

在训练中要完成：

anchor_target_layer：从17100个anchor中选择256个训练RPN网络，即rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
proposal_layer：从17100个anchor中先按得分选出12000个，再经过NMS保留至多2000个作为rois供给fast rcnn部分，即rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
proposal_target_layer：在完成（2）的基础上从2000个rois中选择出128个训练fast rcnn，即rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")

在测试（test or inference）中只需要完成proposal_layer：从17100个anchor中先按得分选出6000个，再经过NMS保留至多300个proposal供给fast rcnn部分，即图中蓝线部分。

1.1 anchor_target_layer，训练RPN

代码：

def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """
    Same as the anchor target layer in original Fast/er RCNN.

    Labels every anchor as foreground (1), background (0) or "don't care"
    (-1) from its overlap with the ground-truth boxes, subsamples the labelled
    anchors so that at most ``cfg.FLAGS.rpn_batchsize`` stay labelled, and
    computes the box-regression targets and loss weights used to train the RPN.

    Args:
        rpn_cls_score: RPN class-score map; only ``shape[1:3]`` (height,
            width) is read.
        gt_boxes: ground-truth boxes, one row per box.
        im_info: image info; row 0 is used, element 0 as the image height
            and element 1 as the image width.
        _feat_stride: not used by this function.
        all_anchors: anchor boxes, one row of four coordinates per anchor.
        num_anchors: number of anchors per feature-map location (A).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes ``(1, 1, A * height, width)``
        for the labels and ``(1, height, width, A * 4)`` for the other three.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): one row per anchor, one column per ground-truth box.
    # np.float64 replaces the np.float alias, which was removed in NumPy 1.24.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # For every anchor: index of the gt box it overlaps most (row-wise argmax).
    argmax_overlaps = overlaps.argmax(axis=1)
    # The corresponding best overlap value of every anchor.
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For every gt box: index of one anchor with the highest overlap (column-wise argmax).
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    # The highest overlap value reached for every gt box.
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Indices of all anchors that reach a gt box's highest overlap, ties included.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Regression targets between every anchor and its best-matching gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights

主要的输入：rpn_cls_score = (1,38,50,18)（用于获取W，H）gt_boxes（3,4），all_anchors(17100,4)。

步骤：

1.1.1 筛选（就当做是预处理）

（1）对于生成的all_anchors,保留在image内部的，这些内部的anchor的序号为inds_inside。17100->9340

（2）初始label = （9340，），值为-1

1.1.2 重叠率的计算

（1）假设该图片的gt数量为3，则计算每一个anchor与gt的重叠率，得到overlaps，overlaps = （9340,3）

（2）计算每一个anchor与哪个gt有最大重叠，即argmax_overlaps。argmax_overlaps = （9340，）。argmax_overlaps的值为gt的序号：如0、1...

（3）得到上述的重叠率max_overlaps。max_overlaps = （9340，）

（4）返回与每一个gt重叠率最高的anchor的序号gt_argmax_overlaps，gt_argmax_overlaps = （3，）

（5）得到上述的重叠率gt_max_overlaps。gt_max_overlaps=（3，）

（6）因为（4）只选出了与每一个gt重叠率最高的其中一个anchor，还存在其他anchor也有相同的重叠率，返回这些anchor的序号，gt_argmax_overlaps。gt_argmax_overlaps = （171，）
1.1.3 labels的计算

（1）首先将与每一个gt重叠率最高的anchor设置为fg（这些anchor的序号为gt_argmax_overlaps ），labels = 1

（2）将重叠率（max_overlaps）大于0.5的anchor设置为fg，labels = 1

（3）规定的fg数量为256*0.5=128。
① 如果1、2步骤得到的fg>128，则采样出128个，则剩余的labels=-1
② 如果1、2步骤得到的fg<=128，则无需采样

（4）将重叠率（max_overlaps）小于0.3的anchor设置为bg，labels = 0

（5）规定的bg数量为256 - fg_num.
① 如果（4）步骤得到的bg > 256 - fg_num，则进行采样，剩余的labels = -1
② 如果（4）步骤得到的bg <= 256 - fg_num，则无需采样
ps：一般情况fg的数量都会<=128，因为fg的要求重叠率>0.5，一般很难完全找满128个。而重叠率<0.3也就是标记为bg的数量远远会多，因此经常发生的情况是fg不进行下采样，而bg进行下采样，也就是256个anchor中，fg的数量一般会小于bg的数量。

1.1.4 计算偏移参数
（1）计算每一个anchor与其重叠率最大的gt之间的偏移参数（dx，dy，dw，dh），记为bbox_targets。bbox_targets = （9340,4）

（2）对于每一个anchor生成bbox_inside_weights，bbox_inside_weights = （9340,4）。其中fg对应的行为1，bg和被忽略的anchor对应的行为0

（3）对于每一个anchor生成bbox_outside_weights，bbox_outside_weights = （9340,4），fg和bg的值都为1/256，忽略为0

1.1.5 map up to original set of anchors

因为上面的labels、bbox_targets、bbox_inside_weights、bbox_outside_weights行数都是9340，也就是都是以落在图片内的anchor为基础编号的，需要将其恢复到原始的17100个anchor下的编号，此步骤需要用到inds_inside。
（1）恢复labels，那些落在图像外的anchor的label为-1
（2）恢复bbox_targets，那些落在图像外的anchor的位置为0
（3）恢复bbox_inside_weights，那些落在图像外的anchor的位置为0
（4）恢复bbox_outside_weights，那些落在图像外的anchor的位置为0

1.1.6 变换成需要的形式
（1）label(17100,)reshape(1,38,50,9)transpose(1,9,38,50)reshape(1,1,342,50)，将其赋予给rpn_labels
（2）bbox_targets(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_targets
（3）bbox_inside_weights(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_inside_weights
（4）bbox_outside_weights(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_outside_weights

anchor_target_layer的各个输出及维度为：

rpn_labels：(1,1,342,50)
rpn_bbox_targets：(1,38,50,36)
rpn_bbox_inside_weights：(1,38,50,36)
rpn_bbox_outside_weights：(1,38,50,36)

顺便生成训练RPN网络的标签信息：

# Store the four anchor_target_layer outputs under self._anchor_targets, keyed by name.
self._anchor_targets['rpn_labels'] = rpn_labels
self._anchor_targets['rpn_bbox_targets'] = rpn_bbox_targets
self._anchor_targets['rpn_bbox_inside_weights'] = rpn_bbox_inside_weights
self._anchor_targets['rpn_bbox_outside_weights'] = rpn_bbox_outside_weights

1.2 proposal_layer，供给候选区

代码：

def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report

    Turns the RPN outputs into scored region proposals: applies the predicted
    offsets to the anchors, clips the boxes to the image, keeps the
    ``pre_nms_topN`` highest-scoring boxes, runs NMS and keeps at most
    ``post_nms_topN`` of the survivors.

    Args:
        rpn_cls_prob: RPN class probabilities; the channels from index
            ``num_anchors`` onwards in the last axis are used as scores.
        rpn_bbox_pred: RPN box offsets, reshaped here to rows of 4.
        im_info: image info; the first two elements of row 0 are passed to
            ``clip_boxes``.
        cfg_key: ``"TRAIN"`` selects the training limits, anything else the
            test limits. May be ``bytes``, in which case it is decoded as UTF-8.
        _feat_stride: not used by this function.
        anchors: anchor boxes matching the rows of the reshaped ``rpn_bbox_pred``.
        num_anchors: number of anchors per feature-map location.

    Returns:
        Tuple ``(blob, scores)``: ``blob`` has one row per kept proposal, a
        leading column of zeros (batch index) followed by the four box
        coordinates; ``scores`` holds the matching scores.
    """
    # isinstance also covers bytes subclasses (e.g. numpy.bytes_), which an
    # exact type comparison would leave undecoded and route to the test branch.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes. Per the original author's note, the
    # first num_anchors channels of the last axis are background scores and
    # the remaining ones are foreground scores.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # Apply the predicted offsets to the anchors; the results are the proposals.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals (highest scores first)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores

主要的输入：rpn_cls_prob(1,38,50,18)、rpn_bbox_pred（1,38,50,36）、anchors(17100,4)

步骤：

（1）rpn_cls_prob中第四维度，前9位是背景的概率，后9位是前景的概率，所以首先要取出前景的概率，即scores = (1,38,50,9) ，之后reshape成(1×38×50×9,1)即（17100,1）

（2）将rpn_bbox_pred = （1,38,50,36） reshape成为（1×38×50×9,4），即rpn_bbox_pred=（17100,4）
（3）根据产生anchors和rpn_bbox_pred，对anchor进行修正，得到proposals=(17100,4)
（4）对scores进行降序排列
① 首先选出12000个概率最高的，此时proposals = (12000,4),scores =(12000,1)
② 利用proposals 和scores进行非极大值抑制，结果为proposals = (1214,4),scores =(1214,1)
（5）最后返回rois，rois在proposals 的基础上多了一列，为第一列，默认全为0，rois = (1214,5)
rois_scores = scores =(1214,1)

最终proposal_layer的输出及维度为：

rois： (1214,5)
rois_scores：(1214,1)

1.3 proposal_target_layer，提供rois训练fastrcnn

代码

def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.

    Optionally appends the ground-truth boxes to the candidate RoIs (when
    ``cfg.FLAGS.proposal_use_gt`` is set), delegates the sampling to
    ``_sample_rois`` and reshapes its results.

    Returns:
        Tuple ``(rois, roi_scores, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)``; the outside weights are 1.0 wherever the
        inside weights are positive and 0.0 elsewhere.
    """
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    candidate_rois, candidate_scores = rpn_rois, rpn_scores

    # Include ground-truth boxes in the set of candidate rois
    if cfg.FLAGS.proposal_use_gt:
        zero_column = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        # Drop the last gt column and prepend the zero column.
        gt_as_rois = np.hstack((zero_column, gt_boxes[:, :-1]))
        candidate_rois = np.vstack((candidate_rois, gt_as_rois))
        # not sure if it a wise appending, but anyway i am not using it
        candidate_scores = np.vstack((candidate_scores, zero_column))

    num_images = 1
    rois_per_image = cfg.FLAGS.batch_size / num_images
    # Number of foreground RoIs requested per image.
    fg_rois_per_image = np.round(cfg.FLAGS.proposal_fg_fraction * rois_per_image)

    # Sample rois with classification labels and bounding box regression
    # targets
    sampled = _sample_rois(
        candidate_rois, candidate_scores, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = sampled

    target_width = _num_classes * 4
    rois = rois.reshape(-1, 5)
    roi_scores = roi_scores.reshape(-1)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, target_width)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, target_width)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights

def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1..4 are the box coordinates.
        all_scores: scores aligned with ``all_rois``.
        gt_boxes: ground-truth boxes; columns 0..3 are the box coordinates
            and column 4 is the class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)``
        for the sampled RoIs, foreground samples first; labels of the
        background samples are set to 0.

    Raises:
        ValueError: if no RoI qualifies as foreground or background, even
            with the relaxed lower overlap bound of 0.01.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the np.float alias, which was removed in NumPy 1.24.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    # For every RoI: index of the gt box it overlaps most.
    gt_assignment = overlaps.argmax(axis=1)
    # The corresponding overlap value.
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Nothing matched either range: retry the background selection with a
        # relaxed lower overlap bound of 0.01.
        bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                           (max_overlaps >= 0.01))[0]
        if bg_inds.size == 0:
            # Fail with a clear message instead of dropping into the debugger
            # or letting the sampler fail on an empty array.
            raise ValueError(
                "_sample_rois: no RoI qualifies as foreground or background; "
                "cannot sample a training batch")
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights

输入：proposal_layer 输出的rois(1214,5)和rois_scores(1214,1)，以及gt_boxes(3,5)最后一列为标签

步骤：

1.3.1 准备

计算正负样本的数量。正样本 = batch_size×proposal_fg_fraction = 128×0.25 = 32，负样本 = 128-32 = 96
1.3.2 进行样本的采样过程_sample_rois
（1）计算重叠率和标签
① 计算rois与gt的重叠率overlaps，overlaps = （1214,3）
② 返回每一个anchor与第几个gt的重叠率最大，gt_assignment = （1214，）。gt_assignment的值为gt的序号：如0、1...
③ 返回上述的重叠率，即max_overlaps = （1214，）
④ 生成labels，labels = （1214，），其值是对应的gt的label，也就是为每一个anchor打上了标签
（2）从anchor中选择出正负样本
① 正样本的序号为fg_inds，对应max_overlaps>=0.5；负样本的序号为bg_inds，对应0.1<=max_overlaps<0.5
② 根据正负样本的数量进行抽样
if 正样本数量>32,从中抽样出32个，剩余96个为负样本
if 正样本数量<=32，则全部保留，此时负样本抽样出（128-正样本数量）个。
最终得到的fg_inds + bg_inds = 128.这里假设fg_inds=14，则bg_inds = 114
③ 根据fg_inds、bg_inds，得到最终样本。其中labels = (128,)(负样本的label置为0)，rois = (128,5)（第一列为0），rois_scores = (128,)
（3）计算bbox_target_data和bbox_target
① 计算rois和gt的bbox_target_data。bbox_target_data = （128,5）第一列为label
② 根据训练的类别数K和bbox_target_data，计算bbox_target=(128,4K)和bbox_inside_weights=(128,4K)
这里假定K=3（包括背景）,bbox_target = (128，3×4) = (128,12),相当于前4列为背景，中间4列为第1个类别，最后4列为第2个类别。而其中只有对应类的bbox_targets才为非0,同理只有对应类的bbox_inside_weights为1。
proposal_target_layer的输出及维度为：

rois = (128,5)（第一列为0）
rois_scores =(128,)
labels = (128,1)
bbox_target=(128,12)
bbox_inside_weights = (128,12)
bbox_outside_weights = (128,12)

生成训练分类和回归网络的RoI以及对应的标签信息：

# Store the proposal_target_layer outputs under self._proposal_targets, keyed by name;
# the labels are cast to int32 first.
self._proposal_targets['rois'] = rois
self._proposal_targets['labels'] = tf.to_int32(labels, name="to_int32")
self._proposal_targets['bbox_targets'] = bbox_targets
self._proposal_targets['bbox_inside_weights'] = bbox_inside_weights
self._proposal_targets['bbox_outside_weights'] = bbox_outside_weights

2.构建fastrcnn部分以及进行预测（build_predictions函数）

def build_predictions(self, net, rois, is_training, initializer, initializer_bbox):
        """Build the detection head on top of the pooled RoI features.

        Crops the RoIs out of ``net``, runs them through two 4096-unit fully
        connected layers (each followed by dropout when training) and adds
        the classification and box-regression output layers.

        Returns:
            Tuple ``(cls_score, cls_prob, bbox_prediction)``. ``cls_score``
            has ``self._num_classes`` outputs per RoI and ``bbox_prediction``
            has ``self._num_classes * 4``.
        """
        # Crop image ROIs
        pooled = self._crop_pool_layer(net, rois, "pool5")
        hidden = slim.flatten(pooled, scope='flatten')

        # Fully connected layers, with dropout only while training
        for fc_scope, dropout_scope in (('fc6', 'dropout6'), ('fc7', 'dropout7')):
            hidden = slim.fully_connected(hidden, 4096, scope=fc_scope)
            if is_training:
                hidden = slim.dropout(hidden, keep_prob=0.5, is_training=True, scope=dropout_scope)

        # Scores and predictions
        cls_score = slim.fully_connected(
            hidden, self._num_classes,
            weights_initializer=initializer,
            trainable=is_training,
            activation_fn=None,
            scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_prediction = slim.fully_connected(
            hidden, self._num_classes * 4,
            weights_initializer=initializer_bbox,
            trainable=is_training,
            activation_fn=None,
            scope='bbox_pred')

        return cls_score, cls_prob, bbox_prediction

最后返回：

cls_score：(128,3)
cls_prob：（128,3）
bbox_prediction：（128,12）

3.小结

最后基于本篇博客和上篇博客给出基于vgg的faster rcnn构建的总体过程代码，

def build_network(self, sess, is_training=True):
        """Assemble the whole network inside the ``vgg_16`` variable scope.

        Builds, in order, the head, the RPN, the proposals and the prediction
        layers, records every intermediate output in ``self._predictions``
        and merges them into ``self._score_summaries``.

        Returns:
            Tuple ``(rois, cls_prob, bbox_pred)``.
        """
        with tf.variable_scope('vgg_16', 'vgg_16'):

            # select initializer
            if cfg.FLAGS.initializer == "truncated":
                make_initializer = tf.truncated_normal_initializer
            else:
                make_initializer = tf.random_normal_initializer
            initializer = make_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

            # Build head
            net = self.build_head(is_training)

            # Build rpn. Per the original author's note, the anchor settings
            # live in build_rpn. It returns the RPN scores (cls_score), the
            # probabilities (cls_prob) and the box offsets (bbox_pred).
            rpn_outputs = self.build_rpn(net, is_training, initializer)
            rpn_cls_prob, rpn_bbox_pred, rpn_cls_score, rpn_cls_score_reshape = rpn_outputs

            # Build proposals. The returned RoIs feed the prediction layers
            # in both training and testing.
            rois = self.build_proposals(is_training, rpn_cls_prob, rpn_bbox_pred, rpn_cls_score)

            # Build predictions
            cls_score, cls_prob, bbox_pred = self.build_predictions(
                net, rois, is_training, initializer, initializer_bbox)

            # Record every output by name, in a fixed order.
            named_outputs = (
                ("rpn_cls_score", rpn_cls_score),
                ("rpn_cls_score_reshape", rpn_cls_score_reshape),
                ("rpn_cls_prob", rpn_cls_prob),
                ("rpn_bbox_pred", rpn_bbox_pred),
                ("cls_score", cls_score),
                ("cls_prob", cls_prob),
                ("bbox_pred", bbox_pred),
                ("rois", rois),
            )
            for key, tensor in named_outputs:
                self._predictions[key] = tensor

            self._score_summaries.update(self._predictions)

            return rois, cls_prob, bbox_pred