Faster R-CNN Code Walkthrough


The build_proposals layer:

This layer is the hardest to understand; essentially the entire core of Faster R-CNN lives inside this function.

    def build_proposals(self, is_training, rpn_cls_prob, rpn_bbox_pred, rpn_cls_score):

        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")  # From the RPN outputs, keep roughly 2000 boxes after NMS, plus their scores
            # rpn_cls_prob: objectness probabilities output by the RPN
            # rpn_bbox_pred: box regression outputs of the RPN, i.e. tx, ty, tw, th
            print(rois.shape)
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")  # Label every anchor: sampled anchors get 1 (fg) or 0 (bg), the rest -1; also defines the bbox regression targets

            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.FLAGS.test_mode == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.FLAGS.test_mode == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError
        return rois

As you can see, this function is built on three sub-functions: proposal_layer, anchor_target_layer and proposal_target_layer. Below we go through what each of them does. Pay attention to one detail: proposal_target_layer takes the return value of proposal_layer as its input, which means the two are coupled, whereas anchor_target_layer never uses proposal_layer's output at all, meaning the two are completely independent.

proposal_layer:

def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n  #12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n #2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh #0.7
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n  #6000
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n #300
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh #0.7

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)  # Decode each anchor with its predicted deltas into actual box coordinates

    proposals = clip_boxes(proposals, im_info[:2])  # NOTE: proposals extending beyond the image are clipped to the image boundary here; they are NOT discarded, so the number of boxes does not change!

    # Pick the top region proposals
    # Sort boxes by foreground score; order holds the box indices
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]  # Before the actual NMS, keep the top 12000 candidates by score (order is an index array)
    proposals = proposals[order, :]  # Gather the selected proposals
    scores = scores[order]  # ...and their scores

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)  # Run NMS here

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]  # Keep only the first 2000 survivors, i.e. the 2000 best proposals out of all anchors
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)  # Prepend a batch index; with batch size 1 the index is always 0
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # print(blob.shape)    (1500-2000, 5)
    # print(scores.shape)  (1500-2000, 1)
    return blob, scores  # Return these ~2000 boxes together with their scores
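
To make the box arithmetic above concrete, below is a compact NumPy sketch of what bbox_transform_inv, clip_boxes and nms roughly do in this pipeline. It is a simplified re-implementation for illustration only (the repository's own versions are partly Cython and handle a few more edge cases), not the exact source.

import numpy as np

def bbox_transform_inv(anchors, deltas):
    """Decode (tx, ty, tw, th) deltas against anchors into (x1, y1, x2, y2) boxes."""
    widths = anchors[:, 2] - anchors[:, 0] + 1.0
    heights = anchors[:, 3] - anchors[:, 1] + 1.0
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights

    boxes = np.zeros_like(deltas)
    boxes[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    boxes[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    boxes[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    boxes[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return boxes

def clip_boxes(boxes, im_shape):
    """Clip boxes to the image boundary (no box is dropped)."""
    boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)  # x1
    boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)  # y1
    boxes[:, 2] = np.clip(boxes[:, 2], 0, im_shape[1] - 1)  # x2
    boxes[:, 3] = np.clip(boxes[:, 3], 0, im_shape[0] - 1)  # y2
    return boxes

def nms(dets, thresh):
    """Plain NumPy NMS over rows of (x1, y1, x2, y2, score); returns the kept indices."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current highest-scoring box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop everything that overlaps the kept box by more than thresh
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep

With these three pieces, proposal_layer boils down to: decode every anchor, clip to the image, keep the top 12000 by score, run NMS at 0.7, and keep at most 2000 survivors.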

anchor_target_layer:

def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors #9
    total_anchors = all_anchors.shape[0]
    #print(total_anchors)  38*38*9
    K = total_anchors / num_anchors
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0  # boxes may sit exactly on the image border (no extra slack allowed)

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    # Filter out anchors that fall outside the image: np.where with these conditions returns the indices of the anchors that survive. Note: unlike proposal_layer, this really REMOVES boxes instead of clipping them!
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    #print(anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))
    # overlaps has shape (N, K): N kept anchors (e.g. 3938 here) by K ground-truth boxes, holding the IoU of every anchor/gt pair
    #print(overlaps.shape) (3938,1)
    argmax_overlaps = overlaps.argmax(axis=1)  # argmax over each row: for every kept anchor, the gt box it overlaps most
    #print(argmax_overlaps)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]  # the corresponding maximum IoU value for every anchor
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # argmax over each column: for every gt box, the anchor that overlaps it most
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    if not cfg.FLAGS.rpn_clobber_positives: #rpn_clobber_positives:False
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0
    # For each gt box, the anchor with the highest overlap is labelled foreground
    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many
    # Randomly keep at most 128 positive anchors
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize) #128
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:  # too many positives: subsample with npr.choice()
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)  # replace=False means no index is picked twice
        labels[disable_inds] = -1  # mark the discarded ones as -1 so they are ignored in training

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
    #print(gt_boxes.shape) (1:5)
    #print(gt_boxes[argmax_overlaps, :].shape)  (3938,5)
    #print(gt_boxes[argmax_overlaps, :])
    #print(argmax_overlaps.shape) (3938,)
    #print(type(argmax_overlaps)) #ndarray

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])  # anchors: the kept anchors; gt_boxes[argmax_overlaps, :]: the gt box matched to each anchor; computes the regression targets (dx, dy, dw, dh)

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # So far labels, targets and weights were only computed for the anchors kept inside the image; _unmap pads them back to the full set of total_anchors (labels filled with -1, the rest with 0)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights

Note that this function has nothing to do with proposal_layer above. Its job is to assign an RPN label to every anchor, foreground or background, and to define the RPN bounding-box regression targets. RPN training uses only the 256 anchors sampled here (nominally 128 positives and 128 negatives); this is an independent step that trains the RPN alone, and it does not exist at test time.
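
Two helpers do the heavy lifting in this layer: bbox_overlaps, which builds the (N, K) IoU table that the labels are derived from, and _compute_targets, which (via a bbox_transform helper) encodes each matched anchor/gt pair as the (tx, ty, tw, th) deltas that the decode sketched earlier inverts. Below is a minimal NumPy sketch of both, written for illustration under the usual Faster R-CNN conventions rather than copied from the repository (the real bbox_overlaps is Cython).

import numpy as np

def bbox_overlaps(boxes, gt_boxes):
    """IoU table of shape (N, K): N candidate boxes against K ground-truth boxes."""
    N, K = boxes.shape[0], gt_boxes.shape[0]
    overlaps = np.zeros((N, K), dtype=np.float64)
    for k in range(K):
        gt_area = ((gt_boxes[k, 2] - gt_boxes[k, 0] + 1) *
                   (gt_boxes[k, 3] - gt_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], gt_boxes[k, 2]) -
                  max(boxes[n, 0], gt_boxes[k, 0]) + 1)
            ih = (min(boxes[n, 3], gt_boxes[k, 3]) -
                  max(boxes[n, 1], gt_boxes[k, 1]) + 1)
            if iw > 0 and ih > 0:
                box_area = ((boxes[n, 2] - boxes[n, 0] + 1) *
                            (boxes[n, 3] - boxes[n, 1] + 1))
                overlaps[n, k] = iw * ih / (box_area + gt_area - iw * ih)
    return overlaps

def bbox_transform(anchors, gt_boxes):
    """Encode matched (anchor, gt) pairs as (tx, ty, tw, th) regression targets.

    This is the inverse of the bbox_transform_inv decode shown earlier:
    decoding an anchor with these deltas recovers its matched gt box.
    """
    ex_widths = anchors[:, 2] - anchors[:, 0] + 1.0
    ex_heights = anchors[:, 3] - anchors[:, 1] + 1.0
    ex_ctr_x = anchors[:, 0] + 0.5 * ex_widths
    ex_ctr_y = anchors[:, 1] + 0.5 * ex_heights

    gt_widths = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
    gt_heights = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
    gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_heights

    tx = (gt_ctr_x - ex_ctr_x) / ex_widths
    ty = (gt_ctr_y - ex_ctr_y) / ex_heights
    tw = np.log(gt_widths / ex_widths)
    th = np.log(gt_heights / ex_heights)
    return np.vstack((tx, ty, tw, th)).transpose()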

proposal_target_layer:

def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    """

    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    all_rois = rpn_rois
    all_scores = rpn_scores

    # Include ground-truth boxes in the set of candidate rois
    if cfg.FLAGS.proposal_use_gt:
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        all_rois = np.vstack(
            (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
        )
        # not sure if it a wise appending, but anyway i am not using it
        all_scores = np.vstack((all_scores, zeros))

    # cfg.FLAGS.batch_size is the number of RoIs sampled per image.
    # rois_per_image is that per-image RoI budget; other parts of the code
    # use the same limit under different names.
    num_images = 1
    rois_per_image = cfg.FLAGS.batch_size / num_images   #256/1
    fg_rois_per_image = np.round(cfg.FLAGS.proposal_fg_fraction * rois_per_image) #64

    # Sample rois with classification labels and bounding box regression
    # targets
    # _sample_rois randomly samples each image's RoI batch according to these settings
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, all_scores, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    rois = rois.reshape(-1, 5)
    roi_scores = roi_scores.reshape(-1)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, _num_classes * 4)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, _num_classes * 4)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights

There is one more important function here: _sample_rois.

 

def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)  # for each roi, the index of the gt box with the largest overlap
    max_overlaps = overlaps.max(axis=1)  # for each roi, the value of that largest overlap
    labels = gt_boxes[gt_assignment, 4]  # for each roi, the class label of its matched gt box

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0] #cfg.FLAGS.roi_fg_threshold 0.5
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]  # i.e. IoU in [0, 0.5)

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        raise Exception()

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights

Overall, this function does a few things: it assigns real class labels to the RoIs selected by proposal_layer, samples a quarter of the mini-batch as positives (64 RoIs) and the remainder as negatives (256 - 64 = 192 RoIs) to train Fast R-CNN, and defines the Fast R-CNN regression targets. Note the difference between the two mini-batches of 256: the 256 in anchor_target_layer is 128 positive plus 128 negative anchors with foreground/background labels, and it trains the RPN; the 256 in proposal_target_layer is 64 positive plus 192 negative RoIs with real class labels, and it trains Fast R-CNN. With this source code in mind, the whole Faster R-CNN training procedure becomes clear.
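
One shape worth spelling out in the code above is bbox_targets: (N, 4 * num_classes). _get_bbox_regression_labels scatters each foreground RoI's four regression values into the four slots that belong to its own class and leaves every other class at zero, while bbox_inside_weights marks exactly those four slots. A minimal sketch of that expansion (illustrative only; the repository reads the inside weights from its config, here they are simply hard-coded to 1.0):

import numpy as np

def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand per-RoI targets (class, tx, ty, tw, th) into class-specific form.

    Returns:
      bbox_targets:        (N, 4 * num_classes), only the matched class's 4 slots are non-zero
      bbox_inside_weights: (N, 4 * num_classes), 1.0 exactly on those 4 slots (foreground RoIs only)
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0]  # background RoIs (class 0) keep all-zero targets
    for ind in inds:
        cls = int(clss[ind])
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights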

Faster R-CNN = RPN + Fast R-CNN. The RPN and Fast R-CNN are two separate networks, much like the generator and discriminator in a GAN are two separate networks. The full Faster R-CNN (alternating) training procedure therefore looks as follows (a minimal pseudocode sketch is given after the list):

  1. Train the RPN on top of a pre-trained model
  2. Use the trained RPN to collect proposals
  3. Train the Fast R-CNN network for the first time
  4. Train the RPN for the second time
  5. Use the RPN trained in step 4 to collect proposals again
  6. Train the Fast R-CNN network for the second time
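
A purely illustrative pseudocode sketch of that schedule (train_rpn, collect_proposals and train_fast_rcnn are placeholder names, not functions from this repository):

def alternating_training(pretrained_model, dataset):
    # Step 1: train the RPN starting from the pre-trained model
    rpn = train_rpn(init=pretrained_model, data=dataset)
    # Step 2: use the trained RPN to collect proposals
    proposals = collect_proposals(rpn, dataset)
    # Step 3: first round of Fast R-CNN training on those proposals
    fast_rcnn = train_fast_rcnn(init=pretrained_model, rois=proposals, data=dataset)
    # Step 4: second round of RPN training, initialised from Fast R-CNN's weights
    rpn = train_rpn(init=fast_rcnn, data=dataset)
    # Step 5: collect proposals again with the re-trained RPN
    proposals = collect_proposals(rpn, dataset)
    # Step 6: second round of Fast R-CNN training on the new proposals
    fast_rcnn = train_fast_rcnn(init=fast_rcnn, rois=proposals, data=dataset)
    return rpn, fast_rcnn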

 
