Faster-RCNN_TF代码解读9:proposal_target_layer_tf.py

最新推荐文章于 2022-01-11 20:45:44 发布
一呆飞仙
最新推荐文章于 2022-01-11 20:45:44 发布
阅读量4.8k
点赞数 3
分类专栏： Faster-RCNN_TF 文章标签： python faster-r-cnn tensorflow 人工智能-神经网络算法
本文链接：https://blog.csdn.net/l297969586/article/details/78020635
版权
Faster-RCNN_TF 专栏收录该内容
20 篇文章 6 订阅
订阅专栏
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import yaml
import numpy as np
import numpy.random as npr
from fast_rcnn.config import cfg
from fast_rcnn.bbox_transform import bbox_transform
from utils.cython_bbox import bbox_overlaps
import pdb

DEBUG = False
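# Inputs:
#   rpn_rois:      blob whose rows are [proposal batch index (all zeros), proposal box]
#   gt_boxes:      ground-truth boxes
#   _num_classes:  total number of classes (21 in the annotated setup)
# Purpose:
#   Produce the sampled rois, their labels, and three matrices of size
#   (len(rois), 4 * _num_classes). In one of them, the 4 slots of the matching
#   class in each fg-roi row hold (dx, dy, dw, dh); in the other two, those
#   same 4 slots hold (1, 1, 1, 1).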
def proposal_target_layer(rpn_rois, gt_boxes, _num_classes):
    """Assign object detection proposals to ground-truth targets.

    Produces proposal classification labels and bounding-box regression
    targets for a single image.

    Args:
        rpn_rois: 2-D array of proposals; column 0 is the batch index (must be
            all zeros) and the remaining columns are the box coordinates.
        gt_boxes: 2-D array of ground-truth boxes. The last column is dropped
            when the boxes are appended to the candidate set; the sampler
            reads columns 0-3 as the box and column 4 as the class label.
        _num_classes: number of classes; fixes the width (4 * _num_classes) of
            the regression-target matrices.

    Returns:
        Tuple ``(rois, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)`` reshaped to ``(-1, 5)``, ``(-1, 1)`` and three
        times ``(-1, 4 * _num_classes)`` respectively.

    Raises:
        AssertionError: if any candidate roi has a non-zero batch index.
    """
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    all_rois = rpn_rois
    # TODO(rbg): it's annoying that sometimes I have extra info before
    # and other times after box coordinates -- normalize to one format

    # Include ground-truth boxes in the set of candidate rois: proposals come
    # first, ground-truth rows are stacked below them with a zero batch index.
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    # Only the leading columns of gt_boxes are kept (the last one is dropped).
    all_rois = np.vstack(
        (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
    )

    # Sanity check: single batch only
    assert np.all(all_rois[:, 0] == 0), \
            'Only single item batches are supported'

    num_images = 1
    # Floor division keeps the per-image roi count an integer on Python 3 too
    # (plain "/" would produce a float there and later break npr.choice).
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE // num_images)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Sample rois with classification labels and bounding box regression
    # targets
    labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    if DEBUG:
        num_fg = int((labels > 0).sum())
        num_bg = int((labels == 0).sum())
        print('num fg: {}'.format(num_fg))
        print('num bg: {}'.format(num_bg))
        # Running totals are kept as attributes on the function itself: the
        # original referenced undefined locals here (left over from a
        # class-based layer) and raised UnboundLocalError.
        stats = proposal_target_layer
        stats._count = getattr(stats, '_count', 0) + 1
        stats._fg_num = getattr(stats, '_fg_num', 0) + num_fg
        stats._bg_num = getattr(stats, '_bg_num', 0) + num_bg
        print('num fg avg: {}'.format(float(stats._fg_num) / stats._count))
        print('num bg avg: {}'.format(float(stats._bg_num) / stats._count))
        # Skip the ratio while no background roi has been seen yet.
        if stats._bg_num > 0:
            print('ratio: {:.3f}'.format(
                float(stats._fg_num) / float(stats._bg_num)))

    # Only labels actually changes rank (1-D -> 2-D column); the other
    # reshapes pin the expected column counts.
    rois = rois.reshape(-1, 5)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, _num_classes * 4)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, _num_classes * 4)
    # Outside weights are 1.0 wherever the inside weight is positive.
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
# Purpose: build two matrices of size (len(rois), 4 * num_classes). In one, the 4 slots of the matching class in each fg-roi row hold (dx, dy, dw, dh); in the other, those same 4 slots hold (1, 1, 1, 1).
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact regression targets into the per-class 4K layout.

    ``bbox_target_data`` is N x 5 with rows ``(class, tx, ty, tw, th)``. For
    every row whose class is greater than zero, the four target values are
    copied into columns ``[4 * class, 4 * class + 4)`` of the output and the
    same columns of the weight matrix are set to
    ``cfg.TRAIN.BBOX_INSIDE_WEIGHTS``. All other entries stay zero.

    Returns:
        bbox_target (ndarray): N x 4K float32 blob of regression targets
        bbox_inside_weights (ndarray): N x 4K float32 blob of loss weights
    """
    # Class id per row, copied out of the first column.
    class_ids = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True)

    targets = np.zeros((class_ids.size, 4 * num_classes), dtype=np.float32)
    inside_weights = np.zeros(targets.shape, dtype=np.float32)

    # Rows with a non-background class are the only ones that get filled in.
    fg_rows = np.flatnonzero(class_ids > 0)
    for row, class_id in zip(fg_rows, class_ids[fg_rows]):
        first = 4 * class_id
        cols = slice(first, first + 4)
        targets[row, cols] = bbox_target_data[row, 1:]
        inside_weights[row, cols] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    return targets, inside_weights

# Purpose: return rows of [label, dx, dy, dw, dh]
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: N x 4 array of example boxes.
        gt_rois: N x 4 array of the ground-truth box matched to each example.
        labels: length-N 1-D array of class labels, one per example.

    Returns:
        N x 5 float32 array whose rows are ``[label, <4 target values>]``,
        where the target values come from ``bbox_transform(ex_rois, gt_rois)``.

    Raises:
        AssertionError: if the row counts differ or either input is not
            4 columns wide.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    # Regression values of each example box relative to its matched GT box.
    deltas = bbox_transform(ex_rois, gt_rois)

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        deltas = (deltas - means) / stds

    # Turn the 1-D labels into an (N, 1) column so it can sit in front of the
    # (N, 4) targets.
    label_column = labels[:, np.newaxis]
    packed = np.hstack((label_column, deltas))
    return packed.astype(np.float32, copy=False)
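# Purpose: further filter the rois, produce their labels, and build the two (len(rois), 4 * num_classes) matrices bbox_targets and bbox_inside_weights.
# bbox_targets: the 4 slots of the matching class in each fg-roi row hold (dx, dy, dw, dh)
# bbox_inside_weights: the 4 slots of the matching class in each fg-roi row hold (1, 1, 1, 1)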
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: 2-D array of candidate rois; columns 1-4 are read as the box.
        gt_boxes: 2-D array of ground-truth boxes; columns 0-3 are read as the
            box and column 4 as the class label.
        fg_rois_per_image: upper bound on the number of foreground rois.
        rois_per_image: upper bound on the total number of sampled rois.
        num_classes: number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the
        sampled rois, foreground rois first and background rois after them.
    """
    # overlaps: (rois x gt_boxes)
    # bbox_overlaps is expected to return an N x K matrix (N rois, K GT boxes)
    # of overlap scores -- per the existing notes, intersection over union.
    # np.float64 is used because the np.float alias (which was the builtin
    # float) has been removed from NumPy.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    # For every roi: index of its best-overlapping GT box and that overlap.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Each roi inherits the label (column 4) of its best-matching GT box.
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    # (np.where returns a tuple; [0] unwraps the index array)
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired). Cast to int so a float
    # rois_per_image from the caller cannot reach npr.choice as a float size.
    bg_rois_per_this_image = int(
        min(rois_per_image - fg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (fg first, then bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0: the first
    # fg_rois_per_this_image entries are foreground, everything after is bg.
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    # Compact targets: one row of [label, 4 regression values] per kept roi,
    # computed against the GT box each roi was assigned to.
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # Expand the compact rows into the per-class 4 * num_classes layout.
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
一呆飞仙
关注
3
点赞
踩
9

收藏

觉得还不错? 一键收藏
4
评论
Faster-RCNN_TF代码解读9:proposal_target_layer_tf.py

# --------------------------------------------------------# Faster R-CNN# Copyright (c) 2015 Microsoft# Licensed under The MIT License [see LICENSE for details]# Written by Ross Girshick and Sean B
复制链接

扫一扫
专栏目录