_proposal_target_layer代码讲解

本篇博客主要讲解_proposal_target_layer
_proposal_target_layer
作用:该函数的主要作用是用来打标签(之后最终的分类回归任务)。
首先我们看一下输入:
rois:表示感兴趣区域,shape=(1*60*40*9,5)
roi_scores:表示感兴趣区域的分数,shape=(1*60*40*9,1)

  def _proposal_target_layer(self, rois, roi_scores, name):
    """Run `proposal_target_layer` as a graph op and record its outputs.

    The NumPy function `proposal_target_layer` is wrapped with `tf.py_func`
    and fed `rois`, `roi_scores`, `self._gt_boxes` and `self._num_classes`.
    Its six float32 outputs are given static shapes based on
    `cfg.TRAIN.BATCH_SIZE`, stored in `self._proposal_targets` (labels are
    cast to int32 first) and merged into `self._score_summaries`.

    Args:
      rois: candidate regions of interest passed through to the py_func.
      roi_scores: scores associated with `rois`.
      name: variable-scope name under which the ops are created.

    Returns:
      The sampled `rois` and `roi_scores` tensors.
    """
    with tf.variable_scope(name):
      # Delegate the actual sampling / labelling to proposal_target_layer.
      outputs = tf.py_func(
        proposal_target_layer,
        [rois, roi_scores, self._gt_boxes, self._num_classes],
        [tf.float32] * 6,
        name="proposal_target")
      (rois, roi_scores, labels, bbox_targets,
       bbox_inside_weights, bbox_outside_weights) = outputs

      # py_func outputs carry no static shape, so pin the shapes explicitly.
      batch_size = cfg.TRAIN.BATCH_SIZE
      box_width = self._num_classes * 4
      rois.set_shape([batch_size, 5])
      roi_scores.set_shape([batch_size])
      labels.set_shape([batch_size, 1])
      bbox_targets.set_shape([batch_size, box_width])
      bbox_inside_weights.set_shape([batch_size, box_width])
      bbox_outside_weights.set_shape([batch_size, box_width])

      # Store the targets for later use; roi_scores is returned but not stored.
      stored = (
        ('rois', rois),
        ('labels', tf.to_int32(labels, name="to_int32")),
        ('bbox_targets', bbox_targets),
        ('bbox_inside_weights', bbox_inside_weights),
        ('bbox_outside_weights', bbox_outside_weights),
      )
      for key, tensor in stored:
        self._proposal_targets[key] = tensor

      self._score_summaries.update(self._proposal_targets)

      return rois, roi_scores

proposal_target_layer
看一下这个函数的输入:
rpn_rois:表示感兴趣区域,shape=(1*60*40*9,5)
rpn_scores:表示感兴趣区域的分数,shape=(1*60*40*9,1)
gt_boxes:真实框,shape=(K,5)
_num_classes:类别数量

# Author's example shapes: rpn_rois (1*60*40*9, 5), rpn_scores (1*60*40*9, 1),
# gt_boxes (K, 5).
# This layer builds the ground-truth labels and regression targets used to
# train the subsequent classification / box-regression stage.
def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
  """Assign object detection proposals to ground-truth targets.

  Produces proposal classification labels and bounding-box regression
  targets by sampling from the candidate RoIs via `_sample_rois`.

  Args:
    rpn_rois: proposal RoIs laid out as (0, x1, y1, x2, y2), coming from the
      RPN or any other source.
    rpn_scores: scores that go with `rpn_rois`.
    gt_boxes: ground-truth boxes; the last column is the class label.
    _num_classes: number of classes.

  Returns:
    Tuple `(rois, roi_scores, labels, bbox_targets, bbox_inside_weights,
    bbox_outside_weights)`; the outside weights are 1.0 wherever the inside
    weights are positive and 0.0 elsewhere.
  """
  candidate_rois = rpn_rois
  candidate_scores = rpn_scores

  # Optionally include the ground-truth boxes in the candidate set: drop the
  # label column, prepend a zero column, and append the result to the RoIs.
  if cfg.TRAIN.USE_GT:
    zero_col = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    gt_as_rois = np.hstack((zero_col, gt_boxes[:, :-1]))
    candidate_rois = np.vstack((candidate_rois, gt_as_rois))
    # not sure if it a wise appending, but anyway i am not using it
    candidate_scores = np.vstack((candidate_scores, zero_col))

  num_images = 1
  # RoIs sampled per image, and how many of those should be foreground
  # (FG_FRACTION is the foreground share of the batch).
  rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
  fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

  # Sample RoIs together with classification labels and regression targets.
  sampled = _sample_rois(
    candidate_rois, candidate_scores, gt_boxes, fg_rois_per_image,
    rois_per_image, _num_classes)
  labels, rois, roi_scores, bbox_targets, bbox_inside_weights = sampled

  # Normalise the output shapes (t1 = number of sampled RoIs).
  target_width = _num_classes * 4
  rois = rois.reshape(-1, 5)                      # (t1, 5)
  roi_scores = roi_scores.reshape(-1)             # (t1,)
  labels = labels.reshape(-1, 1)                  # (t1, 1)
  bbox_targets = bbox_targets.reshape(-1, target_width)
  bbox_inside_weights = bbox_inside_weights.reshape(-1, target_width)
  bbox_outside_weights = (bbox_inside_weights > 0).astype(np.float32)

  return (rois, roi_scores, labels, bbox_targets,
          bbox_inside_weights, bbox_outside_weights)

详细代码解析

_sample_rois
作用:计算iou,标签分类,抽样,最终用于后面的分类回归。

# Author's example shapes: all_rois (1*60*40*9, 5), all_scores (1*60*40*9, 1),
# gt_boxes (K, 5).
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.

  Args:
    all_rois: candidate RoIs; columns 1:5 are the box coordinates.
    all_scores: scores that go with `all_rois`.
    gt_boxes: ground-truth boxes; columns 0:4 are coordinates, column 4 is
      the class label.
    fg_rois_per_image: maximum number of foreground RoIs to sample.
    rois_per_image: total number of RoIs to sample.
    num_classes: number of classes, forwarded to
      `_get_bbox_regression_labels`.

  Returns:
    Tuple `(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)` for
    the sampled RoIs, foreground samples first.

  Raises:
    ValueError: if no RoI qualifies as either foreground or background.
  """
  # overlaps: (rois x gt_boxes), i.e. shape (N, K).
  # np.float64 replaces the removed `np.float` alias (same dtype).
  overlaps = bbox_overlaps(
    np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
    np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
  gt_assignment = overlaps.argmax(axis=1)  # index of best-matching gt box, shape (N,)
  max_overlaps = overlaps.max(axis=1)      # overlap with that gt box, shape (N,)
  labels = gt_boxes[gt_assignment, 4]      # label of that gt box, shape (N,)

  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                     (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.size > 0 and bg_inds.size > 0:
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs.
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
    fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
    # Fill the rest of the batch with background, sampling with replacement
    # only when there are too few background RoIs.
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.size < bg_rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
  elif fg_inds.size > 0:
    # Foreground only: the whole batch is foreground.
    to_replace = fg_inds.size < rois_per_image
    fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = rois_per_image
  elif bg_inds.size > 0:
    # Background only: the whole batch is background.
    to_replace = bg_inds.size < rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = 0
  else:
    # Previously this dropped into pdb and then continued with an empty
    # sample; fail loudly instead so non-interactive runs do not hang.
    raise ValueError(
      "_sample_rois: no RoI qualifies as foreground or background")

  # The indices that we're selecting (both fg and bg), foreground first.
  keep_inds = np.append(fg_inds, bg_inds)
  # Select sampled values from various arrays:
  labels = labels[keep_inds]
  # Clamp labels for the background RoIs to 0 (everything after the
  # foreground samples).
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds]
  roi_scores = all_scores[keep_inds]

  # Compact regression targets, one row of (label, 4 offsets) per sampled RoI.
  bbox_target_data = _compute_targets(
    rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

  # Expand the compact targets into the per-class layout plus loss weights.
  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights

最终返回的是抽样的结果
这个函数里面用到了以下函数:
1._compute_targets
2._get_bbox_regression_labels

1._compute_targets
之前也有一个_compute_targets函数和这个可不一样哦!!!
ex_rois:输出的roi
gt_rois:真实的roi(与ex_rois一一对应的真实框)
输出类别标签和偏移量(还做了标准化,两个东西拼在一起)

def _compute_targets(ex_rois, gt_rois, labels):
  """Compute bounding-box regression targets for an image.

  Args:
    ex_rois: (N, 4) array of RoI boxes.
    gt_rois: (N, 4) array of the ground-truth box matched to each RoI.
    labels: length-N array of class labels, one per RoI.

  Returns:
    float32 array of shape (N, 5): the label in column 0 followed by the four
    regression offsets from `bbox_transform`, normalised by the configured
    means / stds when BBOX_NORMALIZE_TARGETS_PRECOMPUTED is set.
  """
  assert ex_rois.shape[0] == gt_rois.shape[0]
  assert ex_rois.shape[1] == 4
  assert gt_rois.shape[1] == 4

  deltas = bbox_transform(ex_rois, gt_rois)

  if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
    # Optionally normalize targets by a precomputed mean and stdev
    means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
    stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)
    deltas = (deltas - means) / stds

  # Prepend the label as a column so each row reads (label, dx, dy, dw, dh).
  label_col = labels[:, np.newaxis]
  packed = np.hstack((label_col, deltas))
  return packed.astype(np.float32, copy=False)

bbox_transform这个函数之前讲过,再看下吧

def bbox_transform(ex_rois, gt_rois):
  """Compute the regression offsets that map `ex_rois` onto `gt_rois`.

  Both inputs are (N, 4) arrays whose columns are used as (x1, y1, x2, y2).
  With (x_a, y_a, w_a, h_a) the centre / size of an `ex_rois` box and
  (x*, y*, w*, h*) those of the matching `gt_rois` box, the offsets are:

    dx = (x* - x_a) / w_a      dy = (y* - y_a) / h_a
    dw = log(w* / w_a)         dh = log(h* / h_a)

  Returns:
    (N, 4) array with columns (dx, dy, dw, dh).
  """
  def _size_and_center(boxes):
    # Widths / heights use the inclusive-pixel convention (+1).
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    centers_x = boxes[:, 0] + 0.5 * widths
    centers_y = boxes[:, 1] + 0.5 * heights
    return widths, heights, centers_x, centers_y

  ex_w, ex_h, ex_cx, ex_cy = _size_and_center(ex_rois)
  gt_w, gt_h, gt_cx, gt_cy = _size_and_center(gt_rois)

  offsets = (
    (gt_cx - ex_cx) / ex_w,
    (gt_cy - ex_cy) / ex_h,
    np.log(gt_w / ex_w),
    np.log(gt_h / ex_h),
  )
  # Stack as rows (4, N), then flip to one row per box (N, 4).
  return np.vstack(offsets).transpose()

2._get_bbox_regression_labels

def _get_bbox_regression_labels(bbox_target_data, num_classes):
  """Expand compact regression targets into the per-class layout.

  Bounding-box regression targets (bbox_target_data) are stored in a
  compact form N x (class, tx, ty, tw, th). This function expands those
  targets into the 4-of-4*K representation used by the network (i.e. only
  one class has non-zero targets).

  Args:
    bbox_target_data: N x 5 array of (class, tx, ty, tw, th) rows.
    num_classes: number of classes K.

  Returns:
    bbox_target (ndarray): N x 4K blob of regression targets
    bbox_inside_weights (ndarray): N x 4K blob of loss weights
  """
  class_ids = bbox_target_data[:, 0]
  out_shape = (class_ids.size, 4 * num_classes)
  bbox_targets = np.zeros(out_shape, dtype=np.float32)
  bbox_inside_weights = np.zeros(out_shape, dtype=np.float32)

  # Only rows with a positive class id are filled in; rows with class 0
  # (presumably background) keep all-zero targets and weights.
  for row in np.flatnonzero(class_ids > 0):
    first_col = int(4 * class_ids[row])
    cols = slice(first_col, first_col + 4)
    # Write the four offsets and their weights into this class's 4-wide slot.
    bbox_targets[row, cols] = bbox_target_data[row, 1:]
    bbox_inside_weights[row, cols] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

  return bbox_targets, bbox_inside_weights
  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值