_anchor_target_layer代码详解

  本篇博客将对_anchor_target_layer进行讲解

_anchor_target_layer:其实也是一个中间函数
注意代码注释

  def _anchor_target_layer(self, rpn_cls_score, name):
    """Wrap the NumPy `anchor_target_layer` as a graph op and publish its outputs.

    The Python function is invoked through `tf.py_func` with the RPN class
    scores plus the ground-truth boxes, image info, feature stride, anchors
    and anchor count stored on `self`. Its four float32 outputs are given
    static shapes, the labels are cast to int32, and everything is stored in
    `self._anchor_targets` and merged into `self._score_summaries`.

    Args:
      rpn_cls_score: RPN classification score tensor, e.g. (1, 60, 40, 18)
        according to the original author's notes.
      name: variable-scope name under which the ops are created.

    Returns:
      The int32 `rpn_labels` tensor (foreground / background / ignore labels).
    """
    with tf.variable_scope(name) as scope:
      # Inputs handed to the Python-side target computation, in the order
      # expected by anchor_target_layer's signature.
      py_inputs = [rpn_cls_score, self._gt_boxes, self._im_info,
                   self._feat_stride, self._anchors, self._num_anchors]
      labels, box_targets, inside_weights, outside_weights = tf.py_func(
        anchor_target_layer,
        py_inputs,
        [tf.float32] * 4,
        name="anchor_target")

      # py_func outputs carry no static shape information; restore it here.
      labels.set_shape([1, 1, None, None])
      box_shape = [1, None, None, self._num_anchors * 4]
      for tensor in (box_targets, inside_weights, outside_weights):
        tensor.set_shape(box_shape)

      labels = tf.to_int32(labels, name="to_int32")

      # Keep the targets around for the loss computation.
      published = (
        ('rpn_labels', labels),
        ('rpn_bbox_targets', box_targets),
        ('rpn_bbox_inside_weights', inside_weights),
        ('rpn_bbox_outside_weights', outside_weights),
      )
      for key, tensor in published:
        self._anchor_targets[key] = tensor

      # Also expose them for summaries / visualisation.
      self._score_summaries.update(self._anchor_targets)
    return labels

anchor_target_layer
注意代码注释
输入说明:
rpn_cls_score shape=(1, 60, 40, 18)
gt_boxes表示真实框和类别(?,5)
im_info表示高宽通道数
feat_stride=16 原图与特征图比例
all_anchors:特征图上每个点位对应的所有Anchor
num_anchors=9:每个点位的Anchor(锚)个数

def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
  """Same as the anchor target layer in original Fast/er RCNN.

  Labels every anchor as foreground (1), background (0) or "don't care"
  (-1) from its IoU with the ground-truth boxes, subsamples the labels to
  the configured batch size, and computes box-regression targets together
  with their inside/outside loss weights.

  Args:
    rpn_cls_score: array whose axes 1 and 2 are read as the feature-map
      height and width; its values are not used here.
    gt_boxes: ground-truth boxes, one row per box; handed to
      `bbox_overlaps` and, row-selected per anchor, to `_compute_targets`.
    im_info: `im_info[0]` is used as the image height and `im_info[1]` as
      the image width when discarding anchors that leave the image.
    _feat_stride: not used by this function.
    all_anchors: (total_anchors, 4) array of anchor boxes; columns 0/2 are
      checked against the image width and columns 1/3 against the height.
    num_anchors: number of anchors per feature-map location (A).

  Returns:
    Tuple `(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
    rpn_bbox_outside_weights)`. `rpn_labels` is reshaped to
    (1, 1, A * height, width); the other three are reshaped to
    (1, height, width, A * 4).
  """
  A = num_anchors
  total_anchors = all_anchors.shape[0]

  # allow boxes to sit over the edge by a small amount
  _allowed_border = 0

  # only the H and W axes of the score map are needed
  height, width = rpn_cls_score.shape[1:3]

  # only keep anchors that lie completely inside the image
  inds_inside = np.where(
    (all_anchors[:, 0] >= -_allowed_border) &
    (all_anchors[:, 1] >= -_allowed_border) &
    (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
    (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
  )[0]
  anchors = all_anchors[inds_inside, :]

  # label: 1 is positive, 0 is negative, -1 is dont care
  labels = np.empty((len(inds_inside),), dtype=np.float32)
  labels.fill(-1)

  # overlaps[n, k] is the IoU between inside-anchor n and gt box k.
  # np.float64 is used explicitly: the `np.float` alias was deprecated in
  # NumPy 1.20 and removed in 1.24.
  overlaps = bbox_overlaps(
    np.ascontiguousarray(anchors, dtype=np.float64),
    np.ascontiguousarray(gt_boxes, dtype=np.float64))

  # for every anchor: index of its best gt box and the matching IoU
  argmax_overlaps = overlaps.argmax(axis=1)
  max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

  # for every gt box: index of its best anchor and the matching IoU
  gt_argmax_overlaps = overlaps.argmax(axis=0)
  gt_max_overlaps = overlaps[gt_argmax_overlaps,
                             np.arange(overlaps.shape[1])]
  # Keep *all* anchors that tie for the best IoU with some gt box.
  # np.where with only a condition returns index arrays per axis, e.g.
  #   np.where([[True, False], [True, False]])
  #   -> (array([0, 1]), array([0, 0]))
  # so [0] selects the anchor (row) indices.
  gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

  if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    # assign bg labels first so that positive labels can clobber them
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

  # fg label: for each gt, anchor with highest overlap
  labels[gt_argmax_overlaps] = 1

  # fg label: above threshold IOU
  labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

  if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    # assign bg labels last so that negative labels can clobber positives
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

  # subsample positive labels if we have too many
  num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
  fg_inds = np.where(labels == 1)[0]
  if len(fg_inds) > num_fg:
    disable_inds = npr.choice(
      fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    labels[disable_inds] = -1

  # subsample negative labels if we have too many
  num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
  bg_inds = np.where(labels == 0)[0]
  if len(bg_inds) > num_bg:
    disable_inds = npr.choice(
      bg_inds, size=(len(bg_inds) - num_bg), replace=False)
    labels[disable_inds] = -1

  # Regression targets: offsets from each inside anchor to the gt box it
  # overlaps most (one gt row is selected per anchor).
  bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

  # only the positive ones have regression targets
  bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
  bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

  bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
  if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    positive_weights = np.ones((1, 4)) * 1.0 / num_examples
    negative_weights = np.ones((1, 4)) * 1.0 / num_examples
  else:
    assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
            (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
    positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                        np.sum(labels == 1))
    negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                        np.sum(labels == 0))
  bbox_outside_weights[labels == 1, :] = positive_weights
  bbox_outside_weights[labels == 0, :] = negative_weights

  # map up to original set of anchors: anchors dropped by the inside-image
  # filter get label -1 and zero targets / weights
  labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
  bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
  bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
  bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

  # labels: (total,) => (1, height, width, A) => (1, A, height, width)
  #         => (1, 1, A * height, width)
  labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
  labels = labels.reshape((1, 1, A * height, width))
  rpn_labels = labels

  # bbox_targets: (total, 4) => (1, height, width, A * 4)
  rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

  # bbox_inside_weights: (total, 4) => (1, height, width, A * 4)
  rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))

  # bbox_outside_weights: (total, 4) => (1, height, width, A * 4)
  rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))

  return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights

总结一下:整个函数就是在准备分类和回归所需的标签与权重,用于训练。

代码详细解析

bbox_overlaps
这是一个Cython函数,但并不难,很容易看懂。
input就是锚和真实框

def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the pairwise IoU between two sets of boxes.

    Columns 0..3 of each row are read as x-min, y-min, x-max, y-max with
    inclusive pixel coordinates, which is why every extent adds 1. Any
    further columns of `query_boxes` are ignored.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int num_boxes = boxes.shape[0]
    cdef unsigned int num_queries = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] iou = np.zeros((num_boxes, num_queries), dtype=DTYPE)
    cdef DTYPE_t inter_w, inter_h, query_area, box_area
    cdef DTYPE_t union_area
    cdef unsigned int q, b
    for q in range(num_queries):
        # area of the query (ground-truth) box, shared by the inner loop
        query_area = (
            (query_boxes[q, 2] - query_boxes[q, 0] + 1) *
            (query_boxes[q, 3] - query_boxes[q, 1] + 1)
        )
        for b in range(num_boxes):
            # horizontal extent of the intersection
            inter_w = (
                min(boxes[b, 2], query_boxes[q, 2]) -
                max(boxes[b, 0], query_boxes[q, 0]) + 1
            )
            if not (inter_w > 0):
                # no horizontal overlap: IoU stays 0
                continue
            # vertical extent of the intersection
            inter_h = (
                min(boxes[b, 3], query_boxes[q, 3]) -
                max(boxes[b, 1], query_boxes[q, 1]) + 1
            )
            if not (inter_h > 0):
                # no vertical overlap: IoU stays 0
                continue
            box_area = (
                (boxes[b, 2] - boxes[b, 0] + 1) *
                (boxes[b, 3] - boxes[b, 1] + 1)
            )
            # union = area(box) + area(query) - intersection
            union_area = float(box_area + query_area - inter_w * inter_h)
            iou[b, q] = inter_w * inter_h / union_area
    return iou

_compute_targets
ex_rois:筛选过后的锚(即调用处传入的anchors)
gt_rois:与每个锚IoU最大的真实框(即调用处传入的gt_boxes[argmax_overlaps, :])

def _compute_targets(ex_rois, gt_rois):
  """Compute bounding-box regression targets for an image.

  Args:
    ex_rois: array with 4 columns, one row per example box.
    gt_rois: array with 5 columns and the same number of rows as
      `ex_rois`; only the first four columns are used.

  Returns:
    The result of `bbox_transform` on the paired rows, as float32.
  """
  # Rows must pair up one-to-one, with the expected column counts.
  num_ex = ex_rois.shape[0]
  num_gt = gt_rois.shape[0]
  assert num_ex == num_gt
  assert ex_rois.shape[1] == 4
  assert gt_rois.shape[1] == 5

  # Drop the fifth gt column before computing the offsets.
  gt_coords = gt_rois[:, :4]
  deltas = bbox_transform(ex_rois, gt_coords)
  return deltas.astype(np.float32, copy=False)

def bbox_transform(ex_rois, gt_rois):
  """Return the (N, 4) regression offsets that map `ex_rois` onto `gt_rois`.

  Both inputs hold one box per row with corner coordinates in columns
  0..3 (x-min, y-min, x-max, y-max). For each pair of rows the offsets are

    dx = (gt_ctr_x - ex_ctr_x) / ex_width
    dy = (gt_ctr_y - ex_ctr_y) / ex_height
    dw = log(gt_width / ex_width)
    dh = log(gt_height / ex_height)

  and the result columns are ordered (dx, dy, dw, dh).
  """
  def _size_and_center(rois):
    # Corner form -> (width, height, center x, center y); extents are
    # inclusive, hence the + 1.0.
    widths = rois[:, 2] - rois[:, 0] + 1.0
    heights = rois[:, 3] - rois[:, 1] + 1.0
    centers_x = rois[:, 0] + 0.5 * widths
    centers_y = rois[:, 1] + 0.5 * heights
    return widths, heights, centers_x, centers_y

  ex_w, ex_h, ex_cx, ex_cy = _size_and_center(ex_rois)
  gt_w, gt_h, gt_cx, gt_cy = _size_and_center(gt_rois)

  deltas = [
    (gt_cx - ex_cx) / ex_w,
    (gt_cy - ex_cy) / ex_h,
    np.log(gt_w / ex_w),
    np.log(gt_h / ex_h),
  ]
  # Stack as rows, then transpose so each box is one row of four offsets.
  return np.vstack(deltas).T

最终求出了偏移量。

让我们看最后一个函数
input:
data:筛选后保留下来的标签、回归目标或权重
count:原来锚的总个数(调用处传入total_anchors)
inds:data中每一项在原来全部锚中的index(调用处传入inds_inside)
fill:用什么填充被筛选之后的标签

def _unmap(data, count, inds, fill=0):
  """Scatter a subset of items back into an array covering the full set.

  Args:
    data: array holding one entry (or one row) per kept item.
    count: size of the original, unfiltered set.
    inds: positions in the original set that the entries of `data` occupy.
    fill: value given to every position not listed in `inds`.

  Returns:
    A float32 array with leading dimension `count` and the trailing
    dimensions of `data`, equal to `data` at `inds` and `fill` elsewhere.
  """
  # (count,) for 1-D input, (count, ...) otherwise.
  out_shape = (count,) + data.shape[1:]
  ret = np.full(out_shape, fill, dtype=np.float32)
  if len(data.shape) == 1:
    ret[inds] = data
  else:
    ret[inds, :] = data
  return ret

映射回原来的数量上去了。

OK,完工!

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值