anchor_target_layer.py
rpn类在训练的时候主要有两个功能,第一个是get_rpn_cls_loss计算的rpn网络分类loss,第二个是get_rpn_bbox_loss计算的rpn网络的anchor边界回归loss。那么,要计算两个loss,最难的地方是如何去获得ground truth。这个ground truth的获得是通过anchor_target_layer函数实现的。源码如下:
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 16:11:17 2017
@author: Kevin Liang (modifications)
Anchor Target Layer: Creates all the anchors in the final convolutional feature
map, assigns anchors to ground truth boxes, and applies labels of "objectness"
Adapted from the official Faster R-CNN repo:
https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/anchor_target_layer.py
"""
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import sys
sys.path.append('../')
import numpy as np
import numpy.random as npr
import tensorflow as tf
from Lib.bbox_overlaps import bbox_overlaps
from Lib.bbox_transform import bbox_transform
from Lib.faster_rcnn_config import cfg
from Lib.generate_anchors import generate_anchors
#该函数计算每个anchor对应的ground truth(前景/背景,坐标偏移值)
def anchor_target_layer(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    '''
    TensorFlow wrapper around the NumPy implementation `_anchor_target_layer_py`.

    The five arguments are forwarded unchanged to `_anchor_target_layer_py`
    through `tf.py_func`:
        rpn_cls_score: RPN classification scores predicted by the network.
        gt_boxes: ground-truth boxes.
        im_dims: image dimensions.
        _feat_stride: stride of the feature map relative to the input image.
        anchor_scales: anchor scales.

    Returns:
        (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights) as tensors. `rpn_labels` is cast to int32;
        the other three are float32.
    '''
    # Run the NumPy routine inside the graph; all four outputs come back as float32.
    rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \
        tf.py_func(_anchor_target_layer_py,
                   [rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales],
                   [tf.float32, tf.float32, tf.float32, tf.float32])

    # Convert to named tensors (labels are cast to int32 first).
    rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels, tf.int32), name = 'rpn_labels')
    rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, name = 'rpn_bbox_targets')
    rpn_bbox_inside_weights = tf.convert_to_tensor(rpn_bbox_inside_weights , name = 'rpn_bbox_inside_weights')
    rpn_bbox_outside_weights = tf.convert_to_tensor(rpn_bbox_outside_weights , name = 'rpn_bbox_outside_weights')

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def _anchor_target_layer_py(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    """
    Python version

    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Algorithm:
        for each (H, W) location i
            place the A base anchors at cell i
        filter out-of-image anchors
        measure GT overlap
        label anchors as foreground (1) / background (0) / ignored (-1)
        subsample foreground and background anchors
        compute regression targets and loss weights

    Args:
        rpn_cls_score: array whose first axis is the batch (must be 1) and
            whose axes 1 and 2 are read as the feature-map height and width.
        gt_boxes: array with 5 columns per box; the first four columns are
            used as the box corners (x1, y1, x2, y2).
        im_dims: indexable whose first element is (height, width) of the image.
        _feat_stride: stride of the feature map relative to the input image.
        anchor_scales: scales handed to `generate_anchors`.

    Returns:
        Tuple of four float32 arrays, with A anchors per feature-map cell:
            rpn_labels               -- shape (1, 1, A * height, width)
            rpn_bbox_targets         -- shape (1, A * 4, height, width)
            rpn_bbox_inside_weights  -- shape (1, A * 4, height, width)
            rpn_bbox_outside_weights -- shape (1, A * 4, height, width)
    """
    im_dims = im_dims[0]  # (height, width) of the input image
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # base anchors, one row each
    _num_anchors = _anchors.shape[0]

    # allow boxes to sit over the edge by a small amount (0 = not at all)
    _allowed_border = 0

    # Only minibatch of 1 supported
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # Feature-map height and width; the total anchor count is height * width * A.
    height, width = rpn_cls_score.shape[1:3]

    # 1. Enumerate every anchor in input-image coordinates.
    shift_x = np.arange(0, width) * _feat_stride    # shape: (width,)
    shift_y = np.arange(0, height) * _feat_stride   # shape: (height,)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # both (height, width)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # (height * width, 4)

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # Indices of anchors that lie completely inside the image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_dims[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_dims[0] + _allowed_border)    # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): shape (len(anchors), len(gt_boxes))
    # np.float64 replaces the removed `np.float` alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # For every anchor: index of, and overlap with, its best-matching gt box.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For every gt box: the best overlap achieved by any anchor.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # All anchors that tie for the best overlap with some gt box.
    # NOTE(review): if a gt box overlaps no inside anchor, its max is 0 and every
    # anchor with zero overlap against it matches here -- confirm this is intended.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # bbox_targets: The deltas (relative to anchors) that Faster R-CNN should
    # try to predict at each anchor
    # TODO: This "weights" business might be deprecated. Requires investigation
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Inside weights are non-zero only for foreground anchors.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors (outside anchors get label -1 and zeros elsewhere)
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels -> (1, 1, A * height, width)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets -> (1, A * 4, height, width)
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_inside_weights -> (1, A * 4, height, width)
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_outside_weights -> (1, A * 4, height, width)
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def _unmap(data, count, inds, fill=0): #_unmap函数将图像内部的anchor映射回到生成的所有的anchor
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if len(data.shape) == 1:
ret = np.empty((count, ), dtype=np.float32)
ret.fill(fill)
ret[inds] = data
else:
ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
ret.fill(fill)
ret[inds, :] = data
return ret
def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: array with 4 columns, one row per anchor.
        gt_rois: array with 5 columns, one row per anchor (the ground-truth
            box matched to that anchor); only the first 4 columns are used.

    Returns:
        float32 array produced by `bbox_transform(ex_rois, gt_rois[:, :4])`.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
anchor_target_layer函数主要还是调用了_anchor_target_layer_py函数,然后将输出转化为tensor。下面,我们就来仔细分析一下_anchor_target_layer_py函数。在该函数中,首先通过generate_anchors函数生成了9个候选框,然后按照在共享特征图上每滑动一次对应到原图的位置生成候选框,即all_anchors。紧接着,排除了所有超出图像边界的候选框(只要有任意一条边越界就会被排除),得到anchors,之后的操作都是针对图像内部的anchors。然后,通过bbox_overlaps函数计算了所有边界内anchor与ground truth包围框之间的IoU值。接着,排除了IoU在0.3到0.7之间的anchor(通过将labels对应的值置为-1),并且为训练安排了合适数量的前景anchor和背景anchor。然后,通过_compute_targets函数计算出了每个anchor对应的坐标变换值(tx,ty,tw,th),存在bbox_targets数组里面。再计算了bbox_inside_weights和bbox_outside_weights,这两个数组在训练anchor边框修正时有重大作用。最后,通过_unmap函数将所有图像边框内部的anchor映射回所有的anchor。
anchor_target_layer主要就是为了得到两个东西,第一个东西是对应的一张图像生成的anchor的类别,在训练时需要赋予一定数量的正样本(前景)和一定数量的负样本(背景),其余的需要全部置成-1,表示训练的时候会忽略掉。第二个东西是对于每一个anchor的边框修正,在进行边框修正loss的计算时,只有前景anchor会起作用,可以看到这是bbox_inside_weights和bbox_outside_weights在实现。非前景和背景anchor对应的bbox_inside_weights和bbox_outside_weights都为0。
在anchor_target_layer函数中,有几个比较重要的函数,第一个函数就是generate_anchors,这个函数的主要作用是生成9个anchor,包含3种长宽比和3种面积。源代码及注释如下:
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 16:11:17 2017
@author: Kevin Liang (modifications)
generate_anchors and supporting functions: generate reference windows (anchors)
for Faster R-CNN. Specifically, it creates a set of k (default of 9) relative
coordinates. These references will be added on to all positions of the final
convolutional feature maps.
Adapted from the official Faster R-CNN repo:
https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py
Note: the produced anchors have indices off by 1 of what the comments claim.
Probably due to MATLAB being 1-indexed, while Python is 0-indexed.
"""
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import numpy as np
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
#array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.

    Anchors are stored as corner coordinates (x1, y1, x2, y2).

    Args:
        base_size: side length of the reference window.
        ratios: aspect ratios to enumerate (not modified here).
        scales: scale factors applied to every ratio anchor (not modified here).

    Returns:
        Array with one row per (ratio, scale) pair and 4 columns; with the
        default arguments that is 9 rows.
    """
    # Reference anchor in corner form: [0, 0, base_size - 1, base_size - 1].
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    # One anchor per aspect ratio.
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    # Expand every ratio anchor into one anchor per scale and stack the rows.
    anchors = np.vstack([_scale_enum(ratio_anchor, scales)
                         for ratio_anchor in ratio_anchors])
    return anchors
def _whctrs(anchor):#传入anchor的左上角和右下角的坐标,返回anchor的中心坐标和长宽
"""
Return width, height, x center, and y center for an anchor (window).
"""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):#由anchor中心和长宽坐标返回window,记录左上角和右下角的坐标
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis] #shape: [3,1]
hs = hs[:, np.newaxis] #shape: [3,1]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors #shape [3,4],对于每个anchor,返回了左上角和右下角的坐标值
def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.

    Every returned anchor shares the center of `anchor`; its width and height
    are rounded so that height / width approximates the ratio while the area
    stays close to the area of `anchor`.

    Returns:
        Array with one row per ratio and 4 columns of corner coordinates
        (x1, y1, x2, y2).
    """
    # Accept any sequence of ratios (list, tuple, array).
    ratios = np.asarray(ratios)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h                         # area of the reference anchor
    size_ratios = size / ratios          # e.g. 256 / [0.5, 1, 2] -> [512, 256, 128]
    ws = np.round(np.sqrt(size_ratios))  # widths,  e.g. [23, 16, 11]
    hs = np.round(ws * ratios)           # heights, e.g. [12, 16, 22]
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.

    Every returned anchor shares the center of `anchor`; its width and height
    are those of `anchor` multiplied by the scale.

    Returns:
        Array with one row per scale and 4 columns of corner coordinates
        (x1, y1, x2, y2).
    """
    # Accept any sequence of scales (list, tuple, array).
    scales = np.asarray(scales)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales  # scaled widths, one per scale
    hs = h * scales  # scaled heights, one per scale
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
if __name__ == '__main__':
    # Manual check: time one call to generate_anchors() with its defaults,
    # print the elapsed seconds and the anchors, then open an IPython shell.
    import time
    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    from IPython import embed; embed()
在上面的代码中,主要的原理就是最开始生成一个基准anchor。然后,通过这个基准anchor生成三个不同长宽比,面积一样的anchor。最后,对每个长宽比anchor生成三个不同面积尺度的anchor,最终生成9个anchor,详情请见代码注释。
第二个重要的函数,是bbox_overlaps函数,这个函数对于每一个anchor,和所有的ground truth box计算IoU值,代码如下:
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 20:25:19 2017
@author: Kevin Liang (modification)
Calculates bounding box overlaps between N bounding boxes and K query boxes, and returns an (N, K) matrix of overlap (IoU) values. In the anchor target layer above, the N boxes are the anchors and the K query boxes are the ground-truth boxes.
Written in Cython for optimization.
"""
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
# np.float64 / np.float64_t replace the `np.float` alias, which newer NumPy
# releases no longer provide; both name the same C double element type.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t


def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the overlap (intersection area / union area) between every box
    and every query box.

    Only the first four columns (x1, y1, x2, y2) of each array are read.
    Both corners are counted as part of a box, hence the `+ 1` in every
    width and height.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # Area of the k-th query box, reused for every box n.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # iw / ih: width and height of the intersection rectangle.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # ua: area of the union of the two boxes.
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
第三个重要的部分是,在计算anchor的坐标变换值的时候,使用到了bbox_transform函数,请注意在计算坐标变换的时候是将anchor的表示形式变成中心坐标与长宽。该函数代码及注释如下所示:
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 21:18:58 2017
@author: Kevin Liang (modifications)
bbox_transform and its inverse operation
"""
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
def bbox_transform(ex_rois, gt_rois):
    '''
    Receives two sets of bounding boxes, denoted by two opposite corners
    (x1,y1,x2,y2), and returns the target deltas that Faster R-CNN should aim
    for.

    Args:
        ex_rois: array with one box per row (the anchors).
        gt_rois: array with the same number of rows; row i is the ground-truth
            box matched to row i of `ex_rois`.

    Returns:
        Array of shape (num_boxes, 4) whose columns are (dx, dy, dw, dh):
        the center offsets divided by the anchor size, and the log of the
        width / height ratios.
    '''
    # Width, height and center of every anchor.
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    # Width, height and center of the ground-truth box matched to every anchor.
    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    # The four regression targets.
    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    # vstack gives (4, num_boxes); the transpose yields (num_boxes, 4).
    targets = np.vstack(
        (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
    return targets
到这里,anchor_target_layers解析就完成了。这是rpn源码中最重要的函数之一,因为会返回所有anchor对应的类别和对应的边框修正值,方便在计算loss时计算。
RPN代码中比较巧妙的部分笔者认为有如下两个:
(1)如何生成H×W×9个anchor:做法是先生成9个不同长宽比不同面积anchor,然后在图上各个滑动区域上都生成这9个anchor。
(2)如何计算每个anchor的类别(前景背景)和边框变换值。做法是首先为每个anchor计算与ground truth box对应的IoU值,排除IoU为0.3~0.7的anchor。0.3以下的为背景anchor,0.7以上的为前景anchor(此外,与每个ground truth box的IoU最大的anchor也会被标记为前景)。对于边框变换值,计算的是anchor与其IoU最大的ground truth box之间的tx,ty,tw,th四个值。
转自:https://blog.csdn.net/jiongnima/article/details/79781792 讲的非常好,感谢。