本篇博客主要讲解_proposal_target_layer
_proposal_target_layer
作用:该函数的主要作用是为后续最终的分类和回归任务打标签(生成训练目标)。
首先我们看一下输入:
rois:表示感兴趣区域,shape=(1*60*40*9,5)
roi_scores:表示感兴趣区域的分数,shape=(1*60*40*9,1)
def _proposal_target_layer(self, rois, roi_scores, name):
    """Sample proposals and attach training targets for the detection head.

    Runs the NumPy routine ``proposal_target_layer`` through ``tf.py_func``,
    declares the static shape of each of its six outputs, and stores the
    resulting targets on the network object.

    Args:
        rois: Proposal tensor forwarded to ``proposal_target_layer``.
        roi_scores: Score tensor forwarded alongside ``rois``.
        name: Name of the variable scope the ops are created in.

    Returns:
        The sampled ``(rois, roi_scores)`` tensors.
    """
    with tf.variable_scope(name):
        sampled = tf.py_func(
            proposal_target_layer,
            [rois, roi_scores, self._gt_boxes, self._num_classes],
            [tf.float32] * 6,
            name="proposal_target")
        rois, roi_scores, labels, bbox_targets, inside_w, outside_w = sampled

        # Declare the static shape of every py_func output.
        batch = cfg.TRAIN.BATCH_SIZE
        target_width = self._num_classes * 4
        rois.set_shape([batch, 5])
        roi_scores.set_shape([batch])
        labels.set_shape([batch, 1])
        bbox_targets.set_shape([batch, target_width])
        inside_w.set_shape([batch, target_width])
        outside_w.set_shape([batch, target_width])

        # Record the targets; labels are converted to int32 on the way in.
        targets = self._proposal_targets
        targets['rois'] = rois
        targets['labels'] = tf.to_int32(labels, name="to_int32")
        targets['bbox_targets'] = bbox_targets
        targets['bbox_inside_weights'] = inside_w
        targets['bbox_outside_weights'] = outside_w

        # Expose the same tensors to the score summaries.
        self._score_summaries.update(targets)

        return rois, roi_scores
proposal_target_layer
看一下这个函数的输入:
rpn_rois:表示感兴趣区域,shape=(1*60*40*9,5)
rpn_scores:表示感兴趣区域的分数,shape=(1*60*40*9,1)
gt_boxes:真实框,shape=(K,5)
_num_classes:类别数量
#rpn_rois shape=(1*60*40*9,5)以及rpn_scores shape=(1*60*40*9,1) gt_boxes shape=(K,5)
#这一个layer用于创建训练后续分类回归的真值标签以及回归目标,打标签
def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """Assign object detection proposals to ground-truth targets.

    Produces proposal classification labels and bounding-box regression
    targets for a sampled subset of the proposals.

    Args:
        rpn_rois: Proposal RoIs, one row per RoI laid out as
            (0, x1, y1, x2, y2).
        rpn_scores: Scores of those proposals, one row per RoI.
        gt_boxes: Ground-truth boxes; the last column holds the class label.
        _num_classes: Number of classes; sets the width (4 per class) of the
            regression target arrays.

    Returns:
        Tuple ``(rois, roi_scores, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)`` for the sampled RoIs.
    """
    candidate_rois = rpn_rois
    candidate_scores = rpn_scores

    if cfg.TRAIN.USE_GT:
        # Include the ground-truth boxes in the candidate set: drop their
        # label column, prepend a zero column, and give them zero scores.
        pad = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        gt_as_rois = np.hstack((pad, gt_boxes[:, :-1]))
        candidate_rois = np.vstack((candidate_rois, gt_as_rois))
        # not sure if it a wise appending, but anyway i am not using it
        candidate_scores = np.vstack((candidate_scores, pad))

    num_images = 1
    # Sampling budget per image and the foreground share of that budget.
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

    # Sample RoIs together with their labels and regression targets.
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois(
        candidate_rois, candidate_scores, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    # Normalise every output to its final 1-D / 2-D layout.
    target_width = _num_classes * 4
    rois = rois.reshape(-1, 5)
    roi_scores = roi_scores.reshape(-1)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, target_width)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, target_width)
    # Outside weights are 1.0 wherever the inside weight is positive, else 0.0.
    positive_mask = np.array(bbox_inside_weights > 0)
    bbox_outside_weights = positive_mask.astype(np.float32)

    return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
详细代码解析
_sample_rois
作用:计算iou,标签分类,抽样,最终用于后面的分类回归。
#all_rois shape=(1*60*40*9,5), all_scores shape=(1*60*40*9,1), gt_boxes shape=(K,5)
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background examples.

    Each RoI is matched to the ground-truth box it overlaps most. RoIs whose
    best overlap is >= ``cfg.TRAIN.FG_THRESH`` are foreground candidates; RoIs
    whose best overlap lies in ``[cfg.TRAIN.BG_THRESH_LO,
    cfg.TRAIN.BG_THRESH_HI)`` are background candidates. A fixed total of
    ``rois_per_image`` RoIs is drawn; a candidate pool that is too small to
    fill its quota is sampled with replacement.

    Args:
        all_rois: Array of RoIs; columns 1:5 hold the box coordinates.
        all_scores: Per-RoI scores, indexed along axis 0 like ``all_rois``.
        gt_boxes: Ground-truth boxes; columns 0:4 are the box coordinates and
            column 4 is the class label.
        fg_rois_per_image: Maximum number of foreground RoIs to sample when
            both foreground and background candidates exist.
        rois_per_image: Total number of RoIs to sample.
        num_classes: Number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)``
        for the sampled RoIs, foreground samples first.

    Raises:
        ValueError: If no RoI qualifies as foreground or as background.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 is spelled out because the ``np.float`` alias was removed in
    # NumPy 1.24; it referred to the same 64-bit float type.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # index of the best-matching gt box per RoI
    max_overlaps = overlaps.max(axis=1)  # overlap with that best-matching gt box
    labels = gt_boxes[gt_assignment, 4]  # class label of the matched gt box

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Cap the foreground quota at the number of candidates available.
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        # Background fills whatever is left of the budget.
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        # Foreground only: the whole budget is foreground.
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # Background only: the whole budget is background.
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Fail loudly instead of dropping into an interactive debugger and
        # then continuing with empty index sets.
        raise ValueError(
            "No foreground or background RoIs to sample: no RoI has a max "
            "overlap >= FG_THRESH or within [BG_THRESH_LO, BG_THRESH_HI).")

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0 (they follow the fg samples)
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # Compact per-RoI targets: one row of (label, 4 regression values).
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # Expand the compact targets into the per-class layout.
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
最终返回的是抽样的结果
这个函数里面用到了以下函数:
1._compute_targets
2._get_bbox_regression_labels
1._compute_targets
之前也有一个_compute_targets函数和这个可不一样哦!!!
ex_rois:抽样得到的roi(候选框)
gt_rois:与每个roi对应的真实框
输出:类别标签和偏移量(偏移量还做了标准化),两者按列拼在一起
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: (N, 4) array of sampled boxes.
        gt_rois: (N, 4) array of the ground-truth box paired with each row of
            ``ex_rois``.
        labels: Length-N array of class labels, one per box.

    Returns:
        (N, 5) float32 array: the label in column 0 followed by the four
        regression targets.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    deltas = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        deltas = (deltas - means) / stds

    # Prepend the labels as a column so each row is (label, 4 targets).
    label_column = labels[:, np.newaxis]
    packed = np.hstack((label_column, deltas))
    return packed.astype(np.float32, copy=False)
bbox_transform这个函数之前讲过,再看下吧
def bbox_transform(ex_rois, gt_rois):
    """Compute the regression deltas that map ``ex_rois`` onto ``gt_rois``.

    Boxes are rows of ``(x1, y1, x2, y2)``; a side length is computed as
    ``x2 - x1 + 1``. For each pair of rows the deltas are::

        dx = (gt_ctr_x - ex_ctr_x) / ex_width
        dy = (gt_ctr_y - ex_ctr_y) / ex_height
        dw = log(gt_width / ex_width)
        dh = log(gt_height / ex_height)

    Args:
        ex_rois: (N, 4) array-like of source boxes.
        gt_rois: (N, 4) array-like of target boxes, row-aligned with
            ``ex_rois``.

    Returns:
        (N, 4) ndarray whose columns are ``(dx, dy, dw, dh)``.
    """
    # Accept lists/tuples as well as ndarrays; ndarrays pass through untouched
    # so their dtype is preserved.
    ex_rois = np.asarray(ex_rois)
    gt_rois = np.asarray(gt_rois)

    # Width, height and centre of the source boxes.
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    # Same quantities for the target boxes.
    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    # Centre shifts are scaled by the source size; size ratios are in log space.
    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    # Stack the four length-N vectors as rows, then transpose to (N, 4).
    targets = np.vstack(
        (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
    return targets
2._get_bbox_regression_labels
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact regression targets into the per-class layout.

    Bounding-box regression targets (bbox_target_data) are stored in a
    compact form N x (class, tx, ty, tw, th). This function expands those
    targets into the 4-of-4*K representation used by the network (i.e. only
    one class has non-zero targets).

    Args:
        bbox_target_data: N x 5 array of (class, tx, ty, tw, th) rows.
        num_classes: Number of classes K.

    Returns:
        bbox_target (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    class_ids = bbox_target_data[:, 0]
    bbox_targets = np.zeros((class_ids.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)

    # Only rows with class > 0 receive targets; rows with class 0 (presumably
    # background) keep all-zero targets and weights.
    for row in np.flatnonzero(class_ids > 0):
        first_col = int(4 * class_ids[row])
        cols = slice(first_col, first_col + 4)
        # Write the four targets and their weights into this class's slot.
        bbox_targets[row, cols] = bbox_target_data[row, 1:]
        bbox_inside_weights[row, cols] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    return bbox_targets, bbox_inside_weights