目录
forward(self, img, img_meta, return_loss=True, **kwargs):
训练代码
train
tools,训练代码
parse_args
用来接收各种参数
main
建立模型
model = build_detector(
cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
建立数据集
datasets = [build_dataset(cfg.data.train)]
调用训练api
train_detector(
model,
datasets,
cfg,
distributed=distributed,
validate=args.validate,
logger=logger)
train
mmdet/api,训练代码接口
train_detector
调用分布训练_dist_train
或者非分布训练_non_dist_train
batch_processor/parse_losses
整合batch中的loss
build_optimizer
配置optimizer
_non_dist_train/_dist_train
训练 主要看一下 _non_dist_train
载入数据
datasets = [build_dataset(cfg.data.train)]
配置dataloader,采样是GroupSampler,然后是官方的dataloader
data_loaders = [
build_dataloader(
ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
for ds in dataset
]
建立model
model = MMDistributedDataParallel(model.cuda())
建立optimizer,函数在本文件里就有
optimizer = build_optimizer(model, cfg.optimizer)
配置runner
runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
cfg.log_level)
Runner是mmcv自带
class Runner(object):
"""A training helper for PyTorch.
Args:
model (:obj:`torch.nn.Module`): The model to be run.
batch_processor (callable): A callable method that processes a data
batch. The interface of this method should be
`batch_processor(model, data, train_mode) -> dict`
optimizer (dict or :obj:`torch.optim.Optimizer`): If it is a dict,
runner will construct an optimizer according to it.
work_dir (str, optional): The working directory to save checkpoints
and logs.
log_level (int): Logging level.
logger (:obj:`logging.Logger`): Custom logger. If `None`, use the
default logger.
meta (dict | None): A dict that records some important information such
as environment info and seed, which will be logged in logger hook.
"""
gogogo 开始跑
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
数据
DATASETS 是一个 Registry,其中注册了 CocoDataset;PIPELINE 在 CustomDataset 中使用,CustomDataset 是各数据集类的父类,后文会介绍
def build_dataset(cfg, default_args=None):
# Abridged excerpt: the `...` lines stand for code omitted from these notes.
...
# The dataset object is created by build_from_cfg using the DATASETS registry.
dataset = build_from_cfg(cfg, DATASETS, default_args)
...
return dataset
CocoDataset
load_annotations
读入图片id
读入类别并转换成label
返回图片信息
getitem
直接继承的父类
_filter_imgs
改写,加了一个过滤没有gt的功能
get_ann_info
读取img[index]的annotation,送入_parse_ann_info
处理
_parse_ann_info
这是处理数据的主要部分
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
gt_masks_ann = []
...
ann = dict(
bboxes=gt_bboxes,
labels=gt_labels,
bboxes_ignore=gt_bboxes_ignore,
masks=gt_masks_ann,
seg_map=seg_map)
CustomDataset
getitem
继承DATASET 分别对test train数据进行处理
prepare_test_img/prepare_train_img
prepare_train_img
get_ann_info获得标注
pre_pipeline 给结果中加入 标注信息(图片地址)
pipeline 由PIPELINE处理
self.pipeline = Compose(pipeline)
模型
CascadeRPNHead -> (继承)-> CascadeAnchorHead -> (loss中的函数)-> region_anchor_target ->(region_anchor_target_single中类调用bbox_assigner.assign)-> RegionAssigner.assign ->返回一个AssignResult类
build detector
def build(cfg, registry, default_args=None):
# Build from a single config, or from a list of configs wrapped in an
# nn.Sequential.
if isinstance(cfg, list):
# Every entry is built with the same registry and default_args, in list order.
modules = [
build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
]
return nn.Sequential(*modules)
else:
return build_from_cfg(cfg, registry, default_args)
Cascade RPN
继承了BaseDetector作为模板。
继承了RPNTestMixin,没看出来有啥用,此类源码看起来是用来跑RPN的test
架构
self.backbone = builder.build_backbone(backbone)
self.neck = builder.build_neck(neck) if neck is not None else None
self.rpn_head = nn.ModuleList()
for head in rpn_head:
self.rpn_head.append(builder.build_head(head))
forward_train
def forward_train(self, img, img_meta, gt_bboxes=None):
# Run every RPN stage in sequence and return a dict of losses whose keys are
# 's{stage}.{loss name}'.
# feature extraction (original note: FPN + ResNet)
x = self.extract_feat(img)
featmap_sizes = [featmap.size()[-2:] for featmap in x]
# initial anchors and their valid flags come from the first-stage head
anchor_list, valid_flag_list = self.rpn_head[0].init_anchors(
featmap_sizes, img_meta)
losses = dict()
for i in range(self.num_stages):# original note: num_stages == 2
rpn_train_cfg = self.train_cfg.rpn[i]
rpn_head = self.rpn_head[i]
if rpn_head.feat_adapt:# original note: not the case in the first stage
# offsets are derived from the current anchors and handed to the head
offset_list = anchor_offset(anchor_list,
rpn_head.anchor_strides,
featmap_sizes)
else:
offset_list = None
# the head also returns x, which becomes the input of the next stage
x, cls_score, bbox_pred = rpn_head(x, offset_list)
rpn_loss_inputs = (anchor_list, valid_flag_list, cls_score,
bbox_pred, gt_bboxes, img_meta, rpn_train_cfg)
stage_loss = rpn_head.loss(*rpn_loss_inputs)
# prefix each loss name with its stage index so stages do not collide
for name, value in stage_loss.items():
losses['s{}.{}'.format(i, name)] = value
# refine boxes: every stage except the last updates the anchors that the
# next stage starts from
if i < self.num_stages - 1:
anchor_list = rpn_head.refine_bboxes(anchor_list, bbox_pred,
img_meta)
return losses
用到了rpn_head的方法
- init_anchors
- loss,loss的input:
rpn_loss_inputs = (anchor_list,valid_flag_list, cls_score, bbox_pred, gt_bboxes, img_meta, rpn_train_cfg)
- refine_bboxes
- anchor_offset(这个不是rpn_head)
anchor_offset(anchor_list, rpn_head.anchor_strides,featmap_sizes)
CascadeRPNHead
继承了CascadeAnchorHead作为模板
AdaptiveConv
adapt=True
则为 DeformConv,否则为普通的dilation
forward_single
forward 继承父类 forward
def forward_single(self, x, offset):
# Returns (feature handed to the next stage, cls_score or None, bbox_pred).
bridged_x = x
x = self.relu(self.rpn_conv(x, offset))# rpn_conv is the AdaptiveConv (per the notes)
# With bridged_feature set, the feature after rpn_conv + ReLU is passed on;
# otherwise the untouched input feature is passed on.
if self.bridged_feature:
bridged_x = x # update feature
# classification is optional; regression is always computed
cls_score = self.rpn_cls(x) if self.with_cls else None
bbox_pred = self.rpn_reg(x)
return bridged_x, cls_score, bbox_pred
loss1
loss 继承父类 loss
加了一个是否分类的判断,第一层只回归,第二次分类+回归
if self.with_cls:
return dict(
loss_rpn_cls=losses['loss_cls'],
loss_rpn_reg=losses['loss_reg'])
return dict(loss_rpn_reg=losses['loss_reg'])
get_bboxes_single
暂时涉及不到,这个是生成后续proposal的,和rpn无关
CascadeAnchorHead
forward
def forward(self, feats, offset_list=None):
# Run forward_single over feats and offset_list via multi_apply
# (presumably one call per (feature, offset) pair — confirm against multi_apply).
if offset_list is None:
# no offsets supplied: use None for every entry of feats
offset_list = [None for _ in range(len(feats))]
return multi_apply(self.forward_single, feats, offset_list)
init_anchors
valid_flag_list list
:判断anchor是否在合法范围内
生成的 anchor_list
, valid_flag_list
大小为:img*FPN*anchors
#anchor_list
#torch.Size([60800, 4])
#torch.Size([15200, 4])
#torch.Size([3800, 4])
#torch.Size([950, 4])
#torch.Size([247, 4])
#valid_flag_list
#torch.Size([60800])
#torch.Size([15200])
#torch.Size([3800])
#torch.Size([950])
#torch.Size([247])
测试了一下,估计是config文件里的pad设置,所有的valid_flag_list
都是1
refine_bboxes
只在第一层起作用,利用第一层的regression微调anchor
delta2bbox计算就是正常的计算方式
\delta_x = (t_x-a_x)/a_w
\delta_y = (t_y-a_y)/a_h
\delta_w = log(t_w/a_w)
\delta_h = log(t_h/a_h)
so δ去预测变化得到a' 尽可能地向t靠近
a^{'}_x = \delta_x*a_w+a_x
a^{'}_y = \delta_y*a_h+a_y
a^{'}_w = exp(\delta_w)*a_w
a^{'}_h = exp(\delta_h)*a_h
loss
assigner 第一阶段是RegionAssigner
,第二阶段是MaxIoUAssigner
RegionAssigner
这阶段不用找到负样本 只需要正样本来返回回归就可以了
def region_anchor_target(anchor_list,
valid_flag_list,
gt_bboxes_list,
img_metas,
featmap_sizes,
anchor_scale,
anchor_strides,
target_means,
target_stds,
cfg,
gt_bboxes_ignore_list=None,
gt_labels_list=None,
label_channels=1,
sampling=True,
unmap_outputs=True):
# NOTE: only the signature and docstring are quoted in these notes; the
# function body is omitted.
# TODO add docs
"""Compute regression and classification targets for anchors.
Args:
anchor_list (list[list]): Multi level anchors of each image.
valid_flag_list (list[list]): Multi level valid flags of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
featmap_sizes: Not documented in the original; presumably the per-level
feature map sizes -- TODO confirm.
anchor_scale: Not documented in the original -- TODO confirm.
anchor_strides: Not documented in the original; presumably the per-level
anchor strides -- TODO confirm.
target_means (Iterable): Mean value of regression targets.
target_stds (Iterable): Std value of regression targets.
cfg (dict): RPN train configs.
Returns:
tuple
"""
region_anchor_target_single
生成all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,pos_inds_list, neg_inds_list
,变换每张图的各个变量到每个level上
region_anchor_target_single
调用RegionAssigner
位于 /DATA7_DB7/data/tfhou/rpn/Cascade-RPN/mmdet/core/bbox/assigners/region_assigner.py
代码里有文档,这里仅列出摘要
each bbox will be assigned with -1, 0, or a positive number. -1 means don't care, 0 means negative sample, a positive number is the 1-based index of the assigned gt
1. Assign every anchor to 0 (negative)
For each gt_bboxes:
2. Compute ignore flags based on ignore_region, then assign -1 to anchors w.r.t. ignore flags
3. Compute pos flags based on center_region, then assign gt_bboxes to anchors w.r.t. pos flags
4. Compute ignore flags based on adjacent anchor lvl, then assign -1 to anchors w.r.t. ignore flags
5. Assign anchor outside of image to -1
返回的 result 包含:num_gts(gt 数量)和 gt_inds(每个 anchor 的标注:0 为负样本,-1 为忽略,正整数为所分配 gt 的序号,从 1 开始)
第一阶段sampling为False
第一阶段就是回归loss,并且对所有的anchor进行了微调,并且对特征进行adaptive的卷积进入下一层
MaxIoUAssigner
第二阶段首先计算anchor_offset,根据现在的anchor位置和预定的位置进行计算
正常的iou寻找正负样本
def anchor_target(anchor_list,
valid_flag_list,
gt_bboxes_list,
img_metas,
target_means,
target_stds,
cfg,
gt_bboxes_ignore_list=None,
gt_labels_list=None,
label_channels=1,
sampling=True,
unmap_outputs=True):
# NOTE: only the signature and docstring are quoted in these notes; the
# function body is omitted.
"""Compute regression and classification targets for anchors.
Args:
anchor_list (list[list]): Multi level anchors of each image.
valid_flag_list (list[list]): Multi level valid flags of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
target_means (Iterable): Mean value of regression targets.
target_stds (Iterable): Std value of regression targets.
cfg (dict): RPN train configs.
Returns:
tuple
"""
由于有了采样,进入assign_and_sample
函数
bbox_assigner = build_assigner(cfg.assigner)
bbox_sampler = build_sampler(cfg.sampler)
assign_result = bbox_assigner.assign(bboxes, gt_bboxes,
gt_bboxes_ignore,gt_labels)
sampling_result = bbox_sampler.sample(assign_result, bboxes,
gt_bboxes,gt_labels)
assigner为MaxIoUAssigner
sampling为RandomSampler
BaseDetector
forward(self, img, img_meta, return_loss=True, **kwargs):
forward_train没写 留给儿砸
def forward(self, img, img_meta, return_loss=True, **kwargs):
# Dispatch on return_loss: forward_train when it is truthy, forward_test
# otherwise; extra keyword arguments are forwarded unchanged.
if return_loss:
return self.forward_train(img, img_meta, **kwargs)
else:
return self.forward_test(img, img_meta, **kwargs)
RPNTestMixin
简单跑个rpn前向吧
simple_test_rpn
aug_test_rpn
附录代码
RegionAssigner代码
import torch
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
def calc_region(bbox, ratio, stride, featmap_size=None):
    """Compute a shrunken region of ``bbox`` in feature-map coordinates.

    The box is mapped to feature-map coordinates with
    ``(bbox - (stride - 1) * 0.5) / stride`` (the base anchor is located at
    ``(stride - 1) * 0.5``). Each side is then moved inwards by ``ratio`` of
    the box extent and the result is rounded.

    Args:
        bbox (Tensor): Box as ``(x1, y1, x2, y2)``; indices 0-3 are read.
        ratio (float): Fraction of the box extent removed from each side.
            A value above 0.5 yields an inverted region (``x1 > x2``).
        stride (int | float): Stride of the feature level.
        featmap_size (tuple, optional): ``(h, w)`` of the feature map. When
            given, x is clamped to ``[0, w - 1]`` and y to ``[0, h - 1]``.

    Returns:
        tuple[Tensor]: ``(x1, y1, x2, y2)`` of the region.
    """
    # Base anchor locates in (stride - 1) * 0.5
    f_bbox = (bbox - (stride - 1) * 0.5) / stride
    keep = 1 - ratio
    x1 = torch.round(keep * f_bbox[0] + ratio * f_bbox[2])
    y1 = torch.round(keep * f_bbox[1] + ratio * f_bbox[3])
    x2 = torch.round(ratio * f_bbox[0] + keep * f_bbox[2])
    y2 = torch.round(ratio * f_bbox[1] + keep * f_bbox[3])
    if featmap_size is not None:
        max_y = featmap_size[0] - 1
        max_x = featmap_size[1] - 1
        x1 = x1.clamp(min=0, max=max_x)
        y1 = y1.clamp(min=0, max=max_y)
        x2 = x2.clamp(min=0, max=max_x)
        y2 = y2.clamp(min=0, max=max_y)
    return (x1, y1, x2, y2)
def anchor_ctr_inside_region_flags(anchors, stride, region):
    """Flag anchors whose centre lies inside ``region`` (bounds inclusive).

    Anchors are mapped to feature-map coordinates with the same transform as
    ``calc_region`` before their centres are compared with the region.

    Args:
        anchors (Tensor): Anchors, indexed as ``anchors[:, 0..3]`` for
            ``x1, y1, x2, y2``.
        stride (int | float): Stride of the feature level.
        region (tuple): ``(x1, y1, x2, y2)`` in feature-map coordinates.

    Returns:
        Tensor: One flag per anchor, true where the centre is inside.
    """
    left, top, right, bottom = region
    f_anchors = (anchors - (stride - 1) * 0.5) / stride
    ctr_x = (f_anchors[:, 0] + f_anchors[:, 2]) * 0.5
    ctr_y = (f_anchors[:, 1] + f_anchors[:, 3]) * 0.5
    inside_x = (ctr_x >= left) & (ctr_x <= right)
    inside_y = (ctr_y >= top) & (ctr_y <= bottom)
    return inside_x & inside_y
def anchor_outside_flags(flat_anchors,
                         valid_flags,
                         img_shape,
                         allowed_border=0):
    """Flag anchors that are invalid or reach outside the image.

    Args:
        flat_anchors (Tensor): Anchors, indexed as ``flat_anchors[:, 0..3]``
            for ``x1, y1, x2, y2``.
        valid_flags (Tensor): Boolean flag per anchor; anchors that are not
            valid are always reported as outside.
        img_shape (tuple): Image shape; the first two entries are read as
            ``(height, width)``.
        allowed_border (int): Margin by which an anchor may exceed the image.
            A negative value disables the border check, so only
            ``valid_flags`` decides.

    Returns:
        Tensor: One flag per anchor, true where the anchor is outside.
    """
    img_h, img_w = img_shape[:2]
    if allowed_border < 0:
        # Border check disabled: an anchor is "outside" only if it is invalid.
        return ~valid_flags
    inside_flags = valid_flags & \
        (flat_anchors[:, 0] >= -allowed_border) & \
        (flat_anchors[:, 1] >= -allowed_border) & \
        (flat_anchors[:, 2] < img_w + allowed_border) & \
        (flat_anchors[:, 3] < img_h + allowed_border)
    return ~inside_flags
class RegionAssigner(BaseAssigner):
    """Assign a gt bbox, background or "don't care" to each anchor by region.

    Each anchor will be assigned with `-1`, `0`, or a positive integer
    indicating the ground truth index.

    - -1: don't care
    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        center_ratio (float): Side-length fraction of a gt box, centred on
            the box, that forms its positive (centre) region.
        ignore_ratio (float): Side-length fraction of a gt box, centred on
            the box, that forms its ignore region.
    """

    def __init__(self, center_ratio=0.2, ignore_ratio=0.5):
        self.center_ratio = center_ratio
        self.ignore_ratio = ignore_ratio

    def assign(self,
               mlvl_anchors,
               mlvl_valid_flags,
               gt_bboxes,
               img_meta,
               featmap_sizes,
               anchor_scale,
               anchor_strides,
               gt_bboxes_ignore=None,
               gt_labels=None,
               allowed_border=0):
        """Assign gt to anchors.

        This method assigns a gt bbox to every anchor; each anchor will be
        assigned with -1, 0, or a positive number. -1 means don't care,
        0 means negative sample, positive number is the index (1-based) of
        assigned gt.

        The assignment is done in following steps, the order matters.

        1. Assign every anchor to 0 (negative)
        For each gt_bboxes:
            2. Compute ignore flags based on ignore_region then
               assign -1 to anchors w.r.t. ignore flags
            3. Compute pos flags based on center_region then
               assign gt_bboxes to anchors w.r.t. pos flags
            4. Compute ignore flags based on adjacent anchor lvl then
               assign -1 to anchors w.r.t. ignore flags
        5. Assign anchor outside of image to -1

        Args:
            mlvl_anchors (list[Tensor]): Anchors of each level; level ``lvl``
                must hold ``h * w`` anchors for ``featmap_sizes[lvl]``.
            mlvl_valid_flags (list[Tensor]): Valid flags of each level.
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            img_meta (dict): Meta info of the image; ``'img_shape'`` is read.
            featmap_sizes (list[tuple]): ``(h, w)`` of each level.
            anchor_scale (int | float): Multiplied by ``anchor_strides[0]``
                to get the anchor size of the lowest level.
            anchor_strides (list): Stride of each level.
            gt_bboxes_ignore (Tensor, optional): Not supported yet; must be
                None.
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
            allowed_border (int): Passed to ``anchor_outside_flags``.

        Returns:
            :obj:`AssignResult`: The assign result.

        Raises:
            NotImplementedError: If ``gt_bboxes_ignore`` is given.
            ValueError: If ``gt_bboxes`` is empty.
        """
        # TODO support gt_bboxes_ignore
        if gt_bboxes_ignore is not None:
            raise NotImplementedError
        if gt_bboxes.shape[0] == 0:
            raise ValueError('No gt bboxes')

        num_gts = gt_bboxes.shape[0]
        num_lvls = len(mlvl_anchors)
        # Per-side shrink ratios: 0.4 / 0.25 with the default settings.
        r1 = (1 - self.center_ratio) / 2
        r2 = (1 - self.ignore_ratio) / 2

        # Pick the level of each gt from its scale relative to the anchor
        # size of the lowest level.
        scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) *
                           (gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1))
        min_anchor_size = scale.new_full(
            (1, ), float(anchor_scale * anchor_strides[0]))
        target_lvls = torch.floor(
            torch.log2(scale) - torch.log2(min_anchor_size) + 0.5)
        target_lvls = target_lvls.clamp(min=0, max=num_lvls - 1).long()

        # 1. assign 0 (negative) by default
        mlvl_assigned_gt_inds = []
        mlvl_ignore_flags = []
        for lvl in range(num_lvls):
            h, w = featmap_sizes[lvl]
            assert h * w == mlvl_anchors[lvl].shape[0]
            assigned_gt_inds = gt_bboxes.new_full((h * w, ),
                                                  0,
                                                  dtype=torch.long)
            ignore_flags = torch.zeros_like(assigned_gt_inds)
            mlvl_assigned_gt_inds.append(assigned_gt_inds)
            mlvl_ignore_flags.append(ignore_flags)

        for gt_id in range(num_gts):
            lvl = target_lvls[gt_id].item()
            featmap_size = featmap_sizes[lvl]
            stride = anchor_strides[lvl]
            anchors = mlvl_anchors[lvl]
            gt_bbox = gt_bboxes[gt_id, :4]

            # Both regions depend only on the gt box: its width and height
            # are scaled to ignore_ratio / center_ratio around the centre.
            ignore_region = calc_region(gt_bbox, r2, stride, featmap_size)
            ctr_region = calc_region(gt_bbox, r1, stride, featmap_size)

            # 2. Assign -1 to ignore flags
            ignore_flags = anchor_ctr_inside_region_flags(
                anchors, stride, ignore_region)
            mlvl_assigned_gt_inds[lvl][ignore_flags > 0] = -1

            # 3. Assign gt_bboxes to pos flags
            pos_flags = anchor_ctr_inside_region_flags(anchors, stride,
                                                       ctr_region)
            mlvl_assigned_gt_inds[lvl][pos_flags > 0] = gt_id + 1

            # 4. Assign -1 to ignore adjacent lvl
            # calc_region takes (bbox, ratio, stride, featmap_size). Passing
            # the stride in the ratio slot produced an inverted, empty region
            # (x1 > x2), so no adjacent-level anchor was ever ignored.
            for adj_lvl in (lvl - 1, lvl + 1):
                if adj_lvl < 0 or adj_lvl > num_lvls - 1:
                    continue
                adj_stride = anchor_strides[adj_lvl]
                adj_ignore_region = calc_region(gt_bbox, r2, adj_stride,
                                                featmap_sizes[adj_lvl])
                ignore_flags = anchor_ctr_inside_region_flags(
                    mlvl_anchors[adj_lvl], adj_stride, adj_ignore_region)
                mlvl_ignore_flags[adj_lvl][ignore_flags > 0] = 1

        # 4. (cont.) Assign -1 to ignore adjacent lvl
        for lvl in range(num_lvls):
            ignore_flags = mlvl_ignore_flags[lvl]
            mlvl_assigned_gt_inds[lvl][ignore_flags > 0] = -1

        # 5. Assign -1 to anchor outside of image
        flat_assigned_gt_inds = torch.cat(mlvl_assigned_gt_inds)
        flat_anchors = torch.cat(mlvl_anchors)
        flat_valid_flags = torch.cat(mlvl_valid_flags)
        assert (flat_assigned_gt_inds.shape[0] == flat_anchors.shape[0] ==
                flat_valid_flags.shape[0])
        outside_flags = anchor_outside_flags(flat_anchors, flat_valid_flags,
                                             img_meta['img_shape'],
                                             allowed_border)
        flat_assigned_gt_inds[outside_flags] = -1

        if gt_labels is not None:
            assigned_labels = torch.zeros_like(flat_assigned_gt_inds)
            # The mask must come from the flattened indices; the per-level
            # `assigned_gt_inds` left over from the init loop only covers the
            # last level.
            pos_flags = flat_assigned_gt_inds > 0
            assigned_labels[pos_flags] = gt_labels[
                flat_assigned_gt_inds[pos_flags] - 1]
        else:
            assigned_labels = None

        return AssignResult(
            num_gts, flat_assigned_gt_inds, None, labels=assigned_labels)
RandomSampler代码
import numpy as np
import torch
from .base_sampler import BaseSampler
class RandomSampler(BaseSampler):
    """Sampler that picks positive and negative samples at random.

    Notes kept from the original author: BaseSampler provides the ``sample``
    function, which uses the positive and negative samplers below. With the
    studied config the expected number of positives is 256 * 0.5 == 128, but
    there are always fewer positives than that, while there are roughly
    50000+ negatives.
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        # Extra keyword arguments are accepted but not forwarded.
        super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
                                            add_gt_as_proposals)

    @staticmethod
    def random_choice(gallery, num):
        """Randomly select ``num`` elements from the gallery.

        It seems that Pytorch's implementation is slower than numpy so we use
        numpy to randperm the indices.

        Args:
            gallery (list | np.ndarray | Tensor): Candidates to pick from. A
                list is converted to ``np.ndarray`` first.
            num (int): Number of elements to pick; must not exceed
                ``len(gallery)``.

        Returns:
            np.ndarray | Tensor: The picked elements, ndarray for list/ndarray
            input and a tensor otherwise.
        """
        assert len(gallery) >= num
        if isinstance(gallery, list):
            gallery = np.array(gallery)
        cands = np.arange(len(gallery))
        np.random.shuffle(cands)
        rand_inds = cands[:num]
        if not isinstance(gallery, np.ndarray):
            # Tensor gallery: index with a tensor on the gallery's device.
            rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
        return gallery[rand_inds]

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Randomly sample some positive samples."""
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            # Not more candidates than requested: keep them all.
            return pos_inds
        return self.random_choice(pos_inds, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Randomly sample some negative samples."""
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if neg_inds.numel() <= num_expected:
            # Not more candidates than requested: keep them all.
            return neg_inds
        return self.random_choice(neg_inds, num_expected)
MaxIoUAssigner代码
import torch
from ..geometry import bbox_overlaps
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class MaxIoUAssigner(BaseAssigner):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposals will be assigned with `-1`, `0`, or a positive integer
    indicating the ground truth index.

    - -1: don't care
    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        pos_iou_thr (float): IoU threshold for positive bboxes.
        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
        min_pos_iou (float): Minimum iou for a bbox to be considered as a
            positive bbox. Positive samples can have smaller IoU than
            pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
        gt_max_assign_all (bool): Whether to assign all bboxes with the same
            highest overlap with some gt to that gt.
        ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
            `gt_bboxes_ignore` is specified). Negative values mean not
            ignoring any bboxes.
        ignore_wrt_candidates (bool): Whether to compute the iof between
            `bboxes` and `gt_bboxes_ignore`, or the contrary.
    """

    def __init__(self,
                 pos_iou_thr,
                 neg_iou_thr,
                 min_pos_iou=.0,
                 gt_max_assign_all=True,
                 ignore_iof_thr=-1,
                 ignore_wrt_candidates=True):
        self.pos_iou_thr = pos_iou_thr
        self.neg_iou_thr = neg_iou_thr
        self.min_pos_iou = min_pos_iou
        self.gt_max_assign_all = gt_max_assign_all
        self.ignore_iof_thr = ignore_iof_thr
        self.ignore_wrt_candidates = ignore_wrt_candidates

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign gt to bboxes.

        This method assign a gt bbox to every bbox (proposal/anchor), each bbox
        will be assigned with -1, 0, or a positive number. -1 means don't care,
        0 means negative sample, positive number is the index (1-based) of
        assigned gt.

        The assignment is done in following steps, the order matters.

        1. assign every bbox to -1
        2. assign proposals whose iou with all gts < neg_iou_thr to 0
        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that gt
        4. for each gt bbox, assign its nearest proposals (may be more than
           one) to itself

        Args:
            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.

        Raises:
            ValueError: If ``bboxes`` or ``gt_bboxes`` is empty.
        """
        if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:
            raise ValueError('No gt or bboxes')
        # Only the first four columns are used as box coordinates.
        bboxes = bboxes[:, :4]
        # IoU between every gt (rows) and every bbox (columns).
        overlaps = bbox_overlaps(gt_bboxes, bboxes)

        if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
                gt_bboxes_ignore.numel() > 0):
            if self.ignore_wrt_candidates:
                ignore_overlaps = bbox_overlaps(
                    bboxes, gt_bboxes_ignore, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            else:
                ignore_overlaps = bbox_overlaps(
                    gt_bboxes_ignore, bboxes, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
            # A bbox that overlaps an ignored gt too much gets overlap -1 with
            # every gt, so steps 2-3 below skip it.
            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1

        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
        return assign_result

    def assign_wrt_overlaps(self, overlaps, gt_labels=None):
        """Assign w.r.t. the overlaps of bboxes with gts.

        Args:
            overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
                shape(k, n).
            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.

        Raises:
            ValueError: If ``overlaps`` is empty.
        """
        if overlaps.numel() == 0:
            raise ValueError('No gt or proposals')

        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

        # 1. assign -1 by default
        assigned_gt_inds = overlaps.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)

        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

        # 2. assign negative: below
        if isinstance(self.neg_iou_thr, float):
            assigned_gt_inds[(max_overlaps >= 0)
                             & (max_overlaps < self.neg_iou_thr)] = 0
        elif isinstance(self.neg_iou_thr, tuple):
            assert len(self.neg_iou_thr) == 2
            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
                             & (max_overlaps < self.neg_iou_thr[1])] = 0

        # 3. assign positive: above positive IoU threshold
        pos_inds = max_overlaps >= self.pos_iou_thr
        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1

        # 4. assign fg: for each gt, proposals with highest IoU
        for i in range(num_gts):
            if gt_max_overlaps[i] >= self.min_pos_iou:
                if self.gt_max_assign_all:
                    max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                    assigned_gt_inds[max_iou_inds] = i + 1
                else:
                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1

        if gt_labels is not None:
            assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
            # squeeze(1) keeps the index tensor 1-D even with a single
            # positive (a bare squeeze() would collapse it to 0-D).
            pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze(1)
            if pos_inds.numel() > 0:
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None

        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)