Detectron2 Faster R-CNN 代码流程
- Trainer.train()
- engine/train_loop.py
- modeling/meta_arch/rcnn.py
- modeling/proposal_generator/rpn.py
- modeling/proposal_generator/proposal_utils.py
- modeling/proposal_generator/rpn.py
- modeling/meta_arch/rcnn.py
- modeling/roi_heads/roi_heads.py
- modeling/roi_heads/fast_rcnn.py
- modeling/roi_heads/roi_heads.py
- modeling/roi_heads/fast_rcnn.py
- modeling/meta_arch/rcnn.py
- engine/train_loop.py
self.build_model(cfg)
modeling/meta_arch/build.py
meta_arch = cfg.MODEL.META_ARCHITECTURE #注册机制建立网络
META_ARCH_REGISTRY.get(meta_arch)(cfg)
modeling/meta_arch/rcnn.py
self.backbone = build_backbone(cfg) #建立backbone
self._SHAPE_ = self.backbone.output_shape() #获取backbone输出特征的shape规格(通道数、空间步长等),而不仅是通道数
self.proposal_generator = build_proposal_generator(cfg, self._SHAPE_) #建立RPN
self.roi_heads = build_roi_heads(cfg, self._SHAPE_) #建立ROIHead
build_backbone(cfg)
建立res50网络结构,最后输出层为res4 block,输出通道数为1024
build_proposal_generator(cfg, self._SHAPE_)
包含RPNHead和AnchorGenerator:
RPN(
(rpn_head): StandardRPNHead(
(conv): Conv2d(
1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
(activation): ReLU()
)
(objectness_logits): Conv2d(1024, 15, kernel_size=(1, 1), stride=(1, 1))
(anchor_deltas): Conv2d(1024, 60, kernel_size=(1, 1), stride=(1, 1))
)
(anchor_generator): DefaultAnchorGenerator(
(cell_anchors): BufferList()
)
)
build_roi_heads(cfg, self._SHAPE_)
Res5ROIHeads(
(pooler): ROIPooler(
(level_poolers): ModuleList(
(0): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True)
)
)
(res5): Sequential(
(0): BottleneckBlock(
(shortcut): Conv2d(
1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False
(norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
)
(conv1): Conv2d(
1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv2): Conv2d(
512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv3): Conv2d(
512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
)
)
(1): BottleneckBlock(
(conv1): Conv2d(
2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv2): Conv2d(
512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv3): Conv2d(
512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
)
)
(2): BottleneckBlock(
(conv1): Conv2d(
2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv2): Conv2d(
512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv3): Conv2d(
512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
)
)
)
(box_predictor): FOODFastRCNNOutputLayers(
(cls_score): Linear(in_features=2048, out_features=22, bias=False)
(bbox_pred): Linear(in_features=2048, out_features=4, bias=True)
(up_loss): UPLoss()
(iou_loss): IOULoss()
(e_loss): ELoss()
(encoder): MLP(
(head): Sequential(
(0): Linear(in_features=2048, out_features=2048, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=2048, out_features=2048, bias=True)
)
)
(ic_loss_loss): ICLoss()
)
)
self.build_optimizer(cfg, model)
solver/build.py
self.build_train_loader(cfg)
dataloader/build.py
#获取datasets
datasets = get_detection_dataset_dicts(dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None)
dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
#list:每一个list里:
#{'file_name': 'datasets\\VOC2007\\JPEGImages\\000007.jpg', 'image_id': '000007', 'height': 333, 'width': 500, 'annotations': [{'category_id': 6, 'bbox': [140.0, 49.0, 500.0, 330.0], 'bbox_mode': <BoxMode.XYXY_ABS: 0>}]}
dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
return dataset_dicts
#获取mapper sampler
mapper = DatasetMapper(cfg, True)
sampler = TrainingSampler(len(dataset))
dataset = DatasetFromList(dataset, copy=False)
dataset = MapDataset(dataset, mapper)
#获取dataloader(下方展示的是aspect_ratio_grouping=True时的分支,最终返回AspectRatioGroupedDataset)
build_batch_data_loader(dataset, sampler, total_batch_size, *, aspect_ratio_grouping=False,
num_workers=0)
data_loader = torch.utils.data.DataLoader(
dataset,
sampler=sampler,
num_workers=num_workers,
batch_sampler=None,
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
worker_init_fn=worker_init_reset_seed,
)
return AspectRatioGroupedDataset(data_loader, batch_size)
self.build_lr_scheduler(cfg, optimizer)
self.checkpointer = DetectionCheckpointer()
self.register_hooks(self.build_hooks())
Trainer.train()
engine/train_loop.py
def run_step(self):
#取出一个batch的data数据
data = next(self._data_loader_iter)
#输入网络计算损失
loss_dict = self.model(data)
modeling/meta_arch/rcnn.py
def forward(self, batched_inputs):
gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
#batched_inputs:
#{'file_name': 'datasets\\VOC2012\\JPEGImages\\2010_002370.jpg', 'image_id': '2010_002370','height': 375, 'width': 500, 'image': tensor([xxx], dtype=torch.uint8), 'instances':Instances(num_instances=5, image_height=512, image_width=683, fields=[gt_boxes:Boxes(tensor([[xx],[xx],[xx],[xx],[xx]])), gt_classes: tensor([8, 8, 8, 8, 8])])}
proposal_losses, detector_losses, _, _ = self._forward_once_(batched_inputs, gt_instances)
def _forward_once_(self, batched_inputs, gt_instances=None):
#预处理输入图像(x-mean)/std,(N,3,H,W)
images = self.preprocess_image(batched_inputs)
#输入到backbone中提取特征 'res4':(N,1024,H/16,W/16)
features = self.backbone(images.tensor)
#计算生成的proposals(2000个)和RPN损失
proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
modeling/proposal_generator/rpn.py
def forward(self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[List[Instances]] = None,):
#产生(H*W*尺度*大小)个Anchor,H,W为特征图尺寸
anchors = self.anchor_generator(features)
#特征图经过3×3卷积,两个1×1卷积得到objectness分数(N,尺度*大小,H,W)和回归分数(N,4*尺度*大小,H,W)
#转换为(N,尺度*大小*H*W)和(N,尺度*大小*H*W,4)
pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features)
#给每个Anchor打标签
gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances)
def label_and_sample_anchors(anchors, gt_instances):
#取出每张图片的gtbox
gt_boxes = [x.gt_boxes for x in gt_instances]
#计算gtbox和Anchor的两两iou值,矩阵shape为(gtbox数,Anchor数)
match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes, anchors)
#根据iou矩阵计算每个Anchor匹配的对应gtboxID以及gt标签(0背景,1前景,-1忽略),iou阈值[0.3,0.7]
matched_idxs, gt_labels = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix)
#在self.positive_fraction=0.5的正负样本比例下,从所有anchor中选择256个计算损失,其余gt标签设置为-1
gt_labels = self._subsample_labels(gt_labels)
#(Anchor数,4)每个Anchor匹配对应的gtbox
matched_gt_boxes = gt_boxes[matched_idxs].tensor
return gt_labels, matched_gt_boxes
def forward(self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[List[Instances]] = None,):
#计算RPN损失
losses = self.losses(anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes)
def losses(self,anchors,pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes,):
pos_mask = gt_labels == 1 #生成正Anchor样本掩码
num_pos_anchors = pos_mask.sum().item() #正Anchor样本数
num_neg_anchors = (gt_labels == 0).sum().item() #负Anchor样本数
#用所有正样本计算回归框损失
localization_loss = _dense_box_regression_loss(
anchors,
self.box2box_transform,
pred_anchor_deltas,
gt_boxes,
pos_mask,
box_reg_loss_type=self.box_reg_loss_type,
smooth_l1_beta=self.smooth_l1_beta,
)
#所有正负Anchor样本掩码
valid_mask = gt_labels >= 0
#计算交叉熵损失
objectness_loss = F.binary_cross_entropy_with_logits(
cat(pred_objectness_logits, dim=1)[valid_mask],
gt_labels[valid_mask].to(torch.float32),
reduction="sum",
)
normalizer = self.batch_size_per_image * num_images #一个batch中所有参与计算损失的Anchor数
losses = {
"loss_rpn_cls": objectness_loss / normalizer,
"loss_rpn_loc": localization_loss / normalizer,
}
return losses
def forward(self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[List[Instances]] = None,):
#生成proposals
#[Instances(num_instances=2000, image_height=544, image_width=725, fields=[proposal_boxes: Boxes(tensor([[],...,[]], device='cuda:0')), objectness_logits: tensor([x,...,x],device='cuda:0')])]
proposals = self.predict_proposals(
anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes
)
def predict_proposals(
self,
anchors: List[Boxes],
pred_objectness_logits: List[torch.Tensor],
pred_anchor_deltas: List[torch.Tensor],
image_sizes: List[Tuple[int, int]],
):
#每个anchor应用计算出的回归分数生成所有proposals(N,Anchor数,4)
pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
#后处理得到proposals
return find_top_rpn_proposals(
pred_proposals,
pred_objectness_logits,
image_sizes,
self.nms_thresh,#0.7
self.pre_nms_topk[self.training],#12000
self.post_nms_topk[self.training],#2000
self.min_box_size,#0
self.training,#True
)
modeling/proposal_generator/proposal_utils.py
def find_top_rpn_proposals(
proposals: List[torch.Tensor],
pred_objectness_logits: List[torch.Tensor],
image_sizes: List[Tuple[int, int]],
nms_thresh: float,
pre_nms_topk: int,
post_nms_topk: int,
min_box_size: float,
training: bool,
):
#对logits做降序排序
pred_objectness_logits, idx = pred_objectness_logits.sort(descending=True, dim=1)
#取前pre_nms_topk个logits以及对应idx
topk_scores = pred_objectness_logits.narrow(1, 0, pre_nms_topk)
topk_idx = idx.narrow(1, 0, pre_nms_topk)
#得到对应的proposals
topk_proposals = proposals[batch_idx[:, None], topk_idx]
#进行NMS处理得到最终的proposals结果
keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
keep = keep[:post_nms_topk]
res = Instances(image_size)
res.proposal_boxes = boxes[keep]
res.objectness_logits = scores_per_img[keep]
results.append(res)
return results
modeling/proposal_generator/rpn.py
def forward(self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[List[Instances]] = None,):
return proposals, losses
modeling/meta_arch/rcnn.py
def _forward_once_(self, batched_inputs, gt_instances=None):
#计算分类和回归损失,训练的情况下result返回为[]
results, detector_losses = self.roi_heads(images,
features,
proposals,
gt_instances,)
modeling/roi_heads/roi_heads.py
def forward(self, images, features, proposals, targets):
#从 2000+gtbox数 个proposals中 根据和gtbox的iou,选择512个正负proposals [0.5,0.5]正负proposal区分
#[Instances(num_instances=512, image_height=544, image_width=725, fields=[proposal_boxes: Boxes(tensor([512*[a,b,c,d]], device='cuda:0')), objectness_logits: tensor([x,,x],device='cuda:0'), gt_classes: tensor([ 8, 4, 4, 4, 8, 4, 8, 21, 21, 21, 21....], device='cuda:0'), iou: tensor([1.0000e+00, 1.0000e+00, 6.0265e-01, 5.3412e-01, 6.0212e-01, 1.0000e+00, 1.0000e+00,... ], device='cuda:0'), gt_boxes: Boxes(tensor([], device='cuda:0'))])]
proposals = self.label_and_sample_proposals(proposals, targets)
#取所有proposals
proposal_boxes = [x.proposal_boxes for x in proposals]
#所有proposals通过ROIAlign层,(512*N,1024,7,7)
x = self.pooler(features, proposal_boxes)
#经过res5提取最后的特征(512*N,2048,4,4),表示每个proposal的特征
box_features= self.res5(x)
#特征进行平均池化操作(512*N,2048)
feature_pooled = box_features.mean(dim=[2, 3])
#计算所有proposal的预测结果
predictions = self.box_predictor(feature_pooled)
modeling/roi_heads/fast_rcnn.py
def forward(self, feats):
#(512*N,2048)
reg_x = cls_x = feats
#计算X/|X|
x_norm = torch.norm(cls_x, p=2, dim=1).unsqueeze(1).expand_as(cls_x)
x_normalized = cls_x.div(x_norm + 1e-5)
#计算W/|W|
temp_norm = (
torch.norm(self.cls_score.weight.data, p=2, dim=1)
.unsqueeze(1)
.expand_as(self.cls_score.weight.data)
)
self.cls_score.weight.data = self.cls_score.weight.data.div(temp_norm + 1e-5)
#计算余弦相似度(512*N,22),作为类别分数
cos_dist = self.cls_score(x_normalized)
scores = self.scale * cos_dist
#计算回归框分数(512*N,4)
proposal_deltas = self.bbox_pred(reg_x)
return scores, proposal_deltas
modeling/roi_heads/roi_heads.py
def forward(self, images, features, proposals, targets):
#计算分类损失和回归损失
losses = self.box_predictor.losses(predictions, proposals)
return [], losses
modeling/roi_heads/fast_rcnn.py
def losses(self, predictions, proposals, input_features=None):
scores, proposal_deltas = predictions
#取出所有proposal的gtclass(512*N,)
gt_classes = cat([p.gt_classes for p in proposals], dim=0)
#取出所有proposal的boxes(512*N,4)
proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0)
#取出所有proposal匹配的gtboxes(512*N,4)
gt_boxes = cat([p.gt_boxes.tensor for p in proposals],dim=0)
#计算分类和回归损失
losses = {
"loss_cls_ce": F.cross_entropy(scores, gt_classes, reduction="mean"),
"loss_box_reg": self.box_reg_loss(
proposal_boxes, gt_boxes, proposal_deltas, gt_classes
),
}
return losses
modeling/meta_arch/rcnn.py
def _forward_once_(self, batched_inputs, gt_instances=None):
return proposal_losses, detector_losses, results, images.image_sizes
def forward(self, batched_inputs):
losses = {}
losses.update(detector_losses)
losses.update(proposal_losses)
return losses
engine/train_loop.py
def run_step(self):
losses = sum(loss_dict.values())
self.optimizer.zero_grad()
losses.backward()
self.optimizer.step()