记录了detectron2中faster R-CNN代码的训练流程(从建立模型到完成一次训练)

本文详细介绍了Detectron2中Faster R-CNN的代码流程,包括模型构建(backbone、proposal generator、ROIHeads)、优化器设置、数据加载、学习率调度器以及训练过程中的关键步骤。主要涉及了网络结构、损失函数和数据预处理等内容。
摘要由CSDN通过智能技术生成

self.build_model(cfg)

modeling/meta_arch/build.py

meta_arch = cfg.MODEL.META_ARCHITECTURE # look up the meta-architecture name, then instantiate it via the registry
META_ARCH_REGISTRY.get(meta_arch)(cfg)

modeling/meta_arch/rcnn.py

self.backbone = build_backbone(cfg) # build the backbone network
self._SHAPE_ = self.backbone.output_shape() # output shape (channels/stride) of the backbone's final stage
self.proposal_generator = build_proposal_generator(cfg, self._SHAPE_) # build the RPN
self.roi_heads = build_roi_heads(cfg, self._SHAPE_) # build the ROI heads

build_backbone(cfg)

建立res50网络结构,最后输出层为res4 block,输出通道数为1024

build_proposal_generator(cfg, self.SHAPE)

包含RPNHead和AnchorGenerator:

RPN(
 (rpn_head): StandardRPNHead(
    (conv): Conv2d(
      1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
      (activation): ReLU()
    )
    (objectness_logits): Conv2d(1024, 15, kernel_size=(1, 1), stride=(1, 1))
    (anchor_deltas): Conv2d(1024, 60, kernel_size=(1, 1), stride=(1, 1))
  )
 (anchor_generator): DefaultAnchorGenerator(
    (cell_anchors): BufferList()
 )
)

build_roi_heads(cfg, self.SHAPE)

Res5ROIHeads(
  (pooler): ROIPooler(
    (level_poolers): ModuleList(
      (0): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True)
    )
  )
  (res5): Sequential(
    (0): BottleneckBlock(
      (shortcut): Conv2d(
        1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False
        (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
      )
      (conv1): Conv2d(
        1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False
        (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
      )
      (conv2): Conv2d(
        512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
        (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
      )
      (conv3): Conv2d(
        512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
        (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
      )
    )
    (1): BottleneckBlock(
      (conv1): Conv2d(
        2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
        (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
      )
      (conv2): Conv2d(
        512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
        (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
      )
      (conv3): Conv2d(
        512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
        (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
      )
    )
    (2): BottleneckBlock(
      (conv1): Conv2d(
        2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
        (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
      )
      (conv2): Conv2d(
        512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
        (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
      )
      (conv3): Conv2d(
        512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
        (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
      )
    )
  )
  (box_predictor): FOODFastRCNNOutputLayers(
    (cls_score): Linear(in_features=2048, out_features=22, bias=False)
    (bbox_pred): Linear(in_features=2048, out_features=4, bias=True)
    (up_loss): UPLoss()
    (iou_loss): IOULoss()
    (e_loss): ELoss()
    (encoder): MLP(
      (head): Sequential(
        (0): Linear(in_features=2048, out_features=2048, bias=True)
        (1): ReLU(inplace=True)
        (2): Linear(in_features=2048, out_features=2048, bias=True)
      )
    )
    (ic_loss_loss): ICLoss()
  )
)

self.build_optimizer(cfg, model)

solver/build.py

self.build_train_loader(cfg)

dataloader/build.py

# Fetch the dataset dicts.
datasets = get_detection_dataset_dicts(dataset_names, filter_empty=True, min_keypoints=0, 										proposal_files=None)
dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
# List of dicts, one per image, each like:
# {'file_name': 'datasets\\VOC2007\\JPEGImages\\000007.jpg', 'image_id': '000007', 'height': 333, 'width': 500, 'annotations': [{'category_id': 6, 'bbox': [140.0, 49.0, 500.0, 330.0], 'bbox_mode': <BoxMode.XYXY_ABS: 0>}]}
dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
return dataset_dicts
# Build the mapper and the sampler.
mapper = DatasetMapper(cfg, True)
sampler = TrainingSampler(len(dataset))
dataset = DatasetFromList(dataset, copy=False)
dataset = MapDataset(dataset, mapper)
# Build the dataloader.
build_batch_data_loader(dataset, sampler, total_batch_size, *, aspect_ratio_grouping=False, 
                        num_workers=0)
data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )
return AspectRatioGroupedDataset(data_loader, batch_size)

self.build_lr_scheduler(cfg, optimizer)

self.checkpointer = DetectionCheckpointer()

self.register_hooks(self.build_hooks())

Trainer.train()

engine/train_loop.py

def run_step(self):
    # Pull one batch of data from the data-loader iterator.
    data = next(self._data_loader_iter)
    # Forward pass through the model; training mode returns a loss dict.
    loss_dict = self.model(data)

modeling/meta_arch/rcnn.py

def forward(self, batched_inputs):
    # Move every image's ground-truth Instances onto the model device.
    gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
    # Example element of batched_inputs:
    # {'file_name': 'datasets\\VOC2012\\JPEGImages\\2010_002370.jpg', 'image_id': '2010_002370','height': 375, 'width': 500, 'image': tensor([xxx], dtype=torch.uint8), 'instances':Instances(num_instances=5, image_height=512, image_width=683, fields=[gt_boxes:Boxes(tensor([[xx],[xx],[xx],[xx],[xx]])), gt_classes: tensor([8, 8, 8, 8, 8])])}
    proposal_losses, detector_losses, _, _ = self._forward_once_(batched_inputs, gt_instances)
def _forward_once_(self, batched_inputs, gt_instances=None):
    # Preprocess input images: (x - mean) / std, batched to (N, 3, H, W).
    images = self.preprocess_image(batched_inputs)
    # Backbone feature extraction: 'res4' -> (N, 1024, H/16, W/16).
    features = self.backbone(images.tensor)
    # Generate the proposals (2000 per image) and the RPN losses.
    proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)

modeling/proposal_generator/rpn.py

def forward(self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[List[Instances]] = None,):
    # Generate (H * W * num_scales * num_ratios) anchors; H, W are the feature-map size.
    anchors = self.anchor_generator(features)
    # The head applies a 3x3 conv, then two 1x1 convs, producing objectness scores
    # (N, num_cell_anchors, H, W) and box deltas (N, 4*num_cell_anchors, H, W),
    # reshaped to (N, num_cell_anchors*H*W) and (N, num_cell_anchors*H*W, 4).
    pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features)
    # Assign a ground-truth label to every anchor.
    gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances)
def label_and_sample_anchors(self, anchors, gt_instances):
    """Label each anchor (1=fg, 0=bg, -1=ignore) and pick its matched GT box.

    Fixes vs. the original excerpt: the matcher result was bound to
    ``matched_idx`` but read back as ``matched_idxs`` (NameError), and the
    signature was missing ``self`` even though the body uses it and the
    caller invokes it as a method.

    NOTE(review): condensed excerpt -- the real implementation iterates
    over the images of the batch.
    """
    # Ground-truth boxes of each image.
    gt_boxes = [x.gt_boxes for x in gt_instances]
    # Pairwise IoU between GT boxes and anchors: (num_gt, num_anchors).
    match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes, anchors)
    # For each anchor: index of its best-matching GT box, and its label
    # (0 background, 1 foreground, -1 ignored) using thresholds [0.3, 0.7].
    matched_idxs, gt_labels = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix)
    # Sample 256 anchors at positive_fraction=0.5 for the loss;
    # all remaining anchors get label -1 (ignored).
    gt_labels = self._subsample_labels(gt_labels)
    # (num_anchors, 4): the GT box matched to each anchor.
    matched_gt_boxes = gt_boxes[matched_idxs].tensor

    return gt_labels, matched_gt_boxes

def forward(self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[List[Instances]] = None,):
    # Compute the RPN losses from the head outputs and the anchor labels.
    losses = self.losses(anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes)
def losses(self, anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes):
    """Compute the RPN objectness and box-regression losses.

    Fixes vs. the original excerpt: the ``normalizer`` line was indented
    with a tab mixed into space-indented code (a TabError in Python 3),
    and ``num_images`` was used without being defined.

    Returns:
        dict with "loss_rpn_cls" and "loss_rpn_loc".
    """
    pos_mask = gt_labels == 1  # mask of positive (foreground) anchors
    num_pos_anchors = pos_mask.sum().item()  # number of positive anchors
    num_neg_anchors = (gt_labels == 0).sum().item()  # number of negative anchors
    # Box-regression loss over the positive anchors only.
    localization_loss = _dense_box_regression_loss(
            anchors,
            self.box2box_transform,
            pred_anchor_deltas,
            gt_boxes,
            pos_mask,
            box_reg_loss_type=self.box_reg_loss_type,
            smooth_l1_beta=self.smooth_l1_beta,
        )
    # All sampled anchors (positives and negatives) contribute to objectness.
    valid_mask = gt_labels >= 0
    # Binary cross-entropy on the objectness logits.
    objectness_loss = F.binary_cross_entropy_with_logits(
            cat(pred_objectness_logits, dim=1)[valid_mask],
            gt_labels[valid_mask].to(torch.float32),
            reduction="sum",
        )
    # gt_labels is stacked per image, so its length is the batch size.
    num_images = len(gt_labels)
    # Total number of anchors sampled for the loss in this batch.
    normalizer = self.batch_size_per_image * num_images
    losses = {
            "loss_rpn_cls": objectness_loss / normalizer,
            "loss_rpn_loc": localization_loss / normalizer,
        }
    return losses
    
def forward(self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[List[Instances]] = None,):
    # Decode the final proposals, e.g.
    # [Instances(num_instances=2000, image_height=544, image_width=725, fields=[proposal_boxes: Boxes(...), objectness_logits: tensor(...)])]
    proposals = self.predict_proposals(
            anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes
        )
def predict_proposals(
    self,
    anchors: List[Boxes],
    pred_objectness_logits: List[torch.Tensor],
    pred_anchor_deltas: List[torch.Tensor],
    image_sizes: List[Tuple[int, int]],
):
    # Apply the predicted regression deltas to every anchor,
    # producing all candidate proposals: (N, num_anchors, 4).
    pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
    # Post-process (sort, NMS, clip) to obtain the final proposals.
    return find_top_rpn_proposals(
                pred_proposals,
                pred_objectness_logits,
                image_sizes,
                self.nms_thresh,  # 0.7
                self.pre_nms_topk[self.training],  # 12000
                self.post_nms_topk[self.training],  # 2000
                self.min_box_size,  # 0
                self.training,  # True
            )

modeling/proposal_generator/proposal_utils.py

def find_top_rpn_proposals(
    proposals: List[torch.Tensor],
    pred_objectness_logits: List[torch.Tensor],
    image_sizes: List[Tuple[int, int]],
    nms_thresh: float,
    pre_nms_topk: int,
    post_nms_topk: int,
    min_box_size: float,
    training: bool,
):
    # NOTE(review): heavily condensed excerpt -- batch_idx, boxes,
    # scores_per_img, lvl, image_size and results are defined in the
    # omitted surrounding code of the real implementation.
    # Sort the objectness logits in descending order.
    pred_objectness_logits, idx = pred_objectness_logits.sort(descending=True, dim=1)
    # Keep the top pre_nms_topk logits and their indices.
    topk_scores = pred_objectness_logits.narrow(1, 0, pre_nms_topk)
    topk_idx = idx.narrow(1, 0, pre_nms_topk)
    
    # Gather the proposals corresponding to the kept indices.
    topk_proposals = proposals[batch_idx[:, None], topk_idx]
    
    # Run NMS and cap the result at post_nms_topk proposals per image.
    keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
    keep = keep[:post_nms_topk]
    
    res = Instances(image_size)
    res.proposal_boxes = boxes[keep]
    res.objectness_logits = scores_per_img[keep]
    results.append(res)
    
    return results

modeling/proposal_generator/rpn.py

def forward(self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[List[Instances]] = None,):
    # The RPN returns the generated proposals together with its loss dict.
    return proposals, losses

modeling/meta_arch/rcnn.py

def _forward_once_(self, batched_inputs, gt_instances=None):
    # Classification and regression losses from the ROI heads;
    # during training `results` is returned as [].
     results, detector_losses = self.roi_heads(images, 
                                               features, 
                                               proposals, 
                                               gt_instances,)

modeling/roi_heads/roi_heads.py

def forward(self, images, features, proposals, targets):
    # From the ~2000 proposals (+ GT boxes) per image, sample 512 positive/negative
    # proposals by IoU against the GT (threshold 0.5 separates fg/bg). Example:
    # [Instances(num_instances=512, image_height=544, image_width=725, fields=[proposal_boxes: Boxes(...), objectness_logits: tensor(...), gt_classes: tensor(...), iou: tensor(...), gt_boxes: Boxes(...)])]
    proposals = self.label_and_sample_proposals(proposals, targets)
    # Boxes of all sampled proposals.
    proposal_boxes = [x.proposal_boxes for x in proposals]
    # ROIAlign over all proposals: (512*N, 1024, 7, 7).
    x = self.pooler(features, proposal_boxes)
    # res5 extracts the final per-proposal features: (512*N, 2048, 4, 4).
    box_features= self.res5(x)
    # Global average pooling over the spatial dims: (512*N, 2048).
    feature_pooled = box_features.mean(dim=[2, 3]) 
    
    # Predictions (scores, deltas) for every proposal.
    predictions = self.box_predictor(feature_pooled)
    

modeling/roi_heads/fast_rcnn.py

def forward(self, feats):
    """Cosine-similarity classification head plus box-regression head.

    Both heads consume the pooled per-proposal features. The classifier
    L2-normalizes the input features and (in place) the weight rows of
    ``self.cls_score``, so the linear layer yields a cosine similarity,
    which is then multiplied by ``self.scale``.

    Args:
        feats: per-proposal features, shape (R, C)
               (presumably (512*N, 2048) here -- see the caller).

    Returns:
        (scores, proposal_deltas): class scores of shape (R, num_classes)
        and box deltas of shape (R, 4).
    """
    # Row-wise feature normalization: x / (|x| + eps).
    feat_norm = torch.norm(feats, p=2, dim=1, keepdim=True).expand_as(feats)
    unit_feats = feats / (feat_norm + 1e-5)

    # Normalize the classifier weight rows in place: W / (|W| + eps).
    weight = self.cls_score.weight.data
    row_norm = torch.norm(weight, p=2, dim=1, keepdim=True).expand_as(weight)
    self.cls_score.weight.data = weight / (row_norm + 1e-5)

    # Scaled cosine similarity serves as the class score, shape (R, num_classes).
    scores = self.scale * self.cls_score(unit_feats)
    # Box deltas come from the raw (un-normalized) features, shape (R, 4).
    proposal_deltas = self.bbox_pred(feats)

    return scores, proposal_deltas

modeling/roi_heads/roi_heads.py

def forward(self, images, features, proposals, targets):
    # Classification and box-regression losses from the box predictor.
    losses = self.box_predictor.losses(predictions, proposals)
    return [], losses

modeling/roi_heads/fast_rcnn.py

def losses(self, predictions, proposals, input_features=None):
    """Compute the Fast R-CNN classification and box-regression losses.

    Args:
        predictions: (scores, proposal_deltas) from the box predictor.
        proposals: per-image Instances carrying gt_classes,
            proposal_boxes and gt_boxes for the sampled proposals.
        input_features: unused here; kept for interface compatibility.

    Returns:
        dict with "loss_cls_ce" and "loss_box_reg".
    """
    scores, proposal_deltas = predictions

    # Flatten the per-image ground truth across the whole batch.
    gt_classes = cat([p.gt_classes for p in proposals], dim=0)                  # (R,)
    proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0)   # (R, 4)
    gt_boxes = cat([p.gt_boxes.tensor for p in proposals], dim=0)               # (R, 4)

    # Cross-entropy for classification; delegated helper for regression.
    cls_loss = F.cross_entropy(scores, gt_classes, reduction="mean")
    reg_loss = self.box_reg_loss(proposal_boxes, gt_boxes, proposal_deltas, gt_classes)
    return {"loss_cls_ce": cls_loss, "loss_box_reg": reg_loss}
    

modeling/meta_arch/rcnn.py

def _forward_once_(self, batched_inputs, gt_instances=None):
    # Return RPN losses, ROI-head losses, (empty) results and the image sizes.
    return proposal_losses, detector_losses, results, images.image_sizes
def forward(self, batched_inputs):
    # Merge the two loss dicts into one for the trainer.
    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return losses

engine/train_loop.py

def run_step(self):
    # Sum all individual losses into a single scalar.
    losses = sum(loss_dict.values())
    # Standard optimization step: zero grads, backprop, parameter update.
    self.optimizer.zero_grad()
    losses.backward()
    self.optimizer.step()
  • 21
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值