【MMDetection-学习记录】config配置文件说明

最新推荐文章于 2025-03-01 16:04:58 发布
乐亦亦乐
最新推荐文章于 2025-03-01 16:04:58 发布
阅读量1.7k
点赞数 1
分类专栏：笔记 pyTorch 文章标签： mmdetection
本文链接：https://blog.csdn.net/qq_41251963/article/details/112968703
版权
笔记同时被 2 个专栏收录
52 篇文章
订阅专栏
pyTorch
37 篇文章
订阅专栏
# Model settings.
# Official docs: https://mmdetection.readthedocs.io/en/latest/tutorials/config.html
model = {
    # Name of the detector.
    'type': 'MaskRCNN',
    # Pretrained backbone weights to load.
    'pretrained': 'torchvision://resnet50',
    # Backbone configuration.
    'backbone': {
        'type': 'ResNet',
        # Depth of the backbone: 50 or 101.
        'depth': 50,
        # Number of backbone stages.
        'num_stages': 4,
        # Indices of the feature maps output by each stage.
        'out_indices': (0, 1, 2, 3),
        # Freeze the weights of the first stage.
        'frozen_stages': 1,
        # Batch-normalization settings.
        'norm_cfg': {'type': 'BN', 'requires_grad': True},
        # Whether to freeze the statistics in BN.
        'norm_eval': True,
        # Form of the backbone: 'pytorch' means that stride 2 layers are in
        # 3x3 conv, 'caffe' means stride 2 layers are in 1x1 convs.
        'style': 'pytorch',
    },
    'neck': {
        'type': 'FPN',
        # Input channels; consistent with the backbone's output channels.
        'in_channels': [256, 512, 1024, 2048],
        # Output channels of each level of the pyramid feature map.
        'out_channels': 256,
        'num_outs': 5,
    },
    'rpn_head': {
        'type': 'RPNHead',
        # Number of channels of each input feature map.
        'in_channels': 256,
        # Feature channels of the head's convolution layers.
        'feat_channels': 256,
        # Anchor generator configuration.
        'anchor_generator': {
            # Most methods use AnchorGenerator; SSD detectors use
            # `SSDAnchorGenerator`.
            'type': 'AnchorGenerator',
            # Basic scale of the anchor; the area of the anchor in one
            # position of a feature map will be scale * base_sizes.
            'scales': [8],
            # Ratios between height and width.
            'ratios': [0.5, 1.0, 2.0],
            # Strides of the anchor generator. This is consistent with the
            # FPN feature strides. The strides will be taken as base_sizes
            # if base_sizes is not set.
            'strides': [4, 8, 16, 32, 64],
        },
        # Box coder used to encode and decode the boxes during training and
        # testing.
        'bbox_coder': {
            # 'DeltaXYWHBBoxCoder' is applied for most methods.
            'type': 'DeltaXYWHBBoxCoder',
            # Means used to encode and decode boxes.
            'target_means': [0.0, 0.0, 0.0, 0.0],
            # Standard deviations (not variances) used to encode and decode
            # boxes.
            'target_stds': [1.0, 1.0, 1.0, 1.0],
        },
        # Loss of the classification branch. RPN usually performs binary
        # classification, so sigmoid is used; loss_weight is the weight of
        # the classification branch loss.
        'loss_cls': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': True,
            'loss_weight': 1.0,
        },
        # Loss of the regression branch. `type` selects the loss function;
        # IoU losses and smooth L1 loss are also supported.
        'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0},
    },
    # RoIHead encapsulates the second stage of two-stage/cascade detectors.
    'roi_head': {
        # Type of the RoI head.
        'type': 'StandardRoIHead',
        # RoI feature extractor for bbox regression.
        'bbox_roi_extractor': {
            # Most methods use SingleRoIExtractor.
            'type': 'SingleRoIExtractor',
            'roi_layer': {
                'type': 'RoIAlign',
                'output_size': 7,
                'sampling_ratio': 0,
            },
            # Number of output channels.
            'out_channels': 256,
            # Strides of multi-scale feature maps.
            'featmap_strides': [4, 8, 16, 32],
        },
        'bbox_head': {
            # Type of the bbox head.
            'type': 'Shared2FCBBoxHead',
            # Input channels of the bbox head.
            'in_channels': 256,
            'fc_out_channels': 1024,
            'roi_feat_size': 7,
            # Number of classes in the dataset.
            'num_classes': 80,
            'bbox_coder': {
                'type': 'DeltaXYWHBBoxCoder',
                # Means used to encode and decode boxes.
                'target_means': [0.0, 0.0, 0.0, 0.0],
                # Standard deviations (not variances) used to encode and
                # decode boxes.
                'target_stds': [0.1, 0.1, 0.2, 0.2],
            },
            # Whether the regression is class agnostic.
            'reg_class_agnostic': False,
            'loss_cls': {
                'type': 'CrossEntropyLoss',
                'use_sigmoid': False,
                'loss_weight': 1.0,
            },
            'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0},
        },
        'mask_roi_extractor': {
            'type': 'SingleRoIExtractor',
            'roi_layer': {
                'type': 'RoIAlign',
                'output_size': 14,
                'sampling_ratio': 0,
            },
            'out_channels': 256,
            'featmap_strides': [4, 8, 16, 32],
        },
        'mask_head': {
            'type': 'FCNMaskHead',
            'num_convs': 4,
            'in_channels': 256,
            'conv_out_channels': 256,
            'num_classes': 80,
            'loss_mask': {
                'type': 'CrossEntropyLoss',
                'use_mask': True,
                'loss_weight': 1.0,
            },
        },
    },
}
# Model training and testing settings.
# Hyperparameters for the RPN and R-CNN stages.
train_cfg = {
    'rpn': {
        'assigner': {
            'type': 'MaxIoUAssigner',
            # Boxes with IoU >= 0.7 are positive samples.
            'pos_iou_thr': 0.7,
            # Boxes with IoU < 0.3 are negative samples.
            'neg_iou_thr': 0.3,
            # Minimum IoU threshold for taking a box as a positive sample.
            'min_pos_iou': 0.3,
            'match_low_quality': True,
            'ignore_iof_thr': -1,
        },
        'sampler': {
            # Random sampling.
            'type': 'RandomSampler',
            # Number of samples.
            'num': 256,
            # Fraction of positive samples.
            'pos_fraction': 0.5,
            'neg_pos_ub': -1,
            # Whether add GT as proposals after sampling.
            'add_gt_as_proposals': False,
        },
        'allowed_border': -1,
        # Weight of positive samples during training.
        'pos_weight': -1,
        # Whether to enable debug mode.
        'debug': False,
    },
    'rpn_proposal': {
        'nms_across_levels': False,
        # Number of boxes before NMS.
        'nms_pre': 2000,
        # Number of boxes kept after NMS.
        'nms_post': 1000,
        # Number of boxes to be used after NMS.
        'max_num': 1000,
        # NMS threshold.
        'nms_thr': 0.7,
        # Minimum allowed box size (a size, not a box count).
        'min_bbox_size': 0,
    },
    # R-CNN hyperparameters; same keys as the RPN assigner/sampler above.
    'rcnn': {
        'assigner': {
            'type': 'MaxIoUAssigner',
            'pos_iou_thr': 0.5,
            'neg_iou_thr': 0.5,
            'min_pos_iou': 0.5,
            'match_low_quality': True,
            'ignore_iof_thr': -1,
        },
        'sampler': {
            'type': 'RandomSampler',
            'num': 512,
            'pos_fraction': 0.25,
            'neg_pos_ub': -1,
            'add_gt_as_proposals': True,
        },
        'mask_size': 28,
        'pos_weight': -1,
        'debug': False,
    },
}
# Testing counterpart of train_cfg: per-stage settings under 'rpn' and 'rcnn'.
test_cfg = {
    'rpn': {
        'nms_across_levels': False,
        'nms_pre': 1000,
        'nms_post': 1000,
        'max_num': 1000,
        'nms_thr': 0.7,
        'min_bbox_size': 0,
    },
    'rcnn': {
        'score_thr': 0.05,
        'nms': {'type': 'nms', 'iou_threshold': 0.5},
        'max_per_img': 100,
        'mask_thr_binary': 0.5,
    },
}