# Model settings
# Official docs: https://mmdetection.readthedocs.io/en/latest/tutorials/config.html
# Mask R-CNN detector definition: ResNet-50 backbone, FPN neck, RPN head and
# a standard RoI head with a box branch and a mask branch.
model = dict(
    # Name of the detector.
    type='MaskRCNN',
    # Pretrained backbone weights to load.
    pretrained='torchvision://resnet50',
    # Backbone configuration.
    backbone=dict(
        type='ResNet',
        # Depth of the backbone, 50 or 101.
        depth=50,
        # Number of backbone stages.
        num_stages=4,
        # Indices of the stages whose feature maps are output.
        out_indices=(0, 1, 2, 3),
        # Freeze the weights of the first stage.
        frozen_stages=1,
        # Batch-norm settings.
        norm_cfg=dict(type='BN', requires_grad=True),
        # Whether to freeze the statistics in BN.
        norm_eval=True,
        # Backbone style: 'pytorch' means that stride 2 layers are in 3x3
        # conv, 'caffe' means stride 2 layers are in 1x1 convs.
        style='pytorch',
    ),
    neck=dict(
        type='FPN',
        # Input channels; consistent with the backbone output channels.
        in_channels=[256, 512, 1024, 2048],
        # Output channels of each level of the pyramid feature map.
        out_channels=256,
        num_outs=5,
    ),
    rpn_head=dict(
        type='RPNHead',
        # Channels of each input feature map.
        in_channels=256,
        # Feature channels of the convolutional layers in the head.
        feat_channels=256,
        # Anchor generator configuration.
        anchor_generator=dict(
            # Most methods use AnchorGenerator; SSD detectors use
            # `SSDAnchorGenerator`.
            type='AnchorGenerator',
            # Basic scale of the anchor; the area of the anchor in one
            # position of a feature map will be scale * base_sizes.
            scales=[8],
            # Ratios between height and width.
            ratios=[0.5, 1.0, 2.0],
            # Strides of the anchor generator, consistent with the FPN
            # feature strides. The strides are taken as base_sizes if
            # base_sizes is not set.
            strides=[4, 8, 16, 32, 64],
        ),
        # Box coder used to encode and decode the boxes during training
        # and testing.
        bbox_coder=dict(
            # 'DeltaXYWHBBoxCoder' is applied for most methods.
            type='DeltaXYWHBBoxCoder',
            # Target means.
            target_means=[0.0, 0.0, 0.0, 0.0],
            # Target standard deviations.
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        # Loss of the classification branch. RPN usually performs
        # two-class classification, so sigmoid is used; loss_weight is the
        # weight of the classification branch loss.
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
        # Loss of the regression branch; IoU losses and smooth L1 loss are
        # also supported as loss types.
        loss_bbox=dict(type='L1Loss', loss_weight=1.0),
    ),
    # RoIHead encapsulates the second stage of two-stage/cascade detectors.
    roi_head=dict(
        # Type of the RoI head.
        type='StandardRoIHead',
        # RoI feature extractor for bbox regression.
        bbox_roi_extractor=dict(
            # Most methods use SingleRoIExtractor.
            type='SingleRoIExtractor',
            roi_layer=dict(
                type='RoIAlign',
                output_size=7,
                sampling_ratio=0,
            ),
            # Number of output channels.
            out_channels=256,
            # Strides of multi-scale feature maps.
            featmap_strides=[4, 8, 16, 32],
        ),
        bbox_head=dict(
            # Type of the bbox head.
            type='Shared2FCBBoxHead',
            # Input channels of the bbox head.
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            # Number of classes in the dataset.
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                # Target means.
                target_means=[0.0, 0.0, 0.0, 0.0],
                # Target standard deviations.
                target_stds=[0.1, 0.1, 0.2, 0.2],
            ),
            # Whether the regression is class agnostic.
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0,
            ),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0),
        ),
        # RoI feature extractor for the mask branch.
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(
                type='RoIAlign',
                output_size=14,
                sampling_ratio=0,
            ),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32],
        ),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss',
                use_mask=True,
                loss_weight=1.0,
            ),
        ),
    ),
)
# Model training and testing settings
# Hyperparameters for the RPN and R-CNN stages
# Training-time hyperparameters for the RPN, the RPN proposal step and the
# R-CNN stage.
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            # Boxes with IoU >= 0.7 are taken as positive samples.
            pos_iou_thr=0.7,
            # Boxes with IoU < 0.3 are taken as negative samples.
            neg_iou_thr=0.3,
            # Minimum IoU threshold for a box to count as a positive sample.
            min_pos_iou=0.3,
            match_low_quality=True,
            ignore_iof_thr=-1,
        ),
        sampler=dict(
            # Random sampling.
            type='RandomSampler',
            # Number of samples.
            num=256,
            # Fraction of positive samples.
            pos_fraction=0.5,
            neg_pos_ub=-1,
            # Whether add GT as proposals after sampling.
            add_gt_as_proposals=False,
        ),
        allowed_border=-1,
        # Weight of positive samples during training.
        pos_weight=-1,
        # Whether to enable debug mode.
        debug=False,
    ),
    rpn_proposal=dict(
        nms_across_levels=False,
        # Number of boxes before NMS.
        nms_pre=2000,
        # Number of boxes kept after NMS.
        nms_post=1000,
        # Number of boxes to be used after NMS.
        max_num=1000,
        # NMS threshold.
        nms_thr=0.7,
        # Minimum allowed box size.
        min_bbox_size=0,
    ),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            match_low_quality=True,
            ignore_iof_thr=-1,
        ),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True,
        ),
        mask_size=28,
        pos_weight=-1,
        debug=False,
    ),
)
# Test-time settings for RPN proposal generation and the R-CNN stage.
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0,
    ),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.5),
        max_per_img=100,
        mask_thr_binary=0.5,
    ),
)