一、cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
MASK_ON: True
WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
RESNETS:
#resnetX的配置
STRIDE_IN_1X1: False # this is a C2 model
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
#resnet的深度
DEPTH: 152
#旋转卷积的开启
DEFORM_ON_PER_STAGE: [False, True, True, True]
ROI_HEADS:
NAME: "CascadeROIHeads"
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_CONV: 4
NUM_FC: 1
#使用GN在FastRCNNConvFCHead
NORM: "GN"
CLS_AGNOSTIC_BBOX_REG: True
ROI_MASK_HEAD:
NUM_CONV: 8
#使用GN在ROI_MASK_HEAD
NORM: "GN"
RPN:
POST_NMS_TOPK_TRAIN: 2000
SOLVER:
IMS_PER_BATCH: 128
STEPS: (35000, 45000)
MAX_ITER: 50000
BASE_LR: 0.16
INPUT:
MIN_SIZE_TRAIN: (640, 864)
#range为短边随机resize到(640, 864)内,同时长边不超过MAX_SIZE_TRAIN,
#若超过则为MAX_SIZE_TRAIN且按比例缩小为MIN_SIZE_TRAIN。
#choice为随机选择(640, 864)中的一个
MIN_SIZE_TRAIN_SAMPLING: "range"
MAX_SIZE_TRAIN: 1440
CROP:
ENABLED: True
TEST:
EVAL_PERIOD: 2500
二、Base-RCNN-FPN.yaml
MODEL:
#总体结构组成
META_ARCHITECTURE: "GeneralizedRCNN"
#骨干网络选择
BACKBONE:
NAME: "build_resnet_fpn_backbone"
RESNETS:
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
FPN:
IN_FEATURES: ["res2", "res3", "res4", "res5"]
ANCHOR_GENERATOR:
#anchor的大小和缩放比例
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
RPN:
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
#选取的RPN框个数
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
# Detectron1 uses 2000 proposals per-batch,
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
POST_NMS_TOPK_TRAIN: 1000
POST_NMS_TOPK_TEST: 1000
#ROI头的总体结构,IN_FEATURES为box,mask共用。
ROI_HEADS:
NAME: "StandardROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
#ROI下box的子头
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_FC: 2
POOLER_RESOLUTION: 7
#ROI下mask的子头
ROI_MASK_HEAD:
NAME: "MaskRCNNConvUpsampleHead"
NUM_CONV: 4
POOLER_RESOLUTION: 14
DATASETS:
TRAIN: ("coco_2017_train",)
TEST: ("coco_2017_val",)
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.02
STEPS: (60000, 80000)
MAX_ITER: 90000
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2
三、defaults.py
部分常用的配置参数
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .config import CfgNode as CN
_C = CN()
# The version number, to upgrade from old configs to new ones if any
# changes happen. It's recommended to keep a VERSION in your config file.
_C.VERSION = 2
_C.MODEL = CN()
#mask的开启
_C.MODEL.MASK_ON = False
#模型加载的路径
_C.MODEL.WEIGHTS = ""
# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = (800,)
# Sample size of smallest side by choice or random selection from range give by
# INPUT.MIN_SIZE_TRAIN
_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333
# Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
_C.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333
# Mode for flipping images used in data augmentation during training
# choose one of ["horizontal, "vertical", "none"]
_C.INPUT.RANDOM_FLIP = "horizontal"
# `True` if cropping is used for data augmentation during training
_C.INPUT.CROP = CN({"ENABLED": False})
#if CROP.TYPE is "relative" or "relative_range"
#裁减的范围(0, 1]
_C.INPUT.CROP.SIZE = [0.1, 0.5]
# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
# Number of data loading threads
_C.DATALOADER.NUM_WORKERS = 4
# Tf True, when working on datasets that have instance annotations, the
# training dataloader will filter out images without associated annotations
_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.FPN = CN()
# Options: "" (no norm), "GN"
_C.MODEL.FPN.NORM = ""
# ---------------------------------------------------------------------------- #
# Anchor generator options
# ---------------------------------------------------------------------------- #
_C.MODEL.ANCHOR_GENERATOR = CN()
# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
# Format: list[list[float]]. SIZES[i] specifies the list of sizes
# to use for IN_FEATURES[i]; len(SIZES) == len(IN_FEATURES) must be true,
# or len(SIZES) == 1 is true and size list SIZES[0] is used for all
# IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect
# ratios are generated by an anchor generator.
# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W)
# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true,
# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used
# for all IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
# Anchor angles.
# list[list[float]], the angle in degrees, for each input feature map.
# ANGLES[i] specifies the list of angles for IN_FEATURES[i].
_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]]
# ---------------------------------------------------------------------------- #
# ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS = CN()
# Number of foreground classes
#除去背景的类别数
_C.MODEL.ROI_HEADS.NUM_CLASSES = 80
# ---------------------------------------------------------------------------- #
# Box Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_HEAD = CN()
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_BOX_HEAD.NORM = ""
# ---------------------------------------------------------------------------- #
# Mask Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_MASK_HEAD = CN()
# Normalization method for the convolution layers.
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_MASK_HEAD.NORM = ""
# ---------------------------------------------------------------------------- #
# RetinaNet Head
# ---------------------------------------------------------------------------- #
_C.MODEL.RETINANET = CN()
# This is the number of foreground classes.
_C.MODEL.RETINANET.NUM_CLASSES = 80
_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"]
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
_C.MODEL.RETINANET.NUM_CONVS = 4
# IoU overlap ratio [bg, fg] for labeling anchors.
# Anchors with < bg are labeled negative (0)
# Anchors with >= bg and < fg are ignored (-1)
# Anchors with >= fg are labeled positive (1)
_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5]
_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1]
# Prior prob for rare case (i.e. foreground) at the beginning of training.
# This is used to set the bias for the logits layer of the classifier subnet.
# This improves training stability in the case of heavy class imbalance.
_C.MODEL.RETINANET.PRIOR_PROB = 0.01
# Inference cls score threshold, only anchors with score > INFERENCE_TH are
# considered for inference (to improve speed)
_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
# Select topk candidates before NMS
_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5
# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Loss parameters
_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
# Options are: "smooth_l1", "giou"
_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1"
# One of BN, SyncBN, FrozenBN, GN
# Only supports GN until unshared norm is implemented
_C.MODEL.RETINANET.NORM = ""
# ---------------------------------------------------------------------------- #
# ResNe[X]t options (ResNets = {ResNet, ResNeXt}
# Note that parts of a resnet may be used for both the backbone and the head
# These options apply to both
# ---------------------------------------------------------------------------- #
_C.MODEL.RESNETS = CN()
#RESNETS的网络深度
_C.MODEL.RESNETS.DEPTH = 50
# Options: FrozenBN, GN, "SyncBN", "BN"
_C.MODEL.RESNETS.NORM = "FrozenBN"
# Apply Deformable Convolution in stages
# Specify if apply deform_conv on Res2, Res3, Res4, Res5
_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False]
# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()
# 学习率策略
_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
#最大迭代次数
_C.SOLVER.MAX_ITER = 40000
#基础学习率
_C.SOLVER.BASE_LR = 0.001
#动量
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.NESTEROV = False
_C.SOLVER.WEIGHT_DECAY = 0.0001
# The weight decay that's applied to parameters of normalization layers
# (typically the affine transformation)
_C.SOLVER.WEIGHT_DECAY_NORM = 0.0
#学习率衰减比率
_C.SOLVER.GAMMA = 0.1
# 到制定的step时进行一次衰减
_C.SOLVER.STEPS = (30000,)
#预热
_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000
#从WARMUP_FACTOR到BASE_LR的step间隔
_C.SOLVER.WARMUP_ITERS = 1000
_C.SOLVER.WARMUP_METHOD = "linear"
# 保存模型间隔
_C.SOLVER.CHECKPOINT_PERIOD = 5000
# batch大小
_C.SOLVER.IMS_PER_BATCH = 16
# Enable automatic mixed precision for training
# Note that this does not change model's inference behavior.
# To use AMP in inference, run inference under autocast()
_C.SOLVER.AMP = CN({"ENABLED": False})
# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
# The period (in terms of steps) to evaluate the model during training.
# Set to 0 to disable.
#间隔多少step进行测试
_C.TEST.EVAL_PERIOD = 0
#每张图片最大检出目标数
_C.TEST.DETECTIONS_PER_IMAGE = 100
# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
#输出目录
_C.OUTPUT_DIR = "./output"
# Benchmark different cudnn algorithms.
# If input images have very different sizes, this option will have large overhead
# for about 10k iterations. It usually hurts total time, but can benefit for certain models.
# If input images have the same or similar sizes, benchmark is often helpful.
_C.CUDNN_BENCHMARK = False