FCOS论文及源码详解(四)
在 FCOS论文及源码详解(二)中提到,FCOS的training部分关键在于调用train_net.py,故解析FCOS源码从该文件开始。
train_net.py→train()开头
→build_detection_model()→GeneralizedRCNN→build_backbone(), build_rpn(), build_roi_heads().
build_backbone()→build_resnet_backbone()→ResNet
FCOS论文及源码详解(二)、 FCOS论文及源码详解(三)已将ResNet分析完毕。
本篇分析build_rpn().
FCOS代码
build_rpn()
build_rpn()函数定义于fcos_core.modeling.rpn.rpn
def build_rpn(cfg, in_channels):
    """
    Build the module selected by the config flags.

    ``cfg.MODEL.FCOS_ON`` is checked first, then ``cfg.MODEL.RETINANET_ON``;
    when neither flag is set an ``RPNModule`` is returned.

    Arguments:
        cfg: config node whose ``MODEL.FCOS_ON`` / ``MODEL.RETINANET_ON``
            flags are read here
        in_channels: forwarded unchanged to the selected builder
    """
    if cfg.MODEL.FCOS_ON:
        return build_fcos(cfg, in_channels)
    if cfg.MODEL.RETINANET_ON:
        return build_retinanet(cfg, in_channels)

    return RPNModule(cfg, in_channels)
在fcos_core.config.defaults中找到
_C.MODEL.FCOS_ON = True
_C.MODEL.RETINANET_ON = False
build_fcos()定义于fcos_core.modeling.rpn.fcos.fcos
def build_fcos(cfg, in_channels):
    """Return an ``FCOSModule`` built from ``cfg`` and ``in_channels``."""
    return FCOSModule(cfg, in_channels)
故build_rpn()→build_fcos()→FCOSModule
fcos.fcos.FCOSModule
class FCOSModule(torch.nn.Module):
    """
    Module for FCOS computation.

    Bundles the FCOS head with the test-time box selector and the loss
    evaluator, all built from the config. Only tested on FPN for now.
    """

    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: config node; ``cfg.MODEL.FCOS.FPN_STRIDES`` is read here and
                the whole node is passed on to the sub-component builders
            in_channels: forwarded to ``FCOSHead``
        """
        super(FCOSModule, self).__init__()

        self.head = FCOSHead(cfg, in_channels)
        self.box_selector_test = make_fcos_postprocessor(cfg)
        self.loss_evaluator = make_fcos_loss_evaluator(cfg)
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
类FCOSModule有4个实例变量
head:FCOSHead与FCOSModule定义于同一文件(fcos_core.modeling.rpn.fcos.fcos),位于FCOSModule上方,下文详解
box_selector_test:make_fcos_postprocessor()定义于fcos_core.modeling.rpn.fcos.inference
loss_evaluator:make_fcos_loss_evaluator()定义于fcos_core.modeling.rpn.fcos.loss
fpn_strides:在fcos_core.config.defaults中找到
_C.MODEL.FCOS.FPN_STRIDES = [8, 16, 32, 64, 128]
head
class FCOSHead(torch.nn.Module):
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: config node; the ``cfg.MODEL.FCOS`` options are read here
            in_channels (int): number of channels of the input feature
        """
        super(FCOSHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        # One output channel per class, minus one relative to NUM_CLASSES.
        num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER
在fcos_core.config.defaults中找到
_C.MODEL.FCOS.NUM_CLASSES = 81 # the number of classes including background
_C.MODEL.FCOS.FPN_STRIDES = [8, 16, 32, 64, 128]
_C.MODEL.FCOS.NORM_REG_TARGETS = False
_C.MODEL.FCOS.CENTERNESS_ON_REG = False
_C.MODEL.FCOS.USE_DCN_IN_TOWER = False
# Build the two sub-networks: cls_tower and bbox_tower.
cls_tower = []
bbox_tower = []
for i in range(cfg.MODEL.FCOS.NUM_CONVS):  # 4
    # Only the last conv of each tower uses DFConv2d, and only when
    # use_dcn_in_tower is set; every other conv is a plain nn.Conv2d.
    if self.use_dcn_in_tower and \
            i == cfg.MODEL.FCOS.NUM_CONVS - 1:
        conv_func = DFConv2d
    else:
        conv_func = nn.Conv2d

    cls_tower.append(
        conv_func(
            in_channels,
            in_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=True
        )
    )
    cls_tower.append(nn.GroupNorm(32, in_channels))
    cls_tower.append(nn.ReLU())
    # bbox_tower is assembled exactly like cls_tower.
    bbox_tower.append(
        conv_func(
            in_channels,
            in_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=True
        )
    )
    bbox_tower.append(nn.GroupNorm(32, in_channels))
    bbox_tower.append(nn.ReLU())

self.add_module('cls_tower', nn.Sequential(*cls_tower))
self.add_module('bbox_tower', nn.Sequential(*bbox_tower))

# Final prediction layers: classification logits (cls_logits), bounding-box
# regression (bbox_pred, 4 channels) and centerness (1 channel).
self.cls_logits = nn.Conv2d(
    in_channels, num_classes, kernel_size=3, stride=1,
    padding=1
)
self.bbox_pred = nn.Conv2d(
    in_channels, 4, kernel_size=3, stride=1,
    padding=1
)
self.centerness = nn.Conv2d(
    in_channels, 1, kernel_size=3, stride=1,
    padding=1
)

# initialization: N(0, 0.01) weights and zero bias for every nn.Conv2d
for modules in [self.cls_tower, self.bbox_tower,
                self.cls_logits, self.bbox_pred,
                self.centerness]:
    for layer in modules.modules():
        if isinstance(layer, nn.Conv2d):
            torch.nn.init.normal_(layer.weight, std=0.01)
            torch.nn.init.constant_(layer.bias, 0)

# initialize the bias for focal loss; the value is chosen so that
# sigmoid(bias_value) == prior_prob
prior_prob = cfg.MODEL.FCOS.PRIOR_PROB  # 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
torch.nn.init.constant_(self.cls_logits.bias, bias_value)

# Five learnable scalars; forward() picks one by feature-level index.
self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
Scale定义于fcos_core.layers.scale
class Scale(nn.Module):
    """Multiply the input by a single learnable scalar."""

    def __init__(self, init_value=1.0):
        """
        Arguments:
            init_value: initial value of the learnable scalar
        """
        super(Scale, self).__init__()
        # Stored as a one-element parameter so it is trained with the model.
        self.scale = nn.Parameter(torch.FloatTensor([init_value]))

    def forward(self, input):
        """Return ``input`` multiplied by the learnable scalar."""
        return input * self.scale
论文中提到:
we share the heads between different feature levels
regress different size range → $\exp(s_i x)$(其中 $s_i$ 为第 $i$ 层特征 $P_i$ 对应的可训练标量)
五层特征共享head,但其回归范围不同,用缩放因子scale对回归结果进行缩放。
方法__init__()到此结束
接着看FCOSHead的方法forward()
def forward(self, x):
    """
    Run the shared head on every feature level.

    Arguments:
        x: iterable of feature maps, one per level; the position of a
            feature map in ``x`` selects the matching entry of
            ``self.scales`` and ``self.fpn_strides``

    Returns:
        (logits, bbox_reg, centerness): three lists with one entry per
        feature level, in the same order as ``x``
    """
    logits = []
    bbox_reg = []
    centerness = []
    for level, feature in enumerate(x):
        cls_tower = self.cls_tower(feature)
        box_tower = self.bbox_tower(feature)

        logits.append(self.cls_logits(cls_tower))
        # Centerness is predicted from the regression tower only when
        # centerness_on_reg is set; otherwise from the classification tower.
        if self.centerness_on_reg:  # False
            centerness.append(self.centerness(box_tower))
        else:
            centerness.append(self.centerness(cls_tower))

        # Each level applies its own learnable scale to the raw regression.
        bbox_pred = self.scales[level](self.bbox_pred(box_tower))
        if self.norm_reg_targets:  # False
            bbox_pred = F.relu(bbox_pred)
            if self.training:
                bbox_reg.append(bbox_pred)
            else:
                # Outside training the prediction is multiplied by the
                # stride of this level.
                bbox_reg.append(bbox_pred * self.fpn_strides[level])
        else:
            bbox_reg.append(torch.exp(bbox_pred))
    return logits, bbox_reg, centerness
cls_tower结构
in→conv1→gn1→relu→conv2→gn2→relu→conv3→gn3→relu→conv4→gn4→relu→out
根据self.rpn = build_rpn(cfg, self.backbone.out_channels),各层通道数都为4*256
box_tower结构
in→conv1→gn1→relu→conv2→gn2→relu→conv3→gn3→relu→conv4→gn4→relu→out
FCOSHead结构
in→cls_tower
cls_tower→cls_logits(conv)→out
cls_tower→centerness(conv)→out(默认CENTERNESS_ON_REG=False;若为True,则centerness改接在box_tower之后)
in→box_tower
box_tower→bbox_pred(conv)→scales→exp→out
即论文中这一部分: