FCOS: Fully Convolutional One-Stage Object Detection
如果对你有帮助的话,希望帮我点个赞~
文章目录
FCOS网络结构以及论文中重点内容
FCOS网络结构
注意:文中提到的 share weight 指的是 5 个 FPN 层输出的特征图都经过同一个 head。该 head 分为回归分支的 4 个 conv 和分类分支的 4 个 conv;共享权重是指 5 个层级共用同一组回归 conv 权重和同一组分类 conv 权重,而在同一个 FPN 层级上,回归分支与分类分支之间的权重并不共享。
详情见代码部分。
feature = self.share_tower(feature) # torch.Size([1, 256, 52, 76]) torch.Size([1, 256, 26, 38])
cls_tower = self.cls_tower(feature) # torch.Size([1, 256, 52, 76])
bbox_tower = self.bbox_tower(feature) # torch.Size([1, 256, 52, 76])
self.cls_tower 和 self.bbox_tower 在 5 个 FPN 层级上使用的都是同一组分类 conv 和同一组回归 conv,即各层级之间共享权重。
5 层 FPN 特征图上的每个点 $(x, y)$ 映射回原图坐标的公式:$\left(\lfloor s/2 \rfloor + xs,\ \lfloor s/2 \rfloor + ys\right)$,其中 $s$ 为该层的 stride。
根据 $\max(l^*, t^*, r^*, b^*)$ 在各 FPN 层上挑选 positive example 的限制条件。
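centerness 计算公式:$\text{centerness}^* = \sqrt{\dfrac{\min(l^*, r^*)}{\max(l^*, r^*)} \times \dfrac{\min(t^*, b^*)}{\max(t^*, b^*)}}$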
1. AdelaiDet/adet/modeling/fcos/fcos.py
import math
from typing import List, Dict
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.layers import ShapeSpec, NaiveSyncBatchNorm
from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY
from adet.layers import DFConv2d, NaiveGroupNorm
from adet.utils.comm import compute_locations
from .fcos_outputs import FCOSOutputs
import pdb
# Public API of this module: only the FCOS class is exported by a star-import.
__all__ = ["FCOS"]
# Large integer constant. It is not referenced in the visible part of this
# file; presumably an "infinity" stand-in used elsewhere (TODO confirm).
INF = 100000000
class Scale(nn.Module):
    """Multiply the input by a single learnable scalar."""

    def __init__(self, init_value=1.0):
        """Create the learnable factor, initialised to ``init_value``."""
        super().__init__()
        initial = torch.FloatTensor([init_value])
        self.scale = nn.Parameter(initial)

    def forward(self, input):
        """Return ``input`` multiplied by the learnable factor."""
        scaled = input * self.scale
        return scaled
class ModuleListDial(nn.ModuleList):
    """A module list that applies its members one per call, in rotation.

    Each call to ``forward`` runs the member at the current position and then
    advances the position, wrapping back to the first member after the last.
    """

    def __init__(self, modules=None):
        """Store ``modules`` and point the dial at the first one."""
        super().__init__(modules)
        self.cur_position = 0

    def forward(self, x):
        """Apply the currently selected member to ``x`` and advance the dial."""
        active = self[self.cur_position]
        output = active(x)
        # Step to the next member; wrap around once the end is reached.
        self.cur_position = (self.cur_position + 1) % len(self)
        return output
# Instantiated from detectron2/detectron2/modeling/proposal_generator/build.py
# via PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape).
@PROPOSAL_GENERATOR_REGISTRY.register()
class FCOS(nn.Module):
    """
    Implement FCOS (https://arxiv.org/abs/1904.01355).

    Wires together the prediction head (``FCOSHead``) and the loss /
    post-processing helper (``FCOSOutputs``). The example values in the
    inline comments were recorded by the original author during a debug run
    and depend on the configuration in use.
    """

    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        """
        Arguments:
            cfg: config node; ``cfg.MODEL.FCOS.IN_FEATURES``, ``FPN_STRIDES``
                and ``YIELD_PROPOSAL`` are read here, and ``cfg`` is passed on
                to ``FCOSHead`` and ``FCOSOutputs``.
            input_shape (dict[str, ShapeSpec]): shape spec of every available
                feature map, keyed by feature name.
        """
        super().__init__()
        self.in_features = cfg.MODEL.FCOS.IN_FEATURES  # e.g. ["p3", "p4", "p5", "p6", "p7"]
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES  # e.g. [8, 16, 32, 64, 128]
        self.yield_proposal = cfg.MODEL.FCOS.YIELD_PROPOSAL  # e.g. False

        # The head only receives the shape specs of the selected levels.
        self.fcos_head = FCOSHead(cfg, [input_shape[f] for f in self.in_features])
        self.in_channels_to_top_module = self.fcos_head.in_channels_to_top_module  # e.g. 256

        self.fcos_outputs = FCOSOutputs(cfg)

    def forward_head(self, features, top_module=None):
        """
        Run only the prediction head on the selected feature levels.

        Arguments:
            features (dict): feature maps keyed by name; the entries named in
                ``self.in_features`` are used, in that order.
            top_module: forwarded unchanged to the head.

        Returns:
            tuple: ``(pred_class_logits, pred_deltas, pred_centerness,
            top_feats, bbox_towers)`` exactly as returned by the head.
        """
        features = [features[f] for f in self.in_features]
        pred_class_logits, pred_deltas, pred_centerness, top_feats, bbox_towers = self.fcos_head(
            features, top_module, self.yield_proposal)
        return pred_class_logits, pred_deltas, pred_centerness, top_feats, bbox_towers

    def forward(self, images, features, gt_instances=None, top_module=None):
        """
        Arguments:
            images: object exposing ``image_sizes``; it is read whenever
                proposals are predicted.
            features (dict): feature maps keyed by name; the entries named in
                ``self.in_features`` are used, in that order.
            gt_instances: ground truth, forwarded to ``FCOSOutputs.losses``
                in training mode (optional).
            top_module: forwarded unchanged to the head (optional).

        Returns:
            tuple: ``(results, losses)``.
            During training both values come from ``FCOSOutputs.losses``;
            when ``self.yield_proposal`` is set, ``results["proposals"]`` is
            additionally filled from ``FCOSOutputs.predict_proposals``.
            During testing ``results`` is the output of
            ``FCOSOutputs.predict_proposals`` and ``losses`` is an empty dict.
        """
        # One feature map per selected FPN level.
        features = [features[f] for f in self.in_features]
        # One (x, y) location tensor per level.
        locations = self.compute_locations(features)
        logits_pred, reg_pred, ctrness_pred, top_feats, bbox_towers = self.fcos_head(
            features, top_module, self.yield_proposal
        )

        results = {}
        if self.yield_proposal:
            # NOTE(review): this dict is replaced in both branches below, so
            # the "features" entry never reaches the caller. Kept as-is to
            # preserve behaviour; confirm whether it was meant to be merged.
            results["features"] = dict(zip(self.in_features, bbox_towers))

        if self.training:
            results, losses = self.fcos_outputs.losses(
                logits_pred, reg_pred, ctrness_pred,
                locations, gt_instances, top_feats
            )
            if self.yield_proposal:
                # Proposals are produced without tracking gradients.
                with torch.no_grad():
                    results["proposals"] = self.fcos_outputs.predict_proposals(
                        logits_pred, reg_pred, ctrness_pred,
                        locations, images.image_sizes, top_feats
                    )
            return results, losses

        results = self.fcos_outputs.predict_proposals(
            logits_pred, reg_pred, ctrness_pred,
            locations, images.image_sizes, top_feats
        )
        return results, {}

    def compute_locations(self, features):
        """
        Compute the per-level location tensors.

        Arguments:
            features (list[Tensor]): one feature map per level; only the last
                two dimensions (height, width) and the device are used.

        Returns:
            list: one entry per level, produced by the module-level
            ``compute_locations`` helper from the level's height, width,
            stride and device. In the author's debug run a 52 x 76 map gave
            a tensor of shape (3952, 2), i.e. one row per feature-map pixel.
        """
        locations = []
        for level, feature in enumerate(features):
            h, w = feature.size()[-2:]
            # Inside the method the bare name resolves to the helper imported
            # from adet.utils.comm, not to this method.
            locations.append(
                compute_locations(h, w, self.fpn_strides[level], feature.device)
            )
        return locations
class FCOSHead(nn.Module):
def __init__(self, cfg, input_shape: List[ShapeSpec]):
"""
Arguments:
in_channels (int): number of channels of the input feature
"""
super().__init__()
# TODO: Implement the sigmoid version first.
self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES # num_classes 80
self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES # fpn_strides [8, 16, 32, 64, 128]
head_configs = {
"cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS,
cfg.MODEL.FCOS.USE_DEFORMABLE),
"bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS,
cfg.MODEL.FCOS.USE_DEFORMABLE),
"share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS,
False)}
# head_configs = {'cls': (4, False), 'bbox': (4, False), 'share': (0, False)}
norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM # GN
self.num_levels = len(input_shape) # 5
in_channels = [s.channels for s in input_shape] # 256
assert len(set(in_channels)) == 1, "Each level must have the same channel!"
in_channels = in_channels[0] # in_channels 256
# input_shape:
# [
# ShapeSpec(channels=256, height=None, width=None, stride=8),
# ShapeSpec(channels=256, height=None, width=None, stride=16),
# ShapeSpec(channels=256, height=None, width=None, stride=32),
# ShapeSpec(channels=256, height=None, width=None, stride=64),
# ShapeSpec(channels=256, height=None, width=None, stride=128)
# ]
self.in_channels_to_top_module = in_channels # 256
for head in head_configs:
tower = []
num_convs, use_deformable = head_configs[head]
for i in range(num_convs):
if use_deformable and i == num_convs - 1:
conv_func = DFConv2d
else:
conv_func = nn.Conv2d
tower.append(conv_func(
in_channels, in_channels,
kernel_size=3, stride=1,
padding=1, bias=True
))
if norm == "GN":
tower.append(nn.GroupNorm(32, in_channels))
elif norm == "NaiveGN":
tower.append(NaiveGroupNorm(32, in_channels))
elif norm == "BN":
tower.append(ModuleListDial([
nn.BatchNorm2d(in_channels) for _ in range(self.num_levels)
]))
elif norm == "SyncBN":
tower.append(ModuleListDial([
NaiveSyncBatchNorm(in_channels) for _ in range(self.num_levels)
]))
tower.append(nn.ReLU())
self.add_module('{}_tower'.format(head),
nn.Sequential(*tower))
self.cls_logits = nn.Conv2d(
in_channels, self.num_classes,
kernel_size=3, stride=1,
padding=1
)
# cls_logtis
# Conv2d(256, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# 256 --> 3 num_classes = 3
self.bbox_pred = nn.Conv2d(
in_channels, 4, kernel_size=3,
stride=1, padding=1
)
# bbox_pred
# Conv2d(256, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# 256 --> 4 [left, top, right, bottom] 4d-vector
self.ctrness = nn.Conv2d(
in_channels, 1, kernel_size=3,
stride=1, padding=1
)
# ctrness
# Conv2d(256, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# 256 --> 1 h * w * 1
pdb.set_trace()
if cfg.MODEL.FCOS.USE_SCALE: # True
self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(self.num_levels