[FCOS] FCOS: Fully Convolutional One-Stage Object Detection (ICCV 2019) Code Notes

FCOS: Fully Convolutional One-Stage Object Detection

If this post helps you, please give it a like~

FCOS network architecture and key points of the paper

FCOS network architecture

Note what the paper means by "share weight": the feature maps output by the 5 FPN levels each pass through the shared head, which consists of 4 convs for the regression branch and 4 convs for the classification branch. "Shared weights" means the 5 heads share the regression conv weights and the classification conv weights across levels; within a single head, however, the regression and classification branches have their own weights and do not share them with each other.
See the code below for details.

            feature = self.share_tower(feature)   # torch.Size([1, 256, 52, 76])  torch.Size([1, 256, 26, 38])
            cls_tower = self.cls_tower(feature)   # torch.Size([1, 256, 52, 76])
            bbox_tower = self.bbox_tower(feature) # torch.Size([1, 256, 52, 76])

self.cls_tower and self.bbox_tower are the same classification convs and regression convs for every level, i.e., their weights are shared across the 5 FPN levels.
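A minimal standalone sketch of this sharing pattern (hypothetical layer sizes, not the repo's exact head): one cls tower and one bbox tower are built once, each with its own parameters, and the same two modules are then applied to every FPN level.

import torch
from torch import nn

def make_tower(num_convs=4, channels=256):
    layers = []
    for _ in range(num_convs):
        layers += [nn.Conv2d(channels, channels, 3, padding=1),
                   nn.GroupNorm(32, channels),
                   nn.ReLU()]
    return nn.Sequential(*layers)

cls_tower = make_tower()   # classification branch: its own 4 convs
bbox_tower = make_tower()  # regression branch: its own 4 convs, not shared with cls_tower

# the same two modules process all 5 FPN levels, so their weights are shared across levels
fpn_feats = [torch.randn(1, 256, 52 // 2 ** i, 76 // 2 ** i) for i in range(5)]
cls_feats = [cls_tower(f) for f in fpn_feats]
reg_feats = [bbox_tower(f) for f in fpn_feats]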
Formula mapping each location on the 5 FPN levels back to the input image: a point (x, y) on a feature map with stride s corresponds to image location (floor(s/2) + x*s, floor(s/2) + y*s), i.e., roughly the center of the receptive region of (x, y).
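A sketch of how such a location grid can be built (the repo's version lives in adet/utils/comm.py as compute_locations; this is a hedged reimplementation and may differ in minor details):

import torch

def compute_locations_sketch(h, w, stride, device="cpu"):
    # column / row coordinates in input-image pixels: 0, s, 2s, ...
    shifts_x = torch.arange(0, w * stride, step=stride, dtype=torch.float32, device=device)
    shifts_y = torch.arange(0, h * stride, step=stride, dtype=torch.float32, device=device)
    shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij")  # indexing kwarg needs PyTorch >= 1.10
    # add stride // 2 so each location maps to (s//2 + x*s, s//2 + y*s)
    locations = torch.stack((shift_x.reshape(-1), shift_y.reshape(-1)), dim=1) + stride // 2
    return locations  # shape (h * w, 2), one (x, y) per feature-map pixel

# e.g. a 52x76 P3 map with stride 8 gives 3952 locations, matching the shapes noted in the code below
print(compute_locations_sketch(52, 76, 8).shape)  # torch.Size([3952, 2])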
Constraint for selecting positive examples per FPN level, based on max(l*, t*, r*, b*): a location on level i is discarded as negative if max(l*, t*, r*, b*) > m_i or max(l*, t*, r*, b*) < m_{i-1}, where (m_2, ..., m_7) = (0, 64, 128, 256, 512, inf) are the maximum regression ranges of levels P3 to P7.
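A hedged sketch of that per-level filter (the actual logic is in fcos_outputs.py; the range table and function name here are illustrative):

import torch

# per-level regression ranges (m_{i-1}, m_i] from the FCOS paper, P3..P7
SIZE_RANGES = [(-1, 64), (64, 128), (128, 256), (256, 512), (512, float("inf"))]

def positive_mask_for_level(reg_targets, level):
    """reg_targets: (N, 4) tensor of (l*, t*, r*, b*) for N candidate locations."""
    inside_box = reg_targets.min(dim=1).values > 0        # location must lie inside a GT box
    max_reg = reg_targets.max(dim=1).values               # max(l*, t*, r*, b*)
    lo, hi = SIZE_RANGES[level]
    return inside_box & (max_reg > lo) & (max_reg <= hi)  # otherwise negative on this level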
Centerness formula: centerness* = sqrt( (min(l*, r*) / max(l*, r*)) * (min(t*, b*) / max(t*, b*)) ), which equals 1 at the box center and decays toward 0 near the box borders.
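A minimal sketch of that computation (the repo computes it in fcos_outputs.py; this standalone helper just mirrors the formula):

import torch

def centerness_targets(reg_targets):
    """reg_targets: (N, 4) tensor of (l*, t*, r*, b*) for positive locations."""
    l, t, r, b = reg_targets.unbind(dim=1)
    lr = torch.min(l, r) / torch.max(l, r)
    tb = torch.min(t, b) / torch.max(t, b)
    return torch.sqrt(lr * tb)  # in (0, 1]; equals 1 exactly at the box center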

1. AdelaiDet/adet/modeling/fcos/fcos.py

import math
from typing import List, Dict
import torch
from torch import nn
from torch.nn import functional as F

from detectron2.layers import ShapeSpec, NaiveSyncBatchNorm
from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY

from adet.layers import DFConv2d, NaiveGroupNorm
from adet.utils.comm import compute_locations
from .fcos_outputs import FCOSOutputs
import pdb

__all__ = ["FCOS"]

INF = 100000000


class Scale(nn.Module):
    def __init__(self, init_value=1.0):
        super(Scale, self).__init__()
        self.scale = nn.Parameter(torch.FloatTensor([init_value]))

    def forward(self, input):
        return input * self.scale


class ModuleListDial(nn.ModuleList):
    def __init__(self, modules=None):
        super(ModuleListDial, self).__init__(modules)
        self.cur_position = 0

    def forward(self, x):
        result = self[self.cur_position](x)
        self.cur_position += 1
        if self.cur_position >= len(self):
            self.cur_position = 0
        return result

# instantiated from detectron2/detectron2/modeling/proposal_generator/build.py via PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape)
@PROPOSAL_GENERATOR_REGISTRY.register()
class FCOS(nn.Module):
    """
    Implement FCOS (https://arxiv.org/abs/1904.01355).
    """
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super().__init__()
        self.in_features = cfg.MODEL.FCOS.IN_FEATURES # ["p3", "p4", "p5", "p6", "p7"]
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES # [8, 16, 32, 64, 128]
        self.yield_proposal = cfg.MODEL.FCOS.YIELD_PROPOSAL # False
        # build the FCOSHead
        self.fcos_head = FCOSHead(cfg, [input_shape[f] for f in self.in_features])
        pdb.set_trace()
        self.in_channels_to_top_module = self.fcos_head.in_channels_to_top_module # 256
        # build FCOSOutputs(cfg)
        self.fcos_outputs = FCOSOutputs(cfg) #FCOSOutputs((loc_loss_func): IOULoss())
        pdb.set_trace()

    def forward_head(self, features, top_module=None):
        features = [features[f] for f in self.in_features]
        pred_class_logits, pred_deltas, pred_centerness, top_feats, bbox_towers = self.fcos_head(
            features, top_module, self.yield_proposal)
        pdb.set_trace()
        return pred_class_logits, pred_deltas, pred_centerness, top_feats, bbox_towers

    def forward(self, images, features, gt_instances=None, top_module=None):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        """
        pdb.set_trace()
        features = [features[f] for f in self.in_features]  # len(features) = number of FPN levels = 5; see the shape comments below
        # locations (x, y) effectively serve as the bbox centers during training
        locations = self.compute_locations(features)  # calls compute_locations below; len(locations) = 5
        pdb.set_trace()
        logits_pred, reg_pred, ctrness_pred, top_feats, bbox_towers = self.fcos_head(  # runs FCOSHead.forward
            features, top_module, self.yield_proposal
        )

        results = {}
        if self.yield_proposal:  # self.yield_proposal: False
            results["features"] = {
                f: b for f, b in zip(self.in_features, bbox_towers)
            }

        if self.training:
            results, losses = self.fcos_outputs.losses(  # calls losses() in fcos_outputs.py
                logits_pred, reg_pred, ctrness_pred,
                locations, gt_instances, top_feats
            )
            
            if self.yield_proposal:
                with torch.no_grad():
                    results["proposals"] = self.fcos_outputs.predict_proposals(
                        logits_pred, reg_pred, ctrness_pred,
                        locations, images.image_sizes, top_feats
                    )
            pdb.set_trace()
            return results, losses  # len(results) = 2, len(losses) = 3
        else:
            results = self.fcos_outputs.predict_proposals(
                logits_pred, reg_pred, ctrness_pred,
                locations, images.image_sizes, top_feats
            )
        pdb.set_trace()
        return results, {}

    def compute_locations(self, features):
        locations = []
        for level, feature in enumerate(features): #  levels 0 - 4
            h, w = feature.size()[-2:]
            locations_per_level = compute_locations(  # calls compute_locations in adet/utils/comm.py
                h, w, self.fpn_strides[level],
                feature.device
            )
            locations.append(locations_per_level)
        pdb.set_trace()  # len(locations) = 5
        # locations[i].shape ==> (torch.Size([3952, 2]), torch.Size([988, 2]), torch.Size([247, 2]), torch.Size([70, 2]), torch.Size([20, 2]))
        # example: locations[0].shape is torch.Size([3952, 2]) for a [1, 256, 52, 76] feature map --> 3952 = 52 * 76
        # this is the fully convolutional idea: a location (and a prediction) is computed for every pixel
        return locations


class FCOSHead(nn.Module):
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super().__init__()
        # TODO: Implement the sigmoid version first.
        self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES # num_classes 80
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES # fpn_strides [8, 16, 32, 64, 128]
        head_configs = {"cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS,
                                cfg.MODEL.FCOS.USE_DEFORMABLE),
                        "bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS,
                                 cfg.MODEL.FCOS.USE_DEFORMABLE),
                        "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS,
                                  False)}
        # head_configs = {'cls': (4, False), 'bbox': (4, False), 'share': (0, False)}
        norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM # GN
        self.num_levels = len(input_shape) # 5
 
        in_channels = [s.channels for s in input_shape] # [256, 256, 256, 256, 256]
        assert len(set(in_channels)) == 1, "Each level must have the same channel!"
        in_channels = in_channels[0] # in_channels 256
        
        # input_shape:
        # [
            # ShapeSpec(channels=256, height=None, width=None, stride=8), 
            # ShapeSpec(channels=256, height=None, width=None, stride=16), 
            # ShapeSpec(channels=256, height=None, width=None, stride=32), 
            # ShapeSpec(channels=256, height=None, width=None, stride=64), 
            # ShapeSpec(channels=256, height=None, width=None, stride=128)
        # ]
        self.in_channels_to_top_module = in_channels # 256

        for head in head_configs:
            tower = []
            num_convs, use_deformable = head_configs[head]
            for i in range(num_convs):
                if use_deformable and i == num_convs - 1:
                    conv_func = DFConv2d
                else:
                    conv_func = nn.Conv2d
                tower.append(conv_func(
                    in_channels, in_channels,
                    kernel_size=3, stride=1,
                    padding=1, bias=True
                ))
                if norm == "GN":
                    tower.append(nn.GroupNorm(32, in_channels))
                elif norm == "NaiveGN":
                    tower.append(NaiveGroupNorm(32, in_channels))
                elif norm == "BN":
                    tower.append(ModuleListDial([
                        nn.BatchNorm2d(in_channels) for _ in range(self.num_levels)
                    ]))
                elif norm == "SyncBN":
                    tower.append(ModuleListDial([
                        NaiveSyncBatchNorm(in_channels) for _ in range(self.num_levels)
                    ]))
                tower.append(nn.ReLU())
            self.add_module('{}_tower'.format(head),
                            nn.Sequential(*tower))

        self.cls_logits = nn.Conv2d(
            in_channels, self.num_classes,
            kernel_size=3, stride=1,
            padding=1
        )
        # cls_logits
            # Conv2d(256, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            # 256  --> 3 num_classes = 3
        self.bbox_pred = nn.Conv2d(
            in_channels, 4, kernel_size=3,
            stride=1, padding=1
        )
        # bbox_pred 
            # Conv2d(256, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            # 256  --> 4  [left, top, right, bottom] 4d-vector
        self.ctrness = nn.Conv2d(
            in_channels, 1, kernel_size=3,
            stride=1, padding=1
        )
        # ctrness 
            # Conv2d(256, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            # 256  --> 1  h * w * 1
        pdb.set_trace()

        if cfg.MODEL.FCOS.USE_SCALE: # True
            self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(self.num_levels)])
        else:
            self.scales = None
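For reference, a hedged sketch of how the layers defined above are typically applied per FPN level in the head's forward pass (module names follow the constructor; consult the AdelaiDet source for the exact FCOSHead.forward, which also handles weight initialization and config options not shown here):

    def forward(self, x, top_module=None, yield_bbox_towers=False):
        logits, bbox_reg, ctrness, top_feats, bbox_towers = [], [], [], [], []
        for l, feature in enumerate(x):           # one pass per FPN level, same weights each time
            feature = self.share_tower(feature)
            cls_tower = self.cls_tower(feature)
            bbox_tower = self.bbox_tower(feature)
            if yield_bbox_towers:
                bbox_towers.append(bbox_tower)

            logits.append(self.cls_logits(cls_tower))   # per-pixel class scores
            ctrness.append(self.ctrness(bbox_tower))    # per-pixel centerness
            reg = self.bbox_pred(bbox_tower)            # per-pixel (l, t, r, b)
            if self.scales is not None:
                reg = self.scales[l](reg)               # learnable per-level scale
            bbox_reg.append(F.relu(reg))                # distances must be non-negative
            if top_module is not None:
                top_feats.append(top_module(bbox_tower))
        return logits, bbox_reg, ctrness, top_feats, bbox_towers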