文章目录
FPN和PAN都是目标检测中用于融合多尺度特征的颈部(Neck)结构,其中PAN是针对特征金字塔网络(FPN)在多尺度检测任务上的不足而提出的改进方法。下面分别详细介绍一下它们的原理和区别。
0. 前言
目标检测器的构成
-
Input:Image,Patches,ImagePyramid
-
Backbones:VGG16,ResNet(ResNet-18、ResNet-34、ResNet-50、ResNet-101、ResNet-152),SpineNet,EfficientNet-B0/B7,CSPResNeXt50,CSPDarknet53,MobileNet(v1、v2、v3),ShuffleNet(v1、v2) ,GhostNet
-
Neck:
Additional blocks:SPP,ASPP,RFB,SAM
Path-aggregation blocks:FPN,PAN,NAS-FPN,Fully-connectedFPN,BiFPN,ASFF,SFAM
-
Heads:
Dense Prediction(one-stage):
RPN,SSD,YOLO,RetinaNet(anchorbased)
CornerNet,CenterNet,MatrixNet,FCOS(FCOSv1、FCOSv2),ATSS,PAA(anchorfree)
SparsePrediction(two-stage):
FasterR-CNN,R-FCN,MaskR-CNN(anchorbased)
RepPoints(anchorfree)
Neck部分的设计是多种多样的
(a) FPN
(b) PANet
(c) NAS-FPN
(d) BiFPN
1. FPN
FPN全称Feature Pyramid Network,是由FAIR在2017年提出的一种处理多尺度问题的方法。FPN的主要思路是通过构建金字塔式的特征图来提取不同尺度下的目标特征,进而提高检测精度。
FPN的构建方式是从高分辨率的特征图开始向下采样,同时从低分辨率的特征图开始向上采样,将它们连接起来形成金字塔。在这个过程中,每一层特征图的信息都会与上下相邻层的特征图融合,这样可以使得高层特征图中的目标信息得以保留,同时低层特征图中的背景信息也可以被高层特征图所补充。经过这样的处理,FPN可以在基本不影响检测速度的情况下,提高模型在多尺度检测任务上的精度。
import collections
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
from core.config import cfg
import utils.net as net_utils
import modeling.ResNet as ResNet
from modeling.generate_anchors import generate_anchors
from modeling.generate_proposals import GenerateProposalsOp
from modeling.collect_and_distribute_fpn_rpn_proposals import CollectAndDistributeFpnRpnProposalsOp
import nn as mynn
# Lowest and highest pyramid levels in the backbone network. For FPN, we assume
# that all networks have 5 spatial reductions, each by a factor of 2. Level 1
# would correspond to the input image, hence it does not make sense to use it.
# LOWEST_BACKBONE_LVL is subtracted from the configured minimum level in
# fpn.__init__ when computing how many backbone stages are used.
LOWEST_BACKBONE_LVL = 2 # E.g., "conv2"-like level
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
# ---------------------------------------------------------------------------- #
# FPN with ResNet
# ---------------------------------------------------------------------------- #
def fpn_ResNet50_conv5_body():
    """Return an ``fpn`` module built on the ResNet-50 conv5 body.

    Uses the default ``fpn`` options (no ``P2only``, no PANet bottom-up path).
    """
    backbone_builder = ResNet.ResNet50_conv5_body
    level_info = fpn_level_info_ResNet50_conv5()
    return fpn(conv_body_func=backbone_builder, fpn_level_info=level_info)
def fpn_ResNet50_conv5_body_bup():
    """Return an ``fpn`` module on the ResNet-50 conv5 body with bottom-up path.

    Passes ``panet_buttomup=True`` so that ``fpn`` also creates the PANet
    bottom-up convolution modules.
    """
    backbone_builder = ResNet.ResNet50_conv5_body
    level_info = fpn_level_info_ResNet50_conv5()
    return fpn(
        conv_body_func=backbone_builder,
        fpn_level_info=level_info,
        panet_buttomup=True,
    )
def fpn_ResNet50_conv5_P2only_body():
    """Return an ``fpn`` module on the ResNet-50 conv5 body with ``P2only=True``.

    NOTE(review): ``P2only`` presumably restricts the output to the finest
    (P2) pyramid level -- confirm against ``fpn.forward``.
    """
    backbone_builder = ResNet.ResNet50_conv5_body
    level_info = fpn_level_info_ResNet50_conv5()
    return fpn(
        conv_body_func=backbone_builder,
        fpn_level_info=level_info,
        P2only=True,
    )
def fpn_ResNet101_conv5_body():
    """Return an ``fpn`` module built on the ResNet-101 conv5 body.

    Uses the default ``fpn`` options (no ``P2only``, no PANet bottom-up path).
    """
    backbone_builder = ResNet.ResNet101_conv5_body
    level_info = fpn_level_info_ResNet101_conv5()
    return fpn(conv_body_func=backbone_builder, fpn_level_info=level_info)
def fpn_ResNet101_conv5_P2only_body():
    """Return an ``fpn`` module on the ResNet-101 conv5 body with ``P2only=True``.

    NOTE(review): ``P2only`` presumably restricts the output to the finest
    (P2) pyramid level -- confirm against ``fpn.forward``.
    """
    backbone_builder = ResNet.ResNet101_conv5_body
    level_info = fpn_level_info_ResNet101_conv5()
    return fpn(
        conv_body_func=backbone_builder,
        fpn_level_info=level_info,
        P2only=True,
    )
def fpn_ResNet152_conv5_body():
    """Return an ``fpn`` module built on the ResNet-152 conv5 body.

    Uses the default ``fpn`` options (no ``P2only``, no PANet bottom-up path).
    """
    backbone_builder = ResNet.ResNet152_conv5_body
    level_info = fpn_level_info_ResNet152_conv5()
    return fpn(conv_body_func=backbone_builder, fpn_level_info=level_info)
def fpn_ResNet152_conv5_P2only_body():
    """Return an ``fpn`` module on the ResNet-152 conv5 body with ``P2only=True``.

    NOTE(review): ``P2only`` presumably restricts the output to the finest
    (P2) pyramid level -- confirm against ``fpn.forward``.
    """
    backbone_builder = ResNet.ResNet152_conv5_body
    level_info = fpn_level_info_ResNet152_conv5()
    return fpn(
        conv_body_func=backbone_builder,
        fpn_level_info=level_info,
        P2only=True,
    )
# ---------------------------------------------------------------------------- #
# Functions for bolting FPN onto backbone architectures
# ---------------------------------------------------------------------------- #
class fpn(nn.Module):
"""Add FPN connections based on the model described in the FPN paper.
fpn_output_blobs is in reversed order: e.g [fpn5, fpn4, fpn3, fpn2]
similarly for fpn_level_info.dims: e.g [2048, 1024, 512, 256]
similarly for spatial_scale: e.g [1/32, 1/16, 1/8, 1/4]
"""
def __init__(self, conv_body_func, fpn_level_info, P2only=False, panet_buttomup=False):
super().__init__()
self.fpn_level_info = fpn_level_info
self.P2only = P2only
self.panet_buttomup = panet_buttomup
self.dim_out = fpn_dim = cfg.FPN.DIM
min_level, max_level = get_min_max_levels()
self.num_backbone_stages = len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
fpn_dim_lateral = fpn_level_info.dims
self.spatial_scale = [] # a list of scales for FPN outputs
#
# Step 1: recursively build down starting from the coarsest backbone level
#
# For the coarest backbone level: 1x1 conv only seeds recursion
self.conv_top = nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)
if cfg.FPN.USE_GN:
self.conv_top = nn.Sequential(
nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias=False),
nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
eps=cfg.GROUP_NORM.EPSILON)
)
else:
self.conv_top = nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)
self.topdown_lateral_modules = nn.ModuleList()
self.posthoc_modules = nn.ModuleList()
# For other levels add top-down and lateral connections
for i in range(self.num_backbone_stages - 1):
self.topdown_lateral_modules.append(
topdown_lateral_module(fpn_dim, fpn_dim_lateral[i+1])
)
# Post-hoc scale-specific 3x3 convs
for i in range(self.num_backbone_stages):
if cfg.FPN.USE_GN:
self.posthoc_modules.append(nn.Sequential(
nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
eps=cfg.GROUP_NORM.EPSILON)
))
else:
self.posthoc_modules.append(
nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1)
)
self.spatial_scale.append(fpn_level_info.spatial_scales[i])
# add for panet buttom-up path
if self.panet_buttomup:
self.panet_buttomup_conv1_modules = nn.ModuleList()
self.panet_buttomup_conv2_modules = nn.ModuleList()
for i in range(self.num_backbone_stages - 1):
if cfg.FPN.USE_GN:
self.panet_buttomup_conv1_modules.append(nn.Sequential(
nn.Conv2d(fpn_dim, fpn_dim, 3, 2, 1, bias=True),
nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
eps=cfg.GROUP_NORM.EPSILON),
nn.ReLU(inplace=True)
))
self.panet_buttomup_conv2_modules.append(nn.Sequential(
nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=True),
nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
eps=cfg.GROUP_NORM.EPSILON),
nn.ReLU(inplace=True)
))
else:
self.panet_buttomup_conv1_modules.append(
nn.Conv2d(fpn_dim, fpn_dim, 3, 2, 1)
)
self.panet_buttomup_conv2_modules.append(
nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1)
)
#self.spatial_scale.append(fpn_level_info.spatial_scales[i])
#
# Step 2: build up starting from the coarsest backbone level
#
# Check if we need the P6 feature map
if not cfg.FPN.EXTRA_CONV_LEVELS