Notes on understanding maskrcnn_benchmark — modeling/backbone/fpn.py

I never understood how the FPN was hooked in until now. The flow is: backbone.py builds a ResNet according to the config and selects the specified stages (conv2~conv5); those feature maps are handed to this file, which builds the FPN on top of them. The extra P6 level is also added here, giving P2~P6, and the result is returned to backbone.py to form the ResNet+FPN backbone, i.e. build_resnet_fpn_backbone.
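Roughly, that wiring looks like the sketch below (paraphrased from my reading of build_resnet_fpn_backbone in modeling/backbone/backbone.py; the exact config keys and helper names may differ slightly between versions of the repo):

from collections import OrderedDict
from torch import nn
# resnet, fpn_module, conv_with_kaiming_uniform come from maskrcnn_benchmark.modeling
from maskrcnn_benchmark.modeling.backbone import resnet, fpn as fpn_module
from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform

def build_resnet_fpn_backbone(cfg):
    body = resnet.ResNet(cfg)                        # returns the conv2~conv5 maps
    in_ch = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS      # 256 for ResNet-50/101
    out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS   # 256
    fpn = fpn_module.FPN(
        in_channels_list=[in_ch, in_ch * 2, in_ch * 4, in_ch * 8],  # 256~2048
        out_channels=out_channels,
        conv_block=conv_with_kaiming_uniform(
            cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU
        ),
        top_blocks=fpn_module.LastLevelMaxPool(),    # adds P6 on top of P5
    )
    return nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)]))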

Note that the RoIs here are the proposals on the original image, not the RoI features produced after RoIAlign.
The larger a proposal is, the lower-resolution the feature map it is assigned to (e.g. a large proposal is mapped onto the 1/32-scale map). A big box most likely contains a big object, so its features should come from a level with a large receptive field; the receptive fields of the shallower, high-resolution levels are too small to see the whole object — like blind men feeling an elephant.
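Concretely, the assignment follows Eqn. (1) of the FPN paper, k = ⌊k0 + log2(√(wh)/224)⌋ with k0 = 4; in maskrcnn_benchmark this logic lives in the LevelMapper class of modeling/poolers.py. A minimal sketch (the function name and the epsilon are mine, not the repo's exact code):

import torch

def assign_fpn_level(boxes, k_min=2, k_max=5, canonical_scale=224, canonical_level=4):
    # boxes: Tensor[N, 4], (x1, y1, x2, y2) on the original image
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    s = torch.sqrt(w * h)                                  # sqrt of box area
    # k = floor(k0 + log2(s / 224)); the eps guards against log2(0)
    k = torch.floor(canonical_level + torch.log2(s / canonical_scale + 1e-6))
    return torch.clamp(k, min=k_min, max=k_max).to(torch.int64)

For example, a 448x448 proposal lands on level 5 (the 1/32 map), while a 112x112 proposal lands on level 3.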

        Arguments:
             x (list[Tensor]): feature maps for each feature level, roughly conv2~conv5
        Returns:
             results (tuple[Tensor]): feature maps after the FPN layers, roughly P2~P6
                 (one extra level is added here). They are ordered from highest
                 resolution first, i.e. P2~P6.
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
import torch.nn.functional as F
from torch import nn


class FPN(nn.Module):
    """
    Module that adds FPN on top of a list of feature maps.
    The feature maps are currently supposed to be in increasing depth
    order, and must be consecutive
    """

    def __init__(
        self, in_channels_list, out_channels, conv_block, top_blocks=None
    ):
        """
        Arguments:
            in_channels_list (list[int]): number of channels for each feature map that
                will be fed (256~2048 for conv2~conv5)
            out_channels (int): number of channels of the FPN representation (256)
            top_blocks (nn.Module or None): if provided, an extra operation will
                be performed on the output of the last (smallest resolution)
                FPN output, and the result will extend the result list
                (here: the max pool that generates the P6 level)
        """
        super(FPN, self).__init__()
        self.inner_blocks = []
        self.layer_blocks = []
        for idx, in_channels in enumerate(in_channels_list, 1):
            inner_block = "fpn_inner{}".format(idx)
            layer_block = "fpn_layer{}".format(idx)

            if in_channels == 0:
                continue
            '''conv_block is make_conv(in_channels, out_channels, kernel_size, stride=1, dilation=1):
            dilated kernel size - 1 = dilation * (kernel size - 1), and
            padding = dilation * (kernel_size - 1) // 2, so the output keeps the
            same spatial size ("same" convolution); it returns a Conv2d.'''
            inner_block_module = conv_block(in_channels, out_channels, 1)       # 1x1, stride 1, pad 0: "same" size
            layer_block_module = conv_block(out_channels, out_channels, 3, 1)   # 3x3, stride 1, pad 1: "same" size
            self.add_module(inner_block, inner_block_module)
            self.add_module(layer_block, layer_block_module)
            self.inner_blocks.append(inner_block)
            self.layer_blocks.append(layer_block)
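        # add_module registers each conv on the Module (so its parameters are
        # trained and saved); the plain Python lists only store the attribute
        # names, which is why forward() fetches the modules back with getattr()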
        self.top_blocks = top_blocks

    def forward(self, x):
        """
        Arguments:
            x (list[Tensor]): feature maps for each feature level.
        Returns:
            results (tuple[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        # getattr() returns the named attribute of an object: fetch the module
        # registered under the name stored in self.inner_blocks[-1]
        last_inner = getattr(self, self.inner_blocks[-1])(x[-1])
        results = []
        results.append(getattr(self, self.layer_blocks[-1])(last_inner))
        for feature, inner_block, layer_block in zip(
            x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1]
        ):
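            # the [::-1] slices reverse the lists: iterate top-down, from the
            # conv4-level map back to the conv2-level map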
            if not inner_block:
                continue
            inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest")  # top-down 2x upsampling
            inner_lateral = getattr(self, inner_block)(feature)                         # lateral connection
            # TODO use size instead of scale to make it robust to different sizes
            # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:],
            # mode='bilinear', align_corners=False)
            last_inner = inner_lateral + inner_top_down
            results.insert(0, getattr(self, layer_block)(last_inner))

        if isinstance(self.top_blocks, LastLevelP6P7):
            last_results = self.top_blocks(x[-1], results[-1])
            results.extend(last_results)
        elif isinstance(self.top_blocks, LastLevelMaxPool):                             # add P6, used by the cls/box heads
            last_results = self.top_blocks(results[-1])
            results.extend(last_results)

        return tuple(results)


class LastLevelMaxPool(nn.Module):
    def forward(self, x):
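        # F.max_pool2d(x, kernel_size=1, stride=2, padding=0): a 1x1 window
        # with stride 2 simply subsamples P5 by 2x, producing P6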
        return [F.max_pool2d(x, 1, 2, 0)]
    '''nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1,
    return_indices=False, ceil_mode=False): arguments passed out of positional
    order must of course be named. return_indices: whether to also return the
    indices of the max values; ceil_mode: use ceil instead of floor when
    computing the output size.'''


class LastLevelP6P7(nn.Module):
    """
    This module is used in RetinaNet to generate extra layers, P6 and P7.
    """
    def __init__(self, in_channels, out_channels):
        super(LastLevelP6P7, self).__init__()
        self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1)   # stride-2 3x3: C5/P5 -> P6
        self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1)  # stride-2 3x3: P6 -> P7
        for module in [self.p6, self.p7]:
            nn.init.kaiming_uniform_(module.weight, a=1)
            nn.init.constant_(module.bias, 0)
        self.use_P5 = in_channels == out_channels  # if channels match, start from P5 instead of C5

    def forward(self, c5, p5):
        x = p5 if self.use_P5 else c5
        p6 = self.p6(x)
        p7 = self.p7(F.relu(p6))
        return [p6, p7]
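Putting the pieces together, here is a quick shape check against the code above. The plain-Conv2d conv_block is a stand-in I wrote following the make_conv padding rule described earlier, not the repo's exact helper:

def simple_conv_block(in_channels, out_channels, kernel_size, stride=1, dilation=1):
    # "same" convolution, per make_conv's padding rule above
    return nn.Conv2d(
        in_channels, out_channels, kernel_size, stride,
        padding=dilation * (kernel_size - 1) // 2, dilation=dilation,
    )

fpn = FPN(
    in_channels_list=[256, 512, 1024, 2048],   # conv2~conv5 channels (ResNet-50)
    out_channels=256,
    conv_block=simple_conv_block,
    top_blocks=LastLevelMaxPool(),             # appends P6
)
# fake conv2~conv5 maps for an 800x800 input (strides 4, 8, 16, 32)
feats = [torch.rand(1, c, 800 // s, 800 // s)
         for c, s in zip([256, 512, 1024, 2048], [4, 8, 16, 32])]
outs = fpn(feats)
print([tuple(o.shape) for o in outs])
# [(1, 256, 200, 200), (1, 256, 100, 100), (1, 256, 50, 50),
#  (1, 256, 25, 25), (1, 256, 13, 13)]   i.e. P2~P6, highest resolution first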

 
