每日Attention学习11——Lightweight Dilated Bottleneck

最新推荐文章于 2024-09-14 20:22:29 发布

xiongxyowo

最新推荐文章于 2024-09-14 20:22:29 发布

阅读量448

点赞数 4

文章标签：论文阅读

本文链接：https://blog.csdn.net/qq_40714949/article/details/140382236

版权

模块出处

[TITS 23] [link] [code] Lightweight Real-Time Semantic Segmentation Network With Efficient Transformer and CNN

模块名称

Lightweight Dilated Bottleneck (LDB)

模块作用

改进的编码器块

模块结构

在这里插入图片描述

模块代码

import torch
import torch.nn as nn
import torch.nn.functional as F


class Conv(nn.Module):
    """2-D convolution with an optional BatchNorm + PReLU tail.

    Args:
        nIn: Number of input channels.
        nOut: Number of output channels.
        kSize: Kernel size forwarded to ``nn.Conv2d``.
        stride: Stride forwarded to ``nn.Conv2d``.
        padding: Padding forwarded to ``nn.Conv2d``.
        dilation: Dilation forwarded to ``nn.Conv2d``.
        groups: Number of channel groups forwarded to ``nn.Conv2d``.
        bn_acti: When true, the convolution output is additionally passed
            through a ``BNPReLU`` built for ``nOut`` channels.
        bias: Whether the convolution carries a bias term.
    """

    def __init__(self, nIn, nOut, kSize, stride, padding, dilation=(1, 1), groups=1, bn_acti=False, bias=False):
        super().__init__()

        self.bn_acti = bn_acti

        conv_options = {
            "kernel_size": kSize,
            "stride": stride,
            "padding": padding,
            "dilation": dilation,
            "groups": groups,
            "bias": bias,
        }
        self.conv = nn.Conv2d(nIn, nOut, **conv_options)

        # The normalization/activation tail only exists when requested, so
        # the module's parameters differ between the two modes.
        if bn_acti:
            self.bn_prelu = BNPReLU(nOut)

    def forward(self, input):
        """Apply the convolution, then BN + PReLU if ``bn_acti`` is set."""
        features = self.conv(input)
        return self.bn_prelu(features) if self.bn_acti else features


class BNPReLU(nn.Module):
    """Batch normalization followed by a PReLU with one slope per channel.

    Args:
        nIn: Number of channels of the feature map being normalized.
    """

    def __init__(self, nIn):
        super().__init__()
        self.bn = nn.BatchNorm2d(num_features=nIn, eps=1e-3)
        self.acti = nn.PReLU(num_parameters=nIn)

    def forward(self, input):
        """Normalize ``input`` and then apply the activation."""
        return self.acti(self.bn(input))
    

class ShuffleBlock(nn.Module):
    """Channel shuffle that interleaves channels across ``groups`` groups.

    Args:
        groups: Number of groups the channel axis is split into.
    """

    def __init__(self, groups):
        super().__init__()
        self.groups = groups

    def forward(self, x):
        """Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]"""
        batch, channels, height, width = x.size()
        per_group = channels // self.groups

        # Split the channel axis into (group, channel-within-group) ...
        grouped = x.view(batch, self.groups, per_group, height, width)
        # ... swap those two axes and materialize the new memory order ...
        interleaved = grouped.transpose(1, 2).contiguous()
        # ... then flatten back to a single channel axis.
        return interleaved.view(batch, channels, height, width)


class eca_layer(nn.Module):
    """Efficient Channel Attention (ECA) gate.

    Each channel is reduced to its spatial mean, a single-filter 1-D
    convolution is slid along the channel axis of that descriptor, and the
    sigmoid of the result rescales the input channel-wise.

    Args:
        channel: Number of channels of the input feature map. Accepted for
            interface compatibility; the layer itself does not use it.
        k_size: Kernel size of the 1-D convolution over the channel axis.
            Must be a positive odd integer so that the symmetric padding
            ``(k_size - 1) // 2`` keeps one gate value per channel.

    Raises:
        ValueError: If ``k_size`` is not a positive odd integer.
    """

    def __init__(self, channel, k_size=3):
        super().__init__()
        # An even kernel with symmetric padding yields C - 1 gate values,
        # which cannot be matched back to the C input channels.
        if k_size < 1 or k_size % 2 == 0:
            raise ValueError(
                "k_size must be a positive odd integer, got {!r}".format(k_size)
            )
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` rescaled by its per-channel attention weights.

        Args:
            x: Feature map laid out as (B, C, H, W).
        """
        # Per-channel descriptor from global average pooling: (B, C, 1, 1).
        y = self.avg_pool(x)

        # Treat the channels as a length-C sequence with one feature:
        # (B, C, 1, 1) -> (B, 1, C) -> conv1d -> (B, C, 1, 1).
        y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)

        # Squash to (0, 1) so the values act as gates.
        y = self.sigmoid(y)

        return x * y.expand_as(x)

class LDB(nn.Module):
    """Lightweight Dilated Bottleneck (LDB) block.

    Pipeline: BN + PReLU -> 1x1 conv halving the channels -> factorized
    (kSize x 1, 1 x kSize) convolutions -> two depthwise factorized
    branches (one plain, one dilated by ``d``), each followed by an ECA
    gate -> sum of both branches and the bottleneck features -> BN + PReLU
    -> 1x1 conv restoring ``nIn`` channels -> residual add with the block
    input -> channel shuffle.

    Args:
        nIn: Number of input (and output) channels. The bottleneck works on
            ``nIn // 2`` channels.
        d: Dilation rate of the dilated depthwise branch.
        kSize: Kernel length of the factorized convolutions that follow the
            channel-reducing 1x1 convolution.
        dkSize: Kernel length of the depthwise factorized convolutions used
            in both branches.

    Note:
        Paddings are derived from the kernel lengths (and dilation) so that
        odd ``kSize`` / ``dkSize`` values keep the spatial size unchanged,
        which the branch sum and the residual add both require.
    """

    def __init__(self, nIn, d=1, kSize=3, dkSize=3):
        super().__init__()

        mid = nIn // 2
        # "Same" padding for an odd kernel of length k is (k - 1) // 2,
        # scaled by the dilation for the dilated branch. For the default
        # length of 3 this gives 1 and d.
        pad = (kSize - 1) // 2
        dpad = (dkSize - 1) // 2

        self.bn_relu_1 = BNPReLU(nIn)
        self.conv1x1_in = Conv(nIn, mid, 1, 1, padding=0, bn_acti=False)
        self.conv3x1 = Conv(mid, mid, (kSize, 1), 1, padding=(pad, 0), bn_acti=True)
        self.conv1x3 = Conv(mid, mid, (1, kSize), 1, padding=(0, pad), bn_acti=True)

        # Branch 1: depthwise factorized convolution without dilation.
        self.dconv3x1 = Conv(mid, mid, (dkSize, 1), 1, padding=(dpad, 0), groups=mid, bn_acti=True)
        self.dconv1x3 = Conv(mid, mid, (1, dkSize), 1, padding=(0, dpad), groups=mid, bn_acti=True)
        self.ca11 = eca_layer(mid)

        # Branch 2: depthwise factorized convolution dilated by d.
        self.ddconv3x1 = Conv(mid, mid, (dkSize, 1), 1, padding=(dpad * d, 0), dilation=(d, 1), groups=mid, bn_acti=True)
        self.ddconv1x3 = Conv(mid, mid, (1, dkSize), 1, padding=(0, dpad * d), dilation=(1, d), groups=mid, bn_acti=True)
        self.ca22 = eca_layer(mid)

        self.bn_relu_2 = BNPReLU(mid)
        self.conv1x1 = Conv(mid, nIn, 1, 1, padding=0, bn_acti=False)
        self.shuffle = ShuffleBlock(mid)

    def forward(self, input):
        """Run the block; the result has the same shape as ``input``."""
        # Pre-activation, channel reduction and factorized convolution.
        output = self.bn_relu_1(input)
        output = self.conv1x1_in(output)
        output = self.conv3x1(output)
        output = self.conv1x3(output)

        # Non-dilated depthwise branch with channel attention.
        br1 = self.dconv3x1(output)
        br1 = self.dconv1x3(br1)
        br1 = self.ca11(br1)

        # Dilated depthwise branch with channel attention.
        br2 = self.ddconv3x1(output)
        br2 = self.ddconv1x3(br2)
        br2 = self.ca22(br2)

        # Fuse both branches with the bottleneck features, restore the
        # channel count, add the residual and shuffle the channels.
        output = br1 + br2 + output
        output = self.bn_relu_2(output)
        output = self.conv1x1(output)
        output = self.shuffle(output + input)

        return output

if __name__ == '__main__':
    # Smoke test: push a random batch through the block and print the
    # shape of the result.
    block = LDB(nIn=256)
    dummy_batch = torch.randn(3, 256, 32, 32)
    result = block(dummy_batch)
    print(result.shape)  # 3, 256, 32, 32

原文表述

LDB的结构整体上借鉴了ResNet的思想，将模块设计为残差模块，以在网络层数尽可能少的情况下收集更多的特征信息。具体来说，在bottleneck处，通过1×1卷积将输入特征的通道数减半；通道数减少之后，参数量和计算量大大降低。虽然这样会损失一部分准确率，但相比于设法弥补这部分损失，此时多堆叠两个模块更为有利。同时，由于使用了1×1卷积，必须加深网络深度才能获得更大的感受野，因此在1×1卷积之后增加了3×1和1×3的分解卷积，以拓展感受野，从而捕捉更大范围的上下文信息。此外，采用分解卷积也是出于对参数量和计算量的考虑。同样，在接下来的双分支结构中，两个分支也都使用了分解卷积，其中一个负责局部、短距离的特征信息，另一个则使用了空洞卷积，负责在不同空洞率下从更大的感受野中提取特征信息。紧接着这两个分支的是通道注意力机制，其灵感来自ECANet，目的是在通道维度上构建注意力矩阵，以增强特征表达、抑制噪声干扰，因为对于CNN来说，大部分特征信息都包含在通道中。然后，将两个低维分支和中间特征融合，输入到后面的1×1逐点卷积中，将特征图的通道数恢复到与输入特征图相同。最后，采用channel shuffle的策略，避免depth-wise convolution带来的信息独立、通道间无相关性的弊端，促进不同通道间语义信息的交换。