PyTorch Implementation of a ResNet50 Backbone Built with ACNet

What is ACNet?

[1] X. Ding, Y. Guo, G. Ding et al., "ACNet: Strengthening the Kernel Skeletons for Powerful CNN via Asymmetric Convolution Blocks," in ICCV 2019.

Recommended blog post: https://blog.csdn.net/practical_sharp/article/details/114671943

[Figure: Overview of ACNet, from [1]]

Overview of ACNet. For example, we replace every 3 × 3 layer with an ACB comprising three layers with 3 × 3, 1×3 and 3×1 kernels, respectively, and their outputs are summed up. When the training is completed, we convert the model back into the same structure as the original by adding the asymmetric kernels in each ACB onto the skeleton, which is the crisscross part of the square kernel, as marked on the figure. In practice, this conversion is implemented by building a new model with the original structure and using the converted learned parameters of the ACNet to initialize it.

To summarize, ACNet uses asymmetric convolutions that let the network learn richer features; backbones built with ACNet improve classification performance on CIFAR-10, CIFAR-100, and ImageNet.
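The deploy-time conversion described in the caption can be made concrete. The sketch below is my own illustration for the paper's full ACB (square conv plus two asymmetric branches, each with its own BatchNorm); fuse_conv_bn and convert_acb are hypothetical helpers, not the authors' released converter. It folds each branch's BN into its conv kernel, then adds the fused 1x3 and 3x1 kernels onto the center row and column of the fused 3x3 kernel, i.e. its skeleton.

import torch

# Fold a BatchNorm that follows a bias-free conv into the conv itself:
# y = gamma * (w*x - mean) / std + beta  ==>  w' = w * gamma/std, b' = beta - mean*gamma/std
def fuse_conv_bn(conv_weight, bn):
    std = (bn.running_var + bn.eps).sqrt()
    scale = (bn.weight / std).reshape(-1, 1, 1, 1)
    return conv_weight * scale, bn.bias - bn.running_mean * bn.weight / std

# Fuse the three ACB branches into one 3x3 kernel plus bias; the result would
# initialize the single fused_conv used at deploy time.
def convert_acb(square_conv, square_bn, ver_conv, ver_bn, hor_conv, hor_bn):
    k_sq, b_sq = fuse_conv_bn(square_conv.weight, square_bn)    # (out, in, 3, 3)
    k_ver, b_ver = fuse_conv_bn(ver_conv.weight, ver_bn)        # (out, in, 3, 1)
    k_hor, b_hor = fuse_conv_bn(hor_conv.weight, hor_bn)        # (out, in, 1, 3)
    fused = k_sq.clone()
    fused[:, :, :, 1:2] += k_ver    # add the vertical kernel onto the center column
    fused[:, :, 1:2, :] += k_hor    # add the horizontal kernel onto the center row
    return fused, b_sq + b_ver + b_hor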

Next, I plan to build a ResNet50 backbone with ACNet and use it to train an object detection model, to test whether it yields a performance gain.

import torch
from torch import nn as nn
# Remove the extra rows or columns introduced by the 3x3 conv's padding
class CropLayer(nn.Module):

    #   E.g., (-1, 0) means this layer should crop the first and last rows of the feature map. And (0, -1) crops the first and last columns
    def __init__(self, crop_set):
        super(CropLayer, self).__init__()
        self.rows_to_crop = - crop_set[0]
        self.cols_to_crop = - crop_set[1]
        assert self.rows_to_crop >= 0
        assert self.cols_to_crop >= 0

    def forward(self, input):
        # guard the zero-crop case: input[:, :, 0:-0, :] would return an empty tensor
        if self.rows_to_crop > 0:
            input = input[:, :, self.rows_to_crop:-self.rows_to_crop, :]
        if self.cols_to_crop > 0:
            input = input[:, :, :, self.cols_to_crop:-self.cols_to_crop]
        return input
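
# Quick sanity check (my own example): crop_set=(-1, 0) drops the first and last rows
assert CropLayer(crop_set=(-1, 0))(torch.randn(1, 1, 5, 5)).shape == (1, 1, 3, 5)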

# The paper's ACB is 3x3 + 1x3 + 3x1; here the 3x3 branch is still provided by self.conv2 of the Bottleneck below
# This ACBlock therefore implements only the two asymmetric convs (1x3 and 3x1), which have no pretrained weights
# When using this module, padding must be set to 1
class ACBlock(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', deploy=False):
        super(ACBlock, self).__init__()
        self.deploy = deploy
        if deploy:
            self.fused_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(kernel_size,kernel_size), stride=stride,
                                      padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode)
        else:
            center_offset_from_origin_border = padding - kernel_size // 2
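            # with kernel_size=3 and padding=1 this offset is 0: the asymmetric convs
            # pad themselves by (1, 0) / (0, 1) and no cropping is needed; with
            # padding=0 the offset is negative and CropLayer trims instead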
            ver_pad_or_crop = (center_offset_from_origin_border + 1, center_offset_from_origin_border)
            hor_pad_or_crop = (center_offset_from_origin_border, center_offset_from_origin_border + 1)
            if center_offset_from_origin_border >= 0:
                self.ver_conv_crop_layer = nn.Identity()
                ver_conv_padding = ver_pad_or_crop
                self.hor_conv_crop_layer = nn.Identity()
                hor_conv_padding = hor_pad_or_crop
            else:
                self.ver_conv_crop_layer = CropLayer(crop_set=ver_pad_or_crop)
                ver_conv_padding = (0, 0)
                self.hor_conv_crop_layer = CropLayer(crop_set=hor_pad_or_crop)
                hor_conv_padding = (0, 0)
            self.ver_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(3, 1),
                                      stride=stride,
                                      padding=ver_conv_padding, dilation=dilation, groups=groups, bias=False,
                                      padding_mode=padding_mode)

            self.hor_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, 3),
                                      stride=stride,
                                      padding=hor_conv_padding, dilation=dilation, groups=groups, bias=False,
                                      padding_mode=padding_mode)
            self.ver_bn = nn.BatchNorm2d(num_features=out_channels)
            self.hor_bn = nn.BatchNorm2d(num_features=out_channels)


    # forward function
    def forward(self, input):
        if self.deploy:
            return self.fused_conv(input)
        else:
            # The square (3x3) branch is deliberately omitted here; it is provided
            # by self.conv2 in the Bottleneck below and summed with these outputs.
            # square_outputs = self.square_bn(self.square_conv(input))
          
            vertical_outputs = self.ver_conv_crop_layer(input)
            vertical_outputs = self.ver_conv(vertical_outputs)
            vertical_outputs = self.ver_bn(vertical_outputs)
            # print(vertical_outputs.size())

            horizontal_outputs = self.hor_conv_crop_layer(input)
            horizontal_outputs = self.hor_conv(horizontal_outputs)
            horizontal_outputs = self.hor_bn(horizontal_outputs)
            # print(horizontal_outputs.size())
            return vertical_outputs + horizontal_outputs

# this is the original ResNet bottleneck block, using an ACBlock alongside the 3x3 conv
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        # 1x1 conv: no ACBlock needed
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=1, stride=1, bias=False)  # squeeze channels
        self.bn1 = norm_layer(out_channel)
        # -----------------------------------------
        # 3x3 conv: this is where the ACBlock comes in
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, bias=False, padding=1)

        self.ACBlock = ACBlock(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, dilation=1,
                               groups=1, padding_mode='zeros', deploy=False)
    
        self.bn2 = norm_layer(out_channel)
        # -----------------------------------------
        # 1x1 conv: no ACBlock needed
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion,
                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels
        self.bn3 = norm_layer(out_channel * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        # some ResNet blocks need a downsample branch so the identity matches the main path (usually a 2x reduction)
        # the downsample module is defined in class ResNet
        self.downsample = downsample
    # forward: the original bottleneck, with the ACBlock output summed into conv2's output
    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        square_outputs = self.conv2(out)       # the plain 3x3 branch (pretrainable)
        other_outputs = self.ACBlock(out)      # the 1x3 + 3x1 asymmetric branches
        out = square_outputs + other_outputs   # sum the branches, as in the ACB
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += identity
        out = self.relu(out)

        return out
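A quick smoke test of the modified bottleneck (my own check, assuming the definitions above): with stride=2 and a matching downsample branch, the block halves the spatial size and expands the channels 4x.

down = nn.Sequential(
    nn.Conv2d(64, 256, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(256))
blk = Bottleneck(64, 64, stride=2, downsample=down)
print(blk(torch.randn(2, 64, 56, 56)).shape)   # torch.Size([2, 256, 28, 28])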


x = torch.randn(8, 3, 256, 256)   # randn instead of an uninitialized torch.Tensor
ac = ACBlock(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1,
             dilation=1, groups=1, padding_mode='zeros', deploy=False)
print(ac)
print(ac(x).shape)   # torch.Size([8, 64, 256, 256]), matching a padded 3x3 conv
The original structure of an ACBlock:

[Figure: the paper's full ACB, a 3x3 square conv plus 1x3 and 3x1 convs, each followed by BatchNorm]

However, to make loading pretrained weights convenient, conv2 of the Bottleneck is kept as a plain 3x3 conv, so the ACBlock here retains only the 1x3 and 3x1 branches.

Accordingly, print(ac) shows only the asymmetric branches:

ACBlock(
  (ver_conv_crop_layer): Identity()
  (hor_conv_crop_layer): Identity()
  (ver_conv): Conv2d(3, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), bias=False)
  (hor_conv): Conv2d(3, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), bias=False)
  (ver_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (hor_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

How do we build ResNet50 with ACBlock? Simply use the ACBlock class wherever a plain 3x3 convolution appears.

## continuing from the code above
## nothing here needs to change at all
class ResNet(nn.Module):

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):
        super(ResNet, self).__init__()

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.include_top = include_top 
        self.in_channel = 64
        # input images have 3 channels, so the stem conv takes channel = 3
        # the first conv layer also downsamples: stride = 2 halves the feature map (2x)
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = norm_layer(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        # max pooling loses a lot of information; stride = 2 downsamples by 2x again
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)   # stride = 2 here: downsampling needed
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)   # stride = 2 here: downsampling needed
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)   # stride = 2 here: downsampling needed
        # image classification needs the final fully connected layer; when defining an object detection model, set include_top = False
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        norm_layer = self._norm_layer
        downsample = None
        # when downsampling is needed, self.in_channel == channel * block.expansion no longer holds
        if stride != 1 or self.in_channel != channel * block.expansion:
            
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                norm_layer(channel * block.expansion))
        # first decide whether the first block of the stage needs downsampling
        layers = []
        layers.append(block(self.in_channel, channel, downsample=downsample,
                            stride=stride, norm_layer=norm_layer))
        self.in_channel = channel * block.expansion
        # loop from 1 rather than 0, because the first block was appended above
        for _ in range(1, block_num):
            layers.append(block(self.in_channel, channel, norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        # e.g. x = torch.randn(8, 3, 256, 256)
        x = self.conv1(x)
        # after the first conv: torch.Size([8, 64, 128, 128])
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # after max pooling: torch.Size([8, 64, 64, 64])
        
        x = self.layer1(x)
        # torch.Size([8, 256, 64, 64])

        x = self.layer2(x)
        # torch.Size([8, 512, 32, 32])

        x = self.layer3(x)
        # torch.Size([8, 1024, 16, 16])
       
        x = self.layer4(x)
        # torch.Size([8, 2048, 8, 8])

        # an object detection backbone does not need this if branch
        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x



"""
x = torch.Tensor(8,3,256,256)
net  = ResNet(Bottleneck,[3,5,6,3],include_top=False)
print(net)
out = net(x)
print(out.size())
"""

Whether this backbone actually helps in object detection remains to be seen; I will flesh this out after running experiments.

One remaining doubt: convolutions defined this way have no pretrained weights, and I am not sure whether that will work out as hoped.

Does a pretrained model still exist for the ResNet after the ACBlock replacement? I need to search around online.
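One plausible answer, as a hedged sketch of my own (untested here): since the module names above mirror torchvision's resnet50 except for the ACBlock branches, the official ImageNet weights can be loaded with strict=False, so every matching parameter is initialized from the pretrained model while the asymmetric branches keep their random initialization. Depending on the torchvision version, weights=ResNet50_Weights.IMAGENET1K_V1 may be required instead of pretrained=True.

from torchvision.models import resnet50

net = ResNet(Bottleneck, [3, 4, 6, 3], include_top=False)
pretrained = resnet50(pretrained=True).state_dict()
# strict=False skips keys that do not match on either side
missing, unexpected = net.load_state_dict(pretrained, strict=False)
print(missing)      # the ACBlock parameters, left randomly initialized
print(unexpected)   # fc.weight / fc.bias, absent here because include_top=False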
