YOLO Improvement: CondConv, Conditionally Parameterized Convolutions for Efficient Inference (Dynamic Convolution)

Background

CondConv Theory

CondConv paper
CondConv code

Problem Background

The capacity of a CNN model comes from its size and from the scale of the dataset. A basic assumption in convolutional layer design is that the same convolution kernel is applied to every example in the dataset. To increase model capacity, developers typically add more convolutional layers or enlarge existing ones (kernel height/width, number of input/output channels). This makes the model larger and slows it down.

Introducing CondConv

Conditionally parameterized convolution (CondConv) challenges the paradigm of static kernels by computing the convolution kernel as a function of the input. "Conditional" refers to conditional computation, whose goal is to increase model capacity without increasing computational cost; conditional-computation models achieve this by activating only part of the network for each example. A related concept is the multi-branch convolutional layer, where a layer consists of multiple convolutional branches whose outputs are aggregated into the final result, as in ResNet and Inception. A CondConv layer is mathematically equivalent to a multi-branch convolutional layer in which each branch is a single convolution and the outputs are aggregated by a weighted sum, yet only one convolution ever has to be computed: Output(x) = σ((a1·W1 + … + an·Wn) ∗ x), where each routing weight ai = ri(x) depends on the input x.

The idea behind CondConv is not hard to follow. Concretely:

[Figure: CondConv routing, panel (a) of the paper's architecture diagram]

The kernel is parameterized as in panel (a) above: W1, W2, W3 are the expert kernels, and ROUTE FN produces the weights a1, a2, a3 placed in front of them. The combination a1·W1 + a2·W2 + a3·W3 yields a single kernel, and the convolution is carried out with this newly mixed kernel.
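The claimed equivalence to a multi-branch layer follows directly from the linearity of convolution: convolving with the mixed kernel a1·W1 + a2·W2 + a3·W3 gives the same result as running the three branch convolutions separately and summing their weighted outputs. A quick standalone numerical check (my own sketch, not from the paper's code):

import torch
import torch.nn.functional as F

x = torch.randn(1, 8, 16, 16)
experts = [torch.randn(4, 8, 3, 3) for _ in range(3)]  # W1, W2, W3
alpha = torch.rand(3)                                  # routing weights a1, a2, a3

mixed = sum(a * w for a, w in zip(alpha, experts))     # combine the kernels first
one_conv = F.conv2d(x, mixed)                          # the single CondConv convolution
branches = sum(a * F.conv2d(x, w) for a, w in zip(alpha, experts))  # three-branch version
print(torch.allclose(one_conv, branches, atol=1e-4))   # True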

CondConv Code

The code gives a more direct picture of what CondConv does (the implementation below follows CondConv-Pytorch):

import functools

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _pair

# Routing-weight computation; num_experts is the configured number of experts
class _routing(nn.Module):

    def __init__(self, in_channels, num_experts, dropout_rate):
        super(_routing, self).__init__()

        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(in_channels, num_experts)

    def forward(self, x):
        x = torch.flatten(x)     # (1, c, 1, 1) -> (c,); valid because CondConv2D feeds one sample at a time
        x = self.dropout(x)
        x = self.fc(x)           # (c,) -> (num_experts,)
        return torch.sigmoid(x)  # F.sigmoid is deprecated; one (0, 1) weight per expert

# The CondConv convolution itself
class CondConv2D(_ConvNd):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros', num_experts=3, dropout_rate=0.2):
        kernel_size = _pair(kernel_size) # 3 -> (3,3)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(CondConv2D, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode)

        # Global average pooling turns the input feature map (bs, c, w, h) into (bs, c, 1, 1),
        # preparing it for the routing-weight computation
        self._avg_pooling = functools.partial(F.adaptive_avg_pool2d, output_size=(1, 1))
        # The routing function maps each sample's pooled (1, c, 1, 1) descriptor to
        # num_experts routing weights
        self._routing_fn = _routing(in_channels, num_experts, dropout_rate)

        # One kernel per expert: a tensor of shape
        # (num_experts, out_channels, in_channels // groups, kernel_size, kernel_size)
        self.weight = Parameter(torch.Tensor(
            num_experts, out_channels, in_channels // groups, *kernel_size))

        self.reset_parameters()

    def _conv_forward(self, input, weight):
        if self.padding_mode != 'zeros':
            # note: older PyTorch versions named this attribute _padding_repeated_twice
            return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
                            weight, self.bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, inputs):
        b, _, _, _ = inputs.size()
        res = []
        # inputs: (bs, c, w, h); each input in the loop: (c, w, h)
        for input in inputs:
            input = input.unsqueeze(0)                         # (1, c, w, h)
            pooled_inputs = self._avg_pooling(input)           # (1, c, 1, 1)
            routing_weights = self._routing_fn(pooled_inputs)  # (num_experts,)
            # Weighted sum over dim 0: mix the expert kernels into a single kernel
            kernels = torch.sum(routing_weights[:, None, None, None, None] * self.weight, 0)
            out = self._conv_forward(input, kernels)
            res.append(out)
        return torch.cat(res, dim=0)

# Test
x = torch.rand(1, 20, 40, 40)
condconv = CondConv2D(20, 40, 3, num_experts=3)
print(condconv(x).size()) # [1, 40, 38, 38]
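One thing worth noting: the forward above loops over the batch one sample at a time, which is simple but slow for large batches. A common alternative folds the batch into the group dimension so that a single F.conv2d call applies each sample's own mixed kernel. The sketch below shows this standard grouped-convolution trick; it is my own illustration, not part of the linked repository, and batched_condconv is a hypothetical helper name:

import torch
import torch.nn.functional as F

def batched_condconv(x, expert_weights, routing_weights, padding=0):
    # x: (b, c_in, h, w); expert_weights: (e, c_out, c_in, k, k); routing_weights: (b, e)
    b, c_in, h, w = x.shape
    e, c_out, _, kh, kw = expert_weights.shape
    # Mix the experts into one kernel per sample: (b, c_out, c_in, kh, kw)
    kernels = torch.einsum('be,eoikl->boikl', routing_weights, expert_weights)
    # Fold the batch into the group dimension so one conv call serves all samples
    x = x.reshape(1, b * c_in, h, w)
    kernels = kernels.reshape(b * c_out, c_in, kh, kw)
    out = F.conv2d(x, kernels, padding=padding, groups=b)
    return out.reshape(b, c_out, out.shape[-2], out.shape[-1])

x = torch.randn(2, 8, 16, 16)
w = torch.randn(4, 10, 8, 3, 3)  # 4 experts, 10 output channels
r = torch.rand(2, 4)             # per-sample routing weights
print(batched_condconv(x, w, r, padding=1).shape)  # torch.Size([2, 10, 16, 16])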

The key steps are annotated in the comments above; stepping through the code in a debugger makes it easier to see how this computation differs from the channel attention used in attention mechanisms. CondConv builds num_experts kernels of shape (out_channels, in_channels // groups, kernel_size, kernel_size) and fuses these num_experts kernels into one before convolving, whereas channel attention re-weights the channels produced by a single convolution.
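To make that contrast concrete, here is a minimal SE-style channel-attention convolution (an illustrative sketch of my own; ChannelAttentionConv is a hypothetical name, not a YOLO or CondConv module). It applies one static kernel and then re-weights the output channels, whereas CondConv re-weights entire kernels before the convolution happens:

import torch
import torch.nn as nn

class ChannelAttentionConv(nn.Module):
    # One static kernel shared by all samples; attention rescales the OUTPUT channels
    def __init__(self, c_in, c_out, k=3):
        super().__init__()
        self.conv = nn.Conv2d(c_in, c_out, k, padding=k // 2)
        self.fc = nn.Linear(c_out, c_out)

    def forward(self, x):
        y = self.conv(x)                                 # same kernel for every sample
        s = y.mean(dim=(2, 3))                           # (b, c_out) squeeze
        w = torch.sigmoid(self.fc(s))[:, :, None, None]  # per-channel weights
        return y * w                                     # rescale channels; the kernel never changes

# CondConv, by contrast, mixes whole kernels BEFORE the convolution:
#   kernel(x) = a1(x)*W1 + a2(x)*W2 + a3(x)*W3, then a single conv with kernel(x)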

Using CondConv in YOLO

Consider introducing the CondConv module into the C3 block. Paste the following code into common.py:
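Note that this snippet relies on a few names that the stock common.py may not import; if they are missing, add the following near the top of the file (assuming a reasonably recent PyTorch):

import functools
import torch.nn.functional as F
from torch.nn import Parameter
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _pair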

class _routing(nn.Module):

    def __init__(self, in_channels, num_experts, dropout_rate):
        super(_routing, self).__init__()

        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(in_channels, num_experts)

    def forward(self, x):
        x = torch.flatten(x)
        x = self.dropout(x)
        x = self.fc(x)
        return torch.sigmoid(x)  # F.sigmoid is deprecated


class CondConv2D(_ConvNd):

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros', num_experts=3, dropout_rate=0.2):
        kernel_size = _pair(kernel_size) # 3 -> (3,3)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(CondConv2D, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode)

        self._avg_pooling = functools.partial(F.adaptive_avg_pool2d, output_size=(1, 1))
        self._routing_fn = _routing(in_channels, num_experts, dropout_rate)

        self.weight = Parameter(torch.Tensor(
            num_experts, out_channels, in_channels // groups, *kernel_size))

        self.reset_parameters()

    def _conv_forward(self, input, weight):
        if self.padding_mode != 'zeros':
            # note: older PyTorch versions named this attribute _padding_repeated_twice
            return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
                            weight, self.bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, inputs):
        b, _, _, _ = inputs.size()
        res = []
        for input in inputs:
            input = input.unsqueeze(0)
            pooled_inputs = self._avg_pooling(input)
            routing_weights = self._routing_fn(pooled_inputs)
            kernels = torch.sum(routing_weights[:, None, None, None, None] * self.weight, 0)
            out = self._conv_forward(input, kernels)
            res.append(out)
        return torch.cat(res, dim=0)

class C3_CondConv(nn.Module):
    # CSP Bottleneck with 3 convolutions, with CondConv replacing the plain convs
    # NOTE: n (number of repeats) must come before num_experts, because parse_model
    # inserts the repeat count at position 2 of the args list
    def __init__(self, c1, c2, n=1, num_experts=3, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, experts, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = CondConv2D(c1, c_, kernel_size=1, stride=1, num_experts=num_experts)
        self.cv2 = CondConv2D(c1, c_, kernel_size=1, stride=1, num_experts=num_experts)
        self.cv3 = CondConv2D(2 * c_, c2, kernel_size=1, num_experts=num_experts)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))

# ------------------------------- Test code; do not paste this part into common.py -------------------------------
x = torch.rand(1, 20, 40, 40)
condconv = CondConv2D(20, 40, 3, num_experts=3)
print(condconv(x).size())    # torch.Size([1, 40, 38, 38])
c3_condconv = C3_CondConv(20, 40, 3)
print(c3_condconv(x).size()) # torch.Size([1, 40, 40, 40])

In yolo.py, extend the def parse_model(d, ch): function by adding C3_CondConv to the two module sets, as follows:

if m in {
        Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
        BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, C3RFEM, RFEM_KCPNet, C3_CondConv}:
    c1, c2 = ch[f], args[0]
    if c2 != no:  # if not output
        c2 = make_divisible(c2 * gw, 8)

    args = [c1, c2, *args[1:]]
    if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x, C3RFEM, C3_CondConv}:
        args.insert(2, n)  # number of repeats
        n = 1
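To see why C3_CondConv's constructor takes n before num_experts, trace how this standard parse_model logic transforms the args for a backbone entry such as [-1, 3, C3_CondConv, [128, 3]] (a worked trace under the yolov5s multiples shown in the yaml below, depth 0.33 and width 0.5):

# yaml entry: [-1, 3, C3_CondConv, [128, 3]]  ->  3 repeats, args = [128, 3]
# n = max(round(3 * 0.33), 1) = 1             # depth scaling
# c1, c2 = ch[f], args[0]                     # c2 = 128
# c2 = make_divisible(128 * 0.5, 8) = 64      # width scaling
# args = [c1, c2, *args[1:]]                  # -> [c1, 64, 3]
# args.insert(2, n)                           # -> [c1, 64, 1, 3]
# C3_CondConv(c1, 64, 1, 3)                   # -> n=1 repeat, num_experts=3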

Set up the corresponding yaml file:

# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license

# Parameters
nc: 80  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3_CondConv, [128, 3]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 6, C3_CondConv, [256, 3]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3_CondConv, [512, 3]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 3, C3_CondConv, [1024, 3]],
   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3_CondConv, [512, 3, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3_CondConv, [256, 3, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3_CondConv, [512, 3, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3_CondConv, [1024, 3, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

A brief note on the yaml parameters: in C3_CondConv, [128, 3], 128 keeps its original meaning (the number of output channels), while 3 is the num_experts value for CondConv and can be changed.
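To confirm the model builds end to end, you can instantiate it from the new yaml in the usual YOLOv5 way (assuming you saved it as, say, models/yolov5s-condconv.yaml; the file name is your choice):

python models/yolo.py --cfg models/yolov5s-condconv.yaml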

Experiments on the VisDrone Dataset

To be continued.
