yolov8官方代码中关于CBAM中关于通道注意力机制仅实现的平均池化操作,下面代码中实现的论文整体代码
使用自己的数据集用CBAM注意力机制,涨点3%以上。
概述
CBAM引入了两种注意力机制:通道注意力和空间注意力。通道注意力有助于增强不同通道的特征表示,而空间注意力有助于提取空间中不同位置的关键信息。
通道注意力模块
通过全局最大池化和全局平均池化–>全连接层–>Sigmoid激活–>注意力加权操作实现。
空间注意力模块
和上述通道注意力机制一样
YOLOV8官方实现代码
在ultralytics-8.2.0\ultralytics\nn\tasks.py中的parse_model()函数中添加
elif m is CBAM:
c1, c2 = ch[f], args[0]
if c2 != nc:
c2 = make_divisible(min(c2, max_channels) * width, 8)
args = [c1, *args[1:]]
在ultralytics-8.2.0\ultralytics\cfg\models\v8文件夹下创建yolov8_CBAM.yaml文件
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
# n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
# s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
# l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
# x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 12
- [-1, 1, CBAM, [512]]
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 16 (P3/8-small)
- [-1, 1, CBAM, [256]]
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 13], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 20 (P4/16-medium)
- [-1, 1, CBAM, [512]]
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 9], 1, Concat, [1]] # cat head P5
- [-1, 3, C2f, [1024]] # 24 (P5/32-large)
- [-1, 1, CBAM, [1024]]
- [[17, 21, 25], 1, Detect, [nc]] # Detect(P3, P4, P5)
运行结果
from n params module arguments
0 -1 1 1392 ultralytics.nn.modules.conv.Conv [3, 48, 3, 2]
1 -1 1 41664 ultralytics.nn.modules.conv.Conv [48, 96, 3, 2]
2 -1 2 111360 ultralytics.nn.modules.block.C2f [96, 96, 2, True]
3 -1 1 166272 ultralytics.nn.modules.conv.Conv [96, 192, 3, 2]
4 -1 4 813312 ultralytics.nn.modules.block.C2f [192, 192, 4, True]
5 -1 1 664320 ultralytics.nn.modules.conv.Conv [192, 384, 3, 2]
6 -1 4 3248640 ultralytics.nn.modules.block.C2f [384, 384, 4, True]
7 -1 1 1991808 ultralytics.nn.modules.conv.Conv [384, 576, 3, 2]
8 -1 2 3985920 ultralytics.nn.modules.block.C2f [576, 576, 2, True]
9 -1 1 831168 ultralytics.nn.modules.block.SPPF [576, 576, 5]
10 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
11 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1]
12 -1 2 1993728 ultralytics.nn.modules.block.C2f [960, 384, 2]
13 -1 1 147938 ultralytics.nn.modules.conv.CBAM [384]
14 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
15 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1]
16 -1 2 517632 ultralytics.nn.modules.block.C2f [576, 192, 2]
17 -1 1 37154 ultralytics.nn.modules.conv.CBAM [192]
18 -1 1 332160 ultralytics.nn.modules.conv.Conv [192, 192, 3, 2]
19 [-1, 13] 1 0 ultralytics.nn.modules.conv.Concat [1]
20 -1 2 1846272 ultralytics.nn.modules.block.C2f [576, 384, 2]
21 -1 1 147938 ultralytics.nn.modules.conv.CBAM [384]
22 -1 1 1327872 ultralytics.nn.modules.conv.Conv [384, 384, 3, 2]
23 [-1, 9] 1 0 ultralytics.nn.modules.conv.Concat [1]
24 -1 2 4207104 ultralytics.nn.modules.block.C2f [960, 576, 2]
25 -1 1 332450 ultralytics.nn.modules.conv.CBAM [576]
26 [17, 21, 25] 1 3776854 ultralytics.nn.modules.head.Detect [2, [192, 384, 576]]
``# 添加原版论文代码
在ultralytics-8.2.0\ultralytics\nn\modules\conv.py文件下将ChannelAttention、SpatialAttention、CBAM替换成如下代码
class ChannelAttention(nn.Module):
"""
CBAM混合注意力机制的通道注意力
"""
def __init__(self, in_channels, ratio=16):
super(ChannelAttention, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.max_pool = nn.AdaptiveMaxPool2d(1)
self.fc = nn.Sequential(
# 全连接层
# nn.Linear(in_planes, in_planes // ratio, bias=False),
# nn.ReLU(),
# nn.Linear(in_planes // ratio, in_planes, bias=False)
# 利用1x1卷积代替全连接,避免输入必须尺度固定的问题,并减小计算量
nn.Conv2d(in_channels, in_channels // ratio, 1, bias=False),
nn.ReLU(inplace=True),
nn.Conv2d(in_channels // ratio, in_channels, 1, bias=False)
)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = self.fc(self.avg_pool(x))
max_out = self.fc(self.max_pool(x))
out = avg_out + max_out
out = self.sigmoid(out)
return out * x
class SpatialAttention(nn.Module):
"""
CBAM混合注意力机制的空间注意力
"""
def __init__(self, kernel_size=7):
super(SpatialAttention, self).__init__()
assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
padding = 3 if kernel_size == 7 else 1
self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
out = torch.cat([avg_out, max_out], dim=1)
out = self.sigmoid(self.conv1(out))
return out * x
class CBAM(nn.Module):
"""
CBAM混合注意力机制
"""
def __init__(self, in_channels, ratio=16, kernel_size=3):
super(CBAM, self).__init__()
self.channelattention = ChannelAttention(in_channels, ratio=ratio)
self.spatialattention = SpatialAttention(kernel_size=kernel_size)
def forward(self, x):
x = self.channelattention(x)
x = self.spatialattention(x)
return x