1. Ghostnetv2介绍
1.1 摘要:轻量级卷积神经网络(CNN)专为移动设备上的应用而设计,具有更快的推理速度。 卷积运算只能捕获窗口区域的局部信息,阻碍了性能的进一步提升。 在卷积中引入self-attention可以很好地捕捉全局信息,但会很大程度上拖累实际速度。 在本文中,我们提出了一种硬件友好的注意力机制(称为 DFC 注意力),然后为移动应用程序提出了一种新的 GhostNetV2 架构。 所提出的DFC注意力是基于全连接层构建的,它不仅可以在通用硬件上快速执行,而且可以捕获远程像素之间的依赖性。 我们进一步回顾了之前 GhostNet 中的表达能力瓶颈,并建议通过 DFC 注意力增强廉价操作产生的扩展特征,以便 GhostNetV2 块可以同时聚合本地和远程信息。 大量实验证明了 GhostNetV2 相对于现有架构的优越性。 例如,它在 ImageNet 上以 167M FLOP 实现了 75.3% 的 top-1 准确率,在类似的计算成本下显著超过了 GhostNetV1 (74.5%)。
官方论文地址:https://arxiv.org/pdf/2211.12905v1
1.2 简单介绍:
GhostNetV2模块是一种新型的轻量级视觉骨干网络,它通过引入一种称为DFC(decoupled fully connected)注意力机制来增强模型对长距离空间信息的捕捉能力。这种注意力机制旨在在保持计算效率的同时,捕获较大空间范围内像素之间的依赖关系。
在GhostNetV2中,DFC注意力被集成到Ghost模块中,以改善其表达能力。具体来说,输入特征首先通过Ghost模块生成内在特征和廉价操作产生的更多特征。这些特征沿着通道维度连接起来形成输出特征。为了捕获不同空间像素之间的长距离依赖性,GhostNetV2还采用了DFC注意力机制。这个机制将一个全连接层分解为水平和垂直两个方向上的全连接层,分别沿着这两个方向聚合卷积神经网络(CNN)中的2D特征图的像素。
此外,GhostNetV2的设计还包括了下采样操作,以减少特征的大小,从而在较小的特征上应用DFC注意力,并通过上采样操作将其恢复至原始大小。这种下采样操作有助于降低DFC注意力分支的计算成本,并加速实际的推理速度。
最后,GhostNetV2采用了一个包含两个Ghost模块的反向残差瓶颈结构。其中,第一个Ghost模块作为扩展层增加输出通道的数量,第二个Ghost模块则减少通道数以匹配快捷路径。通过在扩展特性上实现DFC注意力,可以有效增强表示能力。
1.3 模块结构
2. 核心代码
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from timm.models import register_model
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
def hard_sigmoid(x, inplace: bool = False):
    """Piecewise-linear sigmoid approximation: clamp((x + 3) / 6, 0, 1)."""
    if not inplace:
        return F.relu6(x + 3.) / 6.
    # In-place variant mutates x to avoid an extra allocation.
    return x.add_(3.).clamp_(0., 6.).div_(6.)
class SqueezeExcite(nn.Module):
    """Squeeze-and-Excitation channel attention.

    Globally average-pools the input, squeezes channels through a 1x1 conv
    bottleneck (width ``in_chs * se_ratio``, rounded by ``_make_divisible``),
    expands back, and rescales the input by the gated result.
    """

    def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None,
                 act_layer=nn.ReLU, gate_fn=hard_sigmoid, divisor=4, **_):
        super(SqueezeExcite, self).__init__()
        self.gate_fn = gate_fn
        reduced_chs = _make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True)
        self.act1 = act_layer(inplace=True)
        self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True)

    def forward(self, x):
        # Squeeze to 1x1, bottleneck through the two 1x1 convs, then gate.
        gate = self.avg_pool(x)
        gate = self.conv_expand(self.act1(self.conv_reduce(gate)))
        return x * self.gate_fn(gate)
class ConvBnAct(nn.Module):
    """Conv2d -> BatchNorm -> activation, with 'same'-style padding."""

    def __init__(self, in_chs, out_chs, kernel_size,
                 stride=1, act_layer=nn.ReLU):
        super(ConvBnAct, self).__init__()
        # kernel_size // 2 keeps spatial size unchanged for stride 1.
        self.conv = nn.Conv2d(in_chs, out_chs, kernel_size, stride, kernel_size // 2, bias=False)
        self.bn1 = nn.BatchNorm2d(out_chs)
        self.act1 = act_layer(inplace=True)

    def forward(self, x):
        return self.act1(self.bn1(self.conv(x)))
class GhostModuleV2(nn.Module):
    """Ghost module (V2).

    Generates "intrinsic" features with a dense conv (``primary_conv``) and
    cheap "ghost" features with a depthwise conv (``cheap_operation``), then
    concatenates both along channels.

    mode='original' reproduces the GhostNetV1 module. mode='attn' additionally
    computes DFC attention via ``short_conv`` — a 1x1 conv followed by
    decoupled 1x5 (horizontal) and 5x1 (vertical) depthwise convs on a
    2x-downsampled input — and multiplies the concatenated output by the
    upsampled, sigmoid-gated attention map.
    """

    def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True, mode=None, args=None):
        super(GhostModuleV2, self).__init__()
        self.mode = mode
        self.gate_fn = nn.Sigmoid()  # only used by the 'attn' forward path
        if self.mode in ['original']:
            self.oup = oup
            # For ratio=2: half the output channels come from the dense conv,
            # the rest from the cheap depthwise conv over those features.
            init_channels = math.ceil(oup / ratio)
            new_channels = init_channels * (ratio - 1)
            self.primary_conv = nn.Sequential(
                nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size // 2, bias=False),
                nn.BatchNorm2d(init_channels),
                nn.ReLU(inplace=True) if relu else nn.Sequential(),
            )
            self.cheap_operation = nn.Sequential(
                nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size // 2, groups=init_channels, bias=False),
                nn.BatchNorm2d(new_channels),
                nn.ReLU(inplace=True) if relu else nn.Sequential(),
            )
        elif self.mode in ['attn']:
            self.oup = oup
            init_channels = math.ceil(oup / ratio)
            new_channels = init_channels * (ratio - 1)
            self.primary_conv = nn.Sequential(
                nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size // 2, bias=False),
                nn.BatchNorm2d(init_channels),
                nn.ReLU(inplace=True) if relu else nn.Sequential(),
            )
            self.cheap_operation = nn.Sequential(
                nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size // 2, groups=init_channels, bias=False),
                nn.BatchNorm2d(new_channels),
                nn.ReLU(inplace=True) if relu else nn.Sequential(),
            )
            # DFC attention branch: pointwise mixing, then horizontal (1x5)
            # and vertical (5x1) depthwise convs to aggregate long-range
            # context along each axis separately.
            self.short_conv = nn.Sequential(
                nn.Conv2d(inp, oup, kernel_size, stride, kernel_size // 2, bias=False),
                nn.BatchNorm2d(oup),
                nn.Conv2d(oup, oup, kernel_size=(1, 5), stride=1, padding=(0, 2), groups=oup, bias=False),
                nn.BatchNorm2d(oup),
                nn.Conv2d(oup, oup, kernel_size=(5, 1), stride=1, padding=(2, 0), groups=oup, bias=False),
                nn.BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.mode in ['original']:
            x1 = self.primary_conv(x)
            x2 = self.cheap_operation(x1)
            out = torch.cat([x1, x2], dim=1)
            # ceil() above may overshoot oup for odd widths; trim the extras.
            return out[:, :self.oup, :, :]
        elif self.mode in ['attn']:
            # Attention is computed on a 2x-downsampled input to cut its cost,
            # then upsampled back to the feature-map size.
            res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2))
            x1 = self.primary_conv(x)
            x2 = self.cheap_operation(x1)
            out = torch.cat([x1, x2], dim=1)
            return out[:, :self.oup, :, :] * F.interpolate(self.gate_fn(res), size=(out.shape[-2], out.shape[-1]),
                                                           mode='nearest')
class GhostBottleneckV2(nn.Module):
    """Inverted-residual bottleneck built from two Ghost modules.

    The first Ghost module expands ``in_chs`` to ``mid_chs`` (using DFC
    attention for blocks with ``layer_id > 1``), an optional depthwise conv
    handles ``stride > 1``, optional Squeeze-and-Excitation recalibrates
    channels, and the second Ghost module projects down to ``out_chs``.
    A shortcut (identity, or depthwise + pointwise convs when shape changes)
    is added to the output.
    """

    def __init__(self, in_chs, mid_chs, out_chs, dw_kernel_size=3,
                 stride=1, act_layer=nn.ReLU, se_ratio=0., layer_id=None, args=None):
        super(GhostBottleneckV2, self).__init__()
        has_se = se_ratio is not None and se_ratio > 0.
        self.stride = stride

        # Point-wise expansion.
        # Bug fix: the default ``layer_id=None`` used to raise TypeError on
        # ``layer_id <= 1``; a missing layer_id is now treated as an early
        # layer (plain Ghost module, no DFC attention).
        if layer_id is None or layer_id <= 1:
            self.ghost1 = GhostModuleV2(in_chs, mid_chs, relu=True, mode='original', args=args)
        else:
            # Deeper layers use DFC attention to capture long-range context.
            self.ghost1 = GhostModuleV2(in_chs, mid_chs, relu=True, mode='attn', args=args)

        # Depth-wise convolution (only created when downsampling).
        if self.stride > 1:
            self.conv_dw = nn.Conv2d(mid_chs, mid_chs, dw_kernel_size, stride=stride,
                                     padding=(dw_kernel_size - 1) // 2, groups=mid_chs, bias=False)
            self.bn_dw = nn.BatchNorm2d(mid_chs)

        # Squeeze-and-excitation.
        if has_se:
            self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio)
        else:
            self.se = None

        # Point-wise projection back down to out_chs (no activation).
        self.ghost2 = GhostModuleV2(mid_chs, out_chs, relu=False, mode='original', args=args)

        # Shortcut: identity when shape is preserved, otherwise depthwise
        # (for stride) followed by pointwise (for channel change) convs.
        if (in_chs == out_chs and self.stride == 1):
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_chs, in_chs, dw_kernel_size, stride=stride,
                          padding=(dw_kernel_size - 1) // 2, groups=in_chs, bias=False),
                nn.BatchNorm2d(in_chs),
                nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_chs),
            )

    def forward(self, x):
        residual = x
        x = self.ghost1(x)
        if self.stride > 1:
            x = self.conv_dw(x)
            x = self.bn_dw(x)
        if self.se is not None:
            x = self.se(x)
        x = self.ghost2(x)
        x += self.shortcut(residual)
        return x
class GhostNetV2(nn.Module):
    """GhostNetV2 backbone.

    Builds a stride-2 stem conv followed by stages of GhostBottleneckV2
    blocks (plus a final 1x1 ConvBnAct), as described by ``cfgs``.
    ``forward`` returns the last four distinct-resolution feature maps (for
    use as a detection backbone), not classification logits.
    """

    def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2, block=GhostBottleneckV2, args=None):
        super(GhostNetV2, self).__init__()
        # cfgs: list of stages; each stage is a list of
        # [kernel, expansion_channels, out_channels, se_ratio, stride] rows.
        self.cfgs = cfgs
        self.dropout = dropout
        self.num_classes = num_classes

        # building first layer (stem: 3 -> 16*width channels, stride 2)
        output_channel = _make_divisible(16 * width, 4)
        self.conv_stem = nn.Conv2d(3, output_channel, 3, 2, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(output_channel)
        self.act1 = nn.ReLU(inplace=True)
        input_channel = output_channel

        # building inverted residual blocks
        stages = []
        # block = block
        # layer_id counts blocks globally; blocks after the first two get
        # DFC attention (see GhostBottleneckV2).
        layer_id = 0
        for cfg in self.cfgs:
            layers = []
            for k, exp_size, c, se_ratio, s in cfg:
                output_channel = _make_divisible(c * width, 4)
                hidden_channel = _make_divisible(exp_size * width, 4)
                if block == GhostBottleneckV2:
                    layers.append(block(input_channel, hidden_channel, output_channel, k, s,
                                        se_ratio=se_ratio, layer_id=layer_id, args=args))
                input_channel = output_channel
                layer_id += 1
            stages.append(nn.Sequential(*layers))

        # Final 1x1 conv expanding to the last stage's expansion width
        # (exp_size here is the value left over from the loop above).
        output_channel = _make_divisible(exp_size * width, 4)
        stages.append(nn.Sequential(ConvBnAct(input_channel, output_channel, 1)))
        input_channel = output_channel
        self.blocks = nn.Sequential(*stages)

        # Record per-scale channel counts by running a dummy forward pass.
        # NOTE(review): this runs while the module is still in train mode, so
        # it also updates BatchNorm running statistics — confirm intended.
        self.width_list = [i.size(1) for i in self.forward(torch.randn(1, 3, 640, 640))]

    def reset_classifier(self, num_classes, global_avg=''):
        # NOTE(review): ``self.classifier`` is never used by forward(), which
        # returns feature maps rather than logits.
        self.num_classes = num_classes
        self.classifier = nn.Linear(1280, self.num_classes) if self.num_classes > 0 else nn.Identity()

    def forward(self, x):
        # Keyed by spatial size; repeated sizes overwrite, so only the last
        # tensor produced at each resolution is kept.
        unique_tensors = {}
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        for model in self.blocks:
            x = model(x)
            # NOTE(review): dropout is applied after every stage here; the
            # reference GhostNet applies it once before the classifier —
            # verify this per-stage placement is intended.
            if self.dropout > 0.:
                x = F.dropout(x, p=self.dropout, training=self.training)
            width, height = x.shape[2], x.shape[3]
            unique_tensors[(width, height)] = x
        # Return the feature maps of the last four distinct resolutions.
        result_list = list(unique_tensors.values())[-4:]
        return result_list
@register_model
def Ghostnetv2(pretrained=False, pretrained_cfg=None, pretrained_cfg_overlay=None, **kwargs):
    """Build GhostNetV2 with the paper's default stage configuration.

    Registered with the timm model registry via ``@register_model``.
    NOTE(review): ``pretrained``, the pretrained-cfg arguments and **kwargs
    are accepted for timm API compatibility but currently ignored — the model
    is always built from scratch with width=1.0.
    """
    cfgs = [
        # k: dw kernel size, t: expansion channels, c: output channels,
        # SE: se_ratio (0 disables SE), s: stride
        [[3, 16, 16, 0, 1]],
        [[3, 48, 24, 0, 2]],
        [[3, 72, 24, 0, 1]],
        [[5, 72, 40, 0.25, 2]],
        [[5, 120, 40, 0.25, 1]],
        [[3, 240, 80, 0, 2]],
        [[3, 200, 80, 0, 1],
         [3, 184, 80, 0, 1],
         [3, 184, 80, 0, 1],
         [3, 480, 112, 0.25, 1],
         [3, 672, 112, 0.25, 1]
         ],
        [[5, 672, 160, 0.25, 2]],
        [[5, 960, 160, 0, 1],
         [5, 960, 160, 0.25, 1],
         [5, 960, 160, 0, 1],
         [5, 960, 160, 0.25, 1]
         ]
    ]
    return GhostNetV2(cfgs)
if __name__ == '__main__':
    # Smoke test: build the model and run a forward pass.
    model = Ghostnetv2()
    model.eval()
    input = torch.randn(16, 3, 224, 224)
    y = model(input)
    # Bug fix: forward() returns a *list* of multi-scale feature maps, and
    # list has no .size() — print each feature map's shape instead.
    for feature in y:
        print(feature.size())
3.YOLOv11中添加Ghostnetv2方式
3.1 在ultralytics/nn下新建Extramodule
3.2 在Extramodule里创建Ghostnetv2
在Ghostnetv2.py文件里添加给出的Ghostnetv2代码
添加完Ghostnetv2代码后,在ultralytics/nn/Extramodule/__init__.py文件中引用
3.3 在tasks.py里引用
在ultralytics/nn/tasks.py文件里引用Extramodule
(1)在tasks.py找到parse_model(ctrl+f 可以直接搜索parse_model位置)
(2)
elif m in {Ghostnetv2}:
m = m(*args)
c2 = m.width_list
backbone = True
(3)将elif m is AIFI:以下的代码全部替换成我给的
上述代码全部替换以下代码:
elif m is AIFI:
args = [ch[f], *args]
elif m in {HGStem, HGBlock}:
c1, cm, c2 = ch[f], args[0], args[1]
args = [c1, cm, c2, *args[2:]]
if m is HGBlock:
args.insert(4, n) # number of repeats
n = 1
elif m is ResNetLayer:
c2 = args[1] if args[3] else args[1] * 4
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}:
args.append([ch[x] for x in f])
if m is Segment:
args[2] = make_divisible(min(args[2], max_channels) * width, 8)
elif m is RTDETRDecoder: # special case, channels arg must be passed in index 1
args.insert(1, [ch[x] for x in f])
elif m is CBLinear:
c2 = args[0]
c1 = ch[f]
args = [c1, c2, *args[1:]]
elif m is CBFuse:
c2 = ch[f[-1]]
else:
c2 = ch[f]
if isinstance(c2, list):
m_ = m
m_.backbone = True
else:
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
m.np = sum(x.numel() for x in m_.parameters()) # number params
m_.i, m_.f, m_.type = i + 4 if backbone else i, f, t # attach index, 'from' index, type
if verbose:
LOGGER.info(f'{i:>3}{str(f):>20}{n_:>3}{m.np:10.0f} {t:<45}{str(args):<30}') # print
save.extend(
x % (i + 4 if backbone else i) for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
if isinstance(c2, list):
ch.extend(c2)
if len(c2) != 5:
ch.insert(0, 0)
else:
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
(4)这个修改不在def parse_model中,但是还在tasks.py中,在tasks.py前面几行
# Backbone modification point
def _predict_once(self, x, profile=False, visualize=False, embed=None):
    """
    Perform a forward pass through the network.

    Args:
        x (torch.Tensor): The input tensor to the model.
        profile (bool): Print the computation time of each layer if True, defaults to False.
        visualize (bool): Save the feature maps of the model if True, defaults to False.
        embed (list, optional): A list of feature vectors/embeddings to return.

    Returns:
        (torch.Tensor): The last output of the model.
    """
    y, dt, embeddings = [], [], []  # outputs
    for m in self.model:
        if m.f != -1:  # if not from previous layer
            x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
        if profile:
            self._profile_one_layer(m, x, dt)
        if hasattr(m, 'backbone'):
            # Backbone modules return a list of multi-scale feature maps.
            x = m(x)
            if len(x) != 5:  # 0 - 5
                # Pad with None so indices line up with the expected 5 outputs.
                x.insert(0, None)
            for index, i in enumerate(x):
                if index in self.save:
                    y.append(i)
                else:
                    y.append(None)
            x = x[-1]  # pass the last output on to the next layer
        else:
            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output
        if visualize:
            feature_visualization(x, m.type, m.i, save_dir=visualize)
        if embed and m.i in embed:
            embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1))  # flatten
            if m.i == max(embed):
                return torch.unbind(torch.cat(embeddings, 1), dim=0)
    return x
(5)在ultralytics/models/yolo/detect/train.py里找到相应位置进行检查(注:原文此步骤描述不完整,请结合所用版本的源码确认)。
到此全部修改结束。
4. 新建一个yolo11Ghostnetv2.yaml文件
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 1 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs
# YOLO11n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Ghostnetv2, []] # 4
- [-1, 1, SPPF, [1024, 5]] # 5
- [-1, 2, C2PSA, [1024]] # 6
# YOLO11n head
head:
- [-1, 1, nn.Upsample, [None, 2, 'nearest']] # 7
- [[-1, 3], 1, Concat, [1]] # 8 cat backbone P4
- [-1, 3, C3k2, [512, False]] # 9
- [-1, 1, nn.Upsample, [None, 2, 'nearest']] # 10
- [[-1, 2], 1, Concat, [1]] # 11 cat backbone P3
- [-1, 3, C3k2, [256, False]] # 12 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]] # 13
- [[-1, 9], 1, Concat, [1]] # 14 cat head P4
- [-1, 3, C3k2, [512, False]] # 15 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]] # 16
- [[-1, 6], 1, Concat, [1]] # 17 cat head P5
- [-1, 3, C3k2, [1024, False]] # 18 (P5/32-large)
- [[12, 15, 18], 1, Detect, [nc]] # Detect(P3, P4, P5)
大家根据自己的数据集实际情况,修改nc大小。
5.模型训练
import warnings

warnings.filterwarnings('ignore')
from ultralytics import YOLO

if __name__ == '__main__':
    # Build the model from the custom GhostNetV2 backbone YAML config.
    model = YOLO(r'D:\yolo\yolov11\ultralytics-main\datasets\yolo11Ghostnetv2.yaml')
    # Train on the dataset described by data.yaml; adjust paths/params to your setup.
    model.train(data=r'D:\yolo\yolov11\ultralytics-main\datasets\data.yaml',
                cache=False,
                imgsz=640,
                epochs=100,
                single_cls=False,  # whether this is single-class detection
                batch=8,
                close_mosaic=10,
                workers=0,
                device='0',
                optimizer='SGD',
                amp=True,
                project='runs/train',
                name='exp',
                )
模型结构打印,成功运行 :
6.本文总结
到此本文的正式分享内容就结束了,在这里给大家推荐我的YOLOv11改进有效涨点专栏,本专栏目前为新开的,后期我会根据各种前沿顶会进行论文复现,也会对一些老的改进机制进行补充,如果大家觉得本文帮助到你了,订阅本专栏,关注后续更多的更新~