This is a detailed, beginner-friendly walkthrough based on 迪菲赫尔曼's column article. When following the original hands-on, I found some steps unclear or too brief for newcomers, so the full procedure is written out below.
迪菲赫尔曼's original article on improving YOLO:
改进YOLO系列 | GhostNetV2: 用长距离注意力增强低成本运算 | 更换骨干网络之GhostNetV2 (CSDN blog)
1. GhostNetV2 core code
Create a modules folder under the models folder, then create a new .py file inside it, here named GhostV2.py, and copy the code below into it. (Depending on your environment, you may also want an empty __init__.py in models/modules so the folder imports cleanly as a package; on Python 3, namespace packages usually make this optional.)
Note: many improvement tutorials paste their code straight into common.py, which becomes hard to navigate once several modifications accumulate. Keeping each mechanism in its own file under a modules folder is much easier to manage.
import math
from functools import partial
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


def _make_divisible(x, divisor=4):
    """Round x up to the nearest multiple of divisor."""
    return int(np.ceil(x * 1. / divisor) * divisor)


class MyHSigmoid(nn.Module):
    """
    Hard sigmoid definition: ReLU6(x + 3) / 6.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> MyHSigmoid()
    """

    def __init__(self):
        super(MyHSigmoid, self).__init__()
        self.relu6 = nn.ReLU6()

    def forward(self, x):
        """ forward """
        return self.relu6(x + 3.) * 0.16666667  # i.e. ReLU6(x + 3) / 6


class Activation(nn.Module):
    """
    Activation definition.

    Args:
        act_func (string): activation name.

    Returns:
        Tensor, output tensor.
    """

    def __init__(self, act_func):
        super(Activation, self).__init__()
        if act_func == 'relu':
            self.act = nn.ReLU()
        elif act_func == 'relu6':
            self.act = nn.ReLU6()
        elif act_func == 'sigmoid':
            self.act = nn.Sigmoid()
        elif act_func in ('hsigmoid', 'hard_sigmoid'):
            self.act = MyHSigmoid()
        elif act_func in ('hswish', 'hard_swish'):
            self.act = nn.Hardswish()
        else:
            raise NotImplementedError

    def forward(self, x):
        """ forward """
        return self.act(x)


class GlobalAvgPooling(nn.Module):
    """
    Global average pooling definition.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super(GlobalAvgPooling, self).__init__()
        self.mean = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """ forward """
        x = self.mean(x)
        return x


class SE_Ghost(nn.Module):
    """
    SE wrapper definition.

    Args:
        num_out (int): Output channel.
        ratio (int): middle output ratio.

    Returns:
        Tensor, output tensor.
    """

    def __init__(self, num_out, ratio=4):
        super(SE_Ghost, self).__init__()
        num_mid = _make_divisible(num_out // ratio)
        self.pool = GlobalAvgPooling()
        self.conv_reduce = nn.Conv2d(in_channels=num_out, out_channels=num_mid,
                                     kernel_size=1, bias=True, padding_mode='zeros')
        self.act1 = Activation('relu')
        self.conv_expand = nn.Conv2d(in_channels=num_mid, out_channels=num_out,
                                     kernel_size=1, bias=True, padding_mode='zeros')
        self.act2 = Activation('hsigmoid')

    def forward(self, x):
        """ forward of SE module """
        out = self.pool(x)
        out = self.conv_reduce(out)
        out = self.act1(out)
        out = self.conv_expand(out)
        out = self.act2(out)
        out = x * out
        return out


class ConvUnit(nn.Module):
    """
    ConvUnit wrapper definition: Conv2d + BatchNorm2d + optional activation.

    Args:
        num_in (int): Input channel.
        num_out (int): Output channel.
        kernel_size (Union[int, tuple[int]]): Input kernel size.
        stride (int): Stride size.
        padding (Union[int, tuple[int]]): Padding number.
        num_groups (int): Output num group.
        use_act (bool): Used activation or not.
        act_type (string): Activation type.

    Returns:
        Tensor, output tensor.
    """

    def __init__(self, num_in, num_out, kernel_size=1, stride=1, padding=0, num_groups=1,
                 use_act=True, act_type='relu'):
        super(ConvUnit, self).__init__()
        self.conv = nn.Conv2d(in_channels=num_in,
                              out_channels=num_out,
                              kernel_size=kernel_size,
                              stride=stride,
                              padding=padding,
                              groups=num_groups,
                              bias=False,
                              padding_mode='zeros')
        self.bn = nn.BatchNorm2d(num_out)
        self.use_act = use_act
        self.act = Activation(act_type) if use_act else None

    def forward(self, x):
        """ forward of conv unit """
        out = self.conv(x)
        out = self.bn(out)
        if self.use_act:
            out = self.act(out)
        return out


class GhostModule(nn.Module):
    """
    GhostModule wrapper definition.

    Args:
        num_in (int): Input channel.
        num_out (int): Output channel.
        kernel_size (int): Input kernel size.
        stride (int): Stride size.
        padding (int): Padding number.
        ratio (int): Reduction ratio.
        dw_size (int): kernel size of cheap operation.
        use_act (bool): Used activation or not.
        act_type (string): Activation type.

    Returns:
        Tensor, output tensor.
    """

    def __init__(self, num_in, num_out, kernel_size=1, stride=1, padding=0, ratio=2, dw_size=3,
                 use_act=True, act_type='relu'):
        super(GhostModule, self).__init__()
        init_channels = math.ceil(num_out / ratio)
        new_channels = init_channels * (ratio - 1)
        self.primary_conv = ConvUnit(num_in, init_channels, kernel_size=kernel_size, stride=stride,
                                     padding=kernel_size // 2, num_groups=1, use_act=use_act, act_type=act_type)
        self.cheap_operation = ConvUnit(init_channels, new_channels, kernel_size=dw_size, stride=1,
                                        padding=dw_size // 2, num_groups=init_channels,
                                        use_act=use_act, act_type=act_type)

    def forward(self, x):
        """ ghost module forward """
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)  # cheap depthwise conv generates the "ghost" features
        return torch.cat([x1, x2], dim=1)


class GhostModuleMul(nn.Module):
    """
    GhostModuleMul wrapper definition: a GhostModule gated by DFC attention.

    Args:
        num_in (int): Input channel.
        num_out (int): Output channel.
        kernel_size (int): Input kernel size.
        stride (int): Stride size.
        padding (int): Padding number.
        ratio (int): Reduction ratio.
        dw_size (int): kernel size of cheap operation.
        use_act (bool): Used activation or not.
        act_type (string): Activation type.

    Returns:
        Tensor, output tensor.
    """

    def __init__(self, num_in, num_out, kernel_size=1, stride=1, padding=0, ratio=2, dw_size=3,
                 use_act=True, act_type='relu'):
        super(GhostModuleMul, self).__init__()
        # kept from the reference implementation; with kernel_size=1, stride=1 this is a pass-through
        self.avgpool2d = nn.AvgPool2d(kernel_size=1, stride=1)
        self.gate_fn = Activation('sigmoid')
        init_channels = math.ceil(num_out / ratio)
        new_channels = init_channels * (ratio - 1)
        self.primary_conv = ConvUnit(num_in, init_channels, kernel_size=kernel_size, stride=stride,
                                     padding=kernel_size // 2, num_groups=1, use_act=use_act, act_type=act_type)
        self.cheap_operation = ConvUnit(init_channels, new_channels, kernel_size=dw_size, stride=1,
                                        padding=dw_size // 2, num_groups=init_channels,
                                        use_act=use_act, act_type=act_type)
        # DFC attention branch: 1x1 conv followed by horizontal and vertical depthwise strip convs
        self.short_conv = nn.Sequential(
            ConvUnit(num_in, num_out, kernel_size=kernel_size, stride=stride,
                     padding=kernel_size // 2, num_groups=1, use_act=False),
            ConvUnit(num_out, num_out, kernel_size=(1, 5), stride=1,
                     padding=(0, 2), num_groups=num_out, use_act=False),
            ConvUnit(num_out, num_out, kernel_size=(5, 1), stride=1,
                     padding=(2, 0), num_groups=num_out, use_act=False),
        )

    def forward(self, x):
        """ ghost module forward """
        res = self.avgpool2d(x)
        res = self.short_conv(res)
        res = self.gate_fn(res)  # attention map in [0, 1]
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        out = torch.cat([x1, x2], dim=1)
        out = out * F.interpolate(res, size=out.shape[-2:], mode="bilinear", align_corners=True)
        return out


class GhostV2(nn.Module):
    """
    GhostNetV2 bottleneck (GhostBottleneck wrapper) definition.

    Args:
        num_in (int): Input channel.
        num_out (int): Output channel.
        num_mid (int): Middle (expansion) channel.
        kernel_size (int): Input kernel size.
        stride (int): Stride size.
        act_type (str): Activation type.
        use_se (bool): Use SE wrapper or not.
        layer_id (int): Block index; the first two blocks use the plain GhostModule.

    Returns:
        Tensor, output tensor.
    """

    def __init__(self, num_in, num_out, num_mid, kernel_size, stride=1, act_type='relu', use_se=False, layer_id=None):
        super(GhostV2, self).__init__()
        # blocks 0-1 use the plain GhostModule; later blocks use the DFC-attention version
        self.use_ori_module = layer_id <= 1
        if self.use_ori_module:
            self.ghost1 = GhostModule(num_in, num_mid, kernel_size=1,
                                      stride=1, padding=0, act_type=act_type)
        else:
            self.ghost1 = GhostModuleMul(num_in, num_mid, kernel_size=1,
                                         stride=1, padding=0, act_type=act_type)

        self.use_dw = stride > 1
        self.dw = None
        if self.use_dw:
            self.dw = ConvUnit(num_mid, num_mid, kernel_size=kernel_size, stride=stride,
                               padding=self._get_pad(kernel_size), act_type=act_type, num_groups=num_mid, use_act=False)

        self.use_se = use_se
        if use_se:
            self.se = SE_Ghost(num_mid)

        self.ghost2 = GhostModule(num_mid, num_out, kernel_size=1, stride=1,
                                  padding=0, act_type=act_type, use_act=False)

        self.down_sample = False
        if num_in != num_out or stride != 1:
            self.down_sample = True
        self.shortcut = None
        if self.down_sample:
            self.shortcut = nn.Sequential(
                ConvUnit(num_in, num_in, kernel_size=kernel_size, stride=stride,
                         padding=self._get_pad(kernel_size), num_groups=num_in, use_act=False),
                ConvUnit(num_in, num_out, kernel_size=1, stride=1,
                         padding=0, num_groups=1, use_act=False),
            )

    def forward(self, x):
        """ forward of ghostnet """
        shortcut = x
        out = self.ghost1(x)
        if self.use_dw:
            out = self.dw(out)
        if self.use_se:
            out = self.se(out)
        out = self.ghost2(out)
        if self.down_sample:
            shortcut = self.shortcut(shortcut)
        out = shortcut + out
        return out

    def _get_pad(self, kernel_size):
        """set the padding number"""
        pad = 0
        if kernel_size == 1:
            pad = 0
        elif kernel_size == 3:
            pad = 1
        elif kernel_size == 5:
            pad = 2
        elif kernel_size == 7:
            pad = 3
        else:
            raise NotImplementedError
        return pad
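Before wiring the file into YOLOv5, it can be sanity-checked on its own. The following is a minimal sketch you could temporarily append to GhostV2.py (the channel and image sizes are arbitrary example values):

if __name__ == '__main__':
    # stride 1 with num_in == num_out -> identity shortcut, so the shape is preserved
    block = GhostV2(num_in=16, num_out=16, num_mid=16, kernel_size=3, stride=1,
                    act_type='relu', use_se=False, layer_id=0)
    x = torch.randn(1, 16, 64, 64)
    print(block(x).shape)  # expected: torch.Size([1, 16, 64, 64])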
2. Modify common.py
In common.py, add the following line near the top with the other imports, so the contents of GhostV2.py are available:
from models.modules.GhostV2 import *
Then, after the existing C3Ghost module, add the following C3_Ghostv2 module:
class C3_Ghostv2(nn.Module):
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(GhostV2(c_, c_, 2 * c_, 3, 1, 'relu', False, 2) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
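As a quick check that the block is wired correctly (a sketch; run from the yolov5 root after making the edit above, with example sizes), C3_Ghostv2 preserves the spatial size and maps c1 to c2 channels:

>>> import torch
>>> from models.common import C3_Ghostv2
>>> m = C3_Ghostv2(64, 64, n=1)
>>> m(torch.randn(1, 64, 32, 32)).shape
torch.Size([1, 64, 32, 32])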
3. Modify yolo.py
In yolo.py, add the following line just above the line that imports the common module, so the contents of GhostV2.py are available:
from models.modules.GhostV2 import *
Note: the position of this import matters; if it is placed wrong, the module may not be found. The import block is sketched below.
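The top of yolo.py then looks roughly like this (a sketch; the surrounding imports vary between yolov5 releases):

from models.modules.GhostV2 import *  # new: GhostNetV2 modules
from models.common import *           # existing import of common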
Next, find the parse_model function and register the two new modules. C3_Ghostv2 is added to the existing module tuple that already lists C3 and C3Ghost, and also to the inner tuple that inserts the repeat count n, so it is scaled and repeated like the other C3 variants. Then, a few lines further down, add a dedicated branch for GhostV2:
        elif m is GhostV2:
            c1, c2 = ch[f], args[0]
            if c2 != no:
                c2 = make_divisible(c2 * gw, 8)
            args = [c1, c2, *args[1:]]
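For orientation, the relevant part of parse_model then looks roughly like this (a sketch based on a v6-era yolov5; the exact membership of the module tuples varies between releases):

        if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, Focus,
                 BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, C3_Ghostv2):  # <-- C3_Ghostv2 added
            c1, c2 = ch[f], args[0]
            if c2 != no:
                c2 = make_divisible(c2 * gw, 8)
            args = [c1, c2, *args[1:]]
            if m in (BottleneckCSP, C3, C3TR, C3Ghost, C3_Ghostv2):  # <-- C3_Ghostv2 added here too
                args.insert(2, n)  # number of repeats
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is GhostV2:  # <-- the new branch from the step above
            c1, c2 = ch[f], args[0]
            if c2 != no:
                c2 = make_divisible(c2 * gw, 8)
            args = [c1, c2, *args[1:]]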
4. Create the yaml file
Under the models folder, create a new yaml file, here named yolov5-GhostNetv2.yaml, and copy the following into it.
# Paper: https://arxiv.org/abs/2211.12905
# Parameters
nc: 80 # number of classes
depth_multiple: 1 # model depth multiple
width_multiple: 1 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
  [[-1, 1, Conv, [16, 3, 2]],                            # 0-P1/2
   [-1, 1, GhostV2, [16, 16, 3, 1, 'relu', False, 0]],   # 1   args: outc, midc, k, s, act, se, layer_id
   [-1, 1, GhostV2, [24, 48, 3, 2, 'relu', False, 1]],   # 2-P2/4
   [-1, 1, GhostV2, [24, 72, 3, 1, 'relu', False, 2]],   # 3 (P2)
   [-1, 1, GhostV2, [40, 72, 5, 2, 'relu', True, 3]],    # 4-P3/8
   [-1, 1, GhostV2, [40, 120, 5, 1, 'relu', True, 4]],   # 5 (P3)
   [-1, 1, GhostV2, [80, 240, 3, 2, 'relu', False, 5]],  # 6-P4/16
   [-1, 1, GhostV2, [80, 200, 3, 1, 'relu', False, 6]],  # 7
   [-1, 1, GhostV2, [80, 184, 3, 1, 'relu', False, 7]],  # 8
   [-1, 1, GhostV2, [80, 184, 3, 1, 'relu', False, 8]],  # 9
   [-1, 1, GhostV2, [112, 480, 3, 1, 'relu', True, 9]],  # 10
   [-1, 1, GhostV2, [112, 672, 3, 1, 'relu', True, 10]], # 11 (P4)
   [-1, 1, GhostV2, [160, 672, 5, 2, 'relu', True, 11]], # 12-P5/32
   [-1, 1, GhostV2, [160, 960, 5, 1, 'relu', False, 12]],# 13
   [-1, 1, GhostV2, [160, 960, 5, 1, 'relu', True, 13]], # 14
   [-1, 1, GhostV2, [160, 960, 5, 1, 'relu', False, 14]],# 15
   [-1, 1, GhostV2, [160, 960, 5, 1, 'relu', True, 15]], # 16 (P5)
  ]
# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [80, 1, 1]],                    # 17
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],   # 18
   [[-1, 11], 1, Concat, [1]],                   # 19  cat backbone P4
   [-1, 3, C3, [192, False]],                    # 20

   [-1, 1, Conv, [80, 1, 1]],                    # 21
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],   # 22
   [[-1, 5], 1, Concat, [1]],                    # 23  cat backbone P3
   [-1, 3, C3, [120, False]],                    # 24 (P3/8-small)

   [-1, 1, Conv, [60, 3, 2]],                    # 25
   [[-1, 21], 1, Concat, [1]],                   # 26  cat head P4
   [-1, 3, C3, [144, False]],                    # 27 (P4/16-medium)

   [-1, 1, Conv, [80, 3, 2]],                    # 28
   [[-1, 17], 1, Concat, [1]],                   # 29  cat head P5
   [-1, 3, C3, [160, False]],                    # 30 (P5/32-large)

   [[24, 27, 30], 1, Detect, [nc, anchors]],     # Detect(P3, P4, P5)
  ]
That completes the configuration changes.
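Before moving on to training, it is worth confirming that the network actually builds from the new yaml. A minimal sketch (run from the yolov5 root; it uses the Model class defined in models/yolo.py and the 80-class config above):

import torch
from models.yolo import Model

model = Model('models/yolov5-GhostNetv2.yaml', ch=3, nc=80)  # build from the new config
preds = model(torch.zeros(1, 3, 640, 640))                   # dummy forward pass
print(len(preds))  # 3 detection feature maps (P3, P4, P5) in training mode

Equivalently, running models/yolo.py directly with its --cfg argument pointed at the new yaml prints the parsed layer table.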
5. Training
Open coco.yaml under the data folder and update the dataset paths for your machine. Prefer absolute paths; relative paths are a common source of path errors.
Then find train.py in the yolov5 root and adjust its parameters. For now only --weights, --cfg and --data need to be set (plus --batch-size if memory is tight); everything else can stay at its defaults. A sketch of the edited defaults follows this list.
--weights: you can point this at the official yolov5 .pt weights to start with, and swap in your own weights after training. Note, however, that because the backbone has been replaced, the pretrained weights cannot be loaded into the new network, so training starts from scratch whether or not a weights file is selected.
--cfg: use the yolov5-GhostNetv2.yaml just created under models.
--data: use the coco.yaml whose paths were updated above.
--batch-size: the default is 16. If training aborts with an out-of-memory error, drop it to 8 (batch sizes are normally kept at multiples of 8).
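For reference, the edited defaults in train.py's argument parser would look roughly like this (a sketch: help texts are shortened, surrounding arguments are omitted, exact option wording varies slightly between yolov5 releases, and the paths assume the files created above):

parser.add_argument('--weights', type=str, default='', help='initial weights; empty string trains from scratch')
parser.add_argument('--cfg', type=str, default='models/yolov5-GhostNetv2.yaml', help='model yaml')
parser.add_argument('--data', type=str, default='data/coco.yaml', help='dataset yaml')
parser.add_argument('--batch-size', type=int, default=8, help='total batch size')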
Open train.py in VS Code, press Ctrl+Shift+P, search for "Python: Select Interpreter" in the popup, and select the virtual environment you created (here named yolo).
Click Run in the top-right corner and wait for training to finish.
The training results are saved under the runs folder.
6. GFLOPs not displayed
After the changes, you may find that running yolo.py no longer prints the GFLOPs figure. (Running yolo.py works like running train.py: the cfg parameter must first be pointed at the new network config.)
The fix: open torch_utils.py in the utils folder and modify the model_info function as shown below. (For YOLOv8, the analogous change goes in the get_flops function in ultralytics/utils/torch_utils.py.)
try:  # FLOPs
    p = next(model.parameters())
    stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32  # max stride
    im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
    flops = thop.profile(deepcopy(model), inputs=(im,), verbose=False)[0] / 1E9 * 2  # stride GFLOPs
    imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz]  # expand if int/float
    fs = f', {flops * imgsz[0] / stride * imgsz[1] / stride:.1f} GFLOPs'  # 640x640 GFLOPs
except Exception:
    # fallback: profile the model directly at 640x640 instead of extrapolating from the stride
    im = torch.rand(1, 3, 640, 640).to(next(model.parameters()).device)
    flops, params = thop.profile(model, inputs=(im,), verbose=False)
    fs = f', {flops * 2 / 1E9:.1f} GFLOPs'
After this change, run it again and the GFLOPs figure should be printed.