- 🍨 This post is a learning log from the 🔗 365天深度学习训练营 (365-Day Deep Learning Training Camp)
- 🍖 Original author: K同学啊 | tutoring and custom projects available
The YOLOv8 module code lives mainly under ./ultralytics/nn/modules/.
conv.py
At the top of conv.py, __all__ declares every module the file exposes; if you add a custom module, you must register it here as well.
```python
__all__ = ('Conv', 'Conv2', 'LightConv', 'DWConv', 'DWConvTranspose2d', 'ConvTranspose', 'Focus', 'GhostConv',
           'ChannelAttention', 'SpatialAttention', 'CBAM', 'Concat', 'RepConv')
```
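As a hypothetical sketch (MyConv is an invented name for illustration, not part of ultralytics), registering a custom module defined in conv.py would look like this:

```python
# Hypothetical: after defining a custom class MyConv(nn.Module) in conv.py,
# append its name to the tuple so it is exported with the module.
__all__ = ('Conv', 'Conv2', 'LightConv', 'DWConv', 'DWConvTranspose2d', 'ConvTranspose', 'Focus', 'GhostConv',
           'ChannelAttention', 'SpatialAttention', 'CBAM', 'Concat', 'RepConv', 'MyConv')
```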
autopad
autopad returns the padding size that keeps the output tensor the same spatial size as the input ('same' padding).
Parameter k: kernel size; may be an int or a sequence of ints
Parameter p: padding size; defaults to None
Parameter d: dilation rate; defaults to 1. A regular convolution has dilation 1, while a dilated (atrous) convolution has dilation greater than 1
```python
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
```
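A quick sanity check; the expected values follow directly from k // 2 applied to the (dilated) kernel size:

```python
from ultralytics.nn.modules.conv import autopad

print(autopad(3))       # 1 -> a 3x3 kernel needs padding 1 to keep the size
print(autopad(5))       # 2 -> a 5x5 kernel needs padding 2
print(autopad(3, d=2))  # 2 -> dilation 2 makes the effective kernel 5x5
print(autopad((1, 3)))  # [0, 1] -> per-dimension padding for a 1x3 kernel
```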
Conv
Conv is a standard convolution module; it inherits from nn.Module.
Parameter c1: number of input channels
Parameter c2: number of output channels
Parameter k: kernel size, defaults to 1
Parameter s: stride, defaults to 1
Parameter p: padding, defaults to None
Parameter g: number of groups, defaults to 1
Parameter d: dilation rate, defaults to 1
Parameter act: whether to apply an activation; defaults to True, in which case SiLU is used
```python
class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Apply convolution and activation without BatchNorm (used after BN has been fused into the conv)."""
        return self.act(self.conv(x))
```
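A minimal usage sketch, assuming ultralytics is installed (the input size 640 and channel counts are illustrative):

```python
import torch
from ultralytics.nn.modules.conv import Conv

x = torch.randn(1, 3, 640, 640)
m = Conv(3, 64, k=3, s=2)  # 3x3 conv, stride 2; autopad keeps the 'same' alignment
print(m(x).shape)          # torch.Size([1, 64, 320, 320])
```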
Focus
The Focus module was designed by the YOLO authors themselves to reduce floating-point operations and improve speed, rather than to enlarge the feature map. In essence it slices the image, much like interleaved downsampling: the width and height information of the original image is split apart and aggregated into the channel dimension. Its structure is as follows:
```python
class Focus(nn.Module):
    """Focus wh information into c-space."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Initializes Focus object with user defined channel, convolution, padding, group and activation values."""
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
        # self.contract = Contract(gain=2)

    def forward(self, x):
        """
        Applies convolution to concatenated tensor and returns the output.

        Input shape is (b,c,w,h); slicing yields (b,4c,w/2,h/2), which self.conv maps to (b,c2,w/2,h/2).
        """
        # Take every other pixel in four phase-shifted patterns, concatenate the four
        # slices along the channel dimension, then pass the result through self.conv.
        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
        # Equivalent path if the Contract module were enabled:
        # return self.conv(self.contract(x))
```
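A small sketch that verifies the slicing behaviour; the channel counts are illustrative:

```python
import torch
from ultralytics.nn.modules.conv import Focus

x = torch.randn(1, 3, 640, 640)
# The four interleaved slices each halve the spatial resolution.
slices = torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)
print(slices.shape)        # torch.Size([1, 12, 320, 320]) -> 4c channels at w/2 x h/2

focus = Focus(3, 64, k=3)  # the conv then projects 4c=12 channels to c2=64
print(focus(x).shape)      # torch.Size([1, 64, 320, 320])
```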
block.py
C2f
The structure of C2f is as follows:
```python
class C2f(nn.Module):
    """Faster Implementation of CSP Bottleneck with 2 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut,
        groups, expansion.
        """
        super().__init__()
        self.c = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))

    def forward(self, x):
        """Forward pass through C2f layer."""
        # Split the cv1 output into two halves, feed the second half through the n
        # Bottleneck blocks (keeping every intermediate output), then fuse with cv2.
        y = list(self.cv1(x).chunk(2, 1))
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))

    def forward_split(self, x):
        """Forward pass using split() instead of chunk()."""
        # Identical to forward(), but split() with explicit sizes is friendlier to export.
        y = list(self.cv1(x).split((self.c, self.c), 1))
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))
```
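A shape check for C2f; the values are illustrative:

```python
import torch
from ultralytics.nn.modules.block import C2f

m = C2f(64, 128, n=2, shortcut=True)
x = torch.randn(1, 64, 80, 80)
print(m(x).shape)  # torch.Size([1, 128, 80, 80])
# Internally: cv1 -> 128 channels, chunked into 2x64; two Bottlenecks add 2x64 more,
# so cv2 sees (2 + n) * c = 4 * 64 = 256 channels and maps them to c2 = 128.
```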