参考:https://zhuanlan.zhihu.com/p/590986066
https://blog.csdn.net/qq_43676259/article/details/135596294
https://mp.weixin.qq.com/s?__biz=MzU1NjEwMTY0Mw==&mid=2247557624&idx=1&sn=6533470d6bfc2a6ebdb720a5952bbb0a&chksm=fbc9989cccbe118a6b30ec9d8a6d5a2ba8239ec041ed02b464926aabd8cea462619976c0901a&scene=21#wechat_redirect
https://mp.weixin.qq.com/s?__biz=MzU1NjEwMTY0Mw==&mid=2247558125&idx=1&sn=4992c109ea00d4b87db8dcdc4404c02b&chksm=fbc99a89ccbe139f1bee2b622c5529c3374f2b92a74a57979c3bde9356b9215bde413201e1e0&scene=27
两个关键点:RepConv(训练多分支,推理卷积重参数化);辅助头训练(训练时在网络中间层增加辅助头,以辅助损失指导浅层网络权重)
YOLOv7相比于YOLOv5,最主要的不同之处如下:
模型结构:引进了更为高效的特征提取模块(ELAN)、下采样模块(MP),不同的空间池化层(SPPCSPC),重参数卷积(RepConv)
正样本匹配:结合YOLOv5中的正样本匹配方法和YOLOX中的正样本筛选方法(SimOTA)
辅助训练模块:在网络的中间层增加额外的辅助头,并以辅助损失指导浅层网络权重的训练;最后将辅助头和检测头的权重做融合。
2.1 CBS模块
class Conv(nn.Module):
    """Convolution block (CBS): Conv2d -> BatchNorm2d -> activation."""

    # Default activation; a single module instance shared by every Conv
    # that is built with ``act=True``.
    default_act = nn.SiLU()

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True, b=False):
        """Build the conv / batch-norm / activation stack.

        :param c1: number of input channels
        :param c2: number of output channels
        :param k: kernel size
        :param s: stride
        :param p: padding; forwarded to ``autopad(k, p, d)``. The original
            note says ``None`` pads so that the output keeps the input
            resolution (behaviour of ``autopad`` is not visible here).
        :param g: number of groups; 1 is a standard convolution
        :param d: dilation; 1 is a standard convolution, larger values
            spread the kernel taps apart
        :param act: ``True`` selects ``default_act``; an ``nn.Module`` is
            used as given; anything else disables the activation
        :param b: whether the convolution has a bias term (off by default)
        """
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=b)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            # nn.Identity returns its input unchanged.
            self.act = nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalisation and activation in order."""
        return self.act(self.bn(self.conv(x)))
2.2 ELAN模块
class ELAN_B(nn.Module):
    """ELAN feature-extraction block used in the YOLOv7 backbone.

    Two parallel 1x1 convolutions reduce the input to ``c_ = int(c1 * e)``
    channels. The second branch is extended by two consecutive stacks of
    two 3x3 convolutions. The four intermediate feature maps are
    concatenated along the channel axis (``4 * c_`` channels) and projected
    to ``c2`` channels by a final 1x1 convolution.
    """

    def __init__(self, c1, c2, e=0.5):
        """
        :param c1: number of input channels
        :param c2: number of output channels
        :param e: ratio applied to ``c1`` to obtain the hidden width ``c_``
        """
        super().__init__()
        c_ = int(c1 * e)
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = nn.Sequential(
            Conv(c_, c_, 3, 1),
            Conv(c_, c_, 3, 1),
        )
        self.cv4 = nn.Sequential(
            Conv(c_, c_, 3, 1),
            Conv(c_, c_, 3, 1),
        )
        self.cv5 = Conv(c_ * 4, c2, 1, 1)

    def forward(self, x):
        """Return the fused feature map with ``c2`` channels."""
        y1 = self.cv1(x)   # c_ channels
        y2 = self.cv2(x)   # c_ channels
        y3 = self.cv3(y2)  # c_ channels
        y4 = self.cv4(y3)  # c_ channels
        return self.cv5(torch.cat([y1, y2, y3, y4], dim=1))
class ELAN_H(nn.Module):
    """ELAN block used in the YOLOv7 head.

    Two parallel 1x1 convolutions reduce the input to ``c_ = int(c1 * e)``
    channels. The second branch feeds a chain of four 3x3 convolutions of
    width ``c__ = c_ // 2``. All six intermediate maps are concatenated
    (``2 * c_ + 4 * c__`` channels) and projected to ``c2`` channels by a
    final 1x1 convolution.
    """

    def __init__(self, c1, c2, e=0.5):
        """
        :param c1: number of input channels
        :param c2: number of output channels
        :param e: ratio applied to ``c1`` to obtain the hidden width ``c_``
        """
        super().__init__()
        c_ = int(c1 * e)
        c__ = c_ // 2
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(c_, c__, 3, 1)
        self.cv4 = Conv(c__, c__, 3, 1)
        self.cv5 = Conv(c__, c__, 3, 1)
        self.cv6 = Conv(c__, c__, 3, 1)
        self.cv7 = Conv(c_ * 2 + c__ * 4, c2, 1, 1)

    def forward(self, x):
        """Return the fused feature map with ``c2`` channels."""
        y1 = self.cv1(x)   # c_ channels
        y2 = self.cv2(x)   # c_ channels
        y3 = self.cv3(y2)  # c__ channels from here on
        y4 = self.cv4(y3)
        y5 = self.cv5(y4)
        y6 = self.cv6(y5)
        return self.cv7(torch.cat([y1, y2, y3, y4, y5, y6], dim=1))
2.3 MP模块
class MP(nn.Module):
    """Down-sampling block from YOLOv7 (variants MP-1 and MP-2).

    Two branches process the input in parallel and are concatenated on the
    channel axis:

    * branch 1: max-pool (k=2, s=2) followed by a 1x1 convolution
    * branch 2: 1x1 convolution followed by a 3x3 convolution with stride 2

    Each branch outputs ``c_`` channels, so the result has ``2 * c_``.
    """

    def __init__(self, c1, c2):
        """
        :param c1: number of input channels
        :param c2: requested number of output channels. It is only compared
            against ``c1``: if equal (MP-1) each branch gets ``c1 // 2``
            channels, otherwise (MP-2) each branch gets ``c1`` channels and
            the output has ``2 * c1`` channels.
        """
        super().__init__()
        # MP-1 keeps the channel count, MP-2 doubles it.
        c_ = c1 // 2 if c1 == c2 else c1

        # Branch 1.
        # NOTE(review): ``MaxPool`` is not defined in the visible part of
        # this file - presumably a wrapper around nn.MaxPool2d; verify.
        self.maxpool = MaxPool(k=2, s=2)
        self.cv1 = Conv(c1, c_, 1, 1)

        # Branch 2.
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(c_, c_, 3, 2)

    def forward(self, x):
        """Return the channel-wise concatenation of both branches."""
        o1 = self.cv1(self.maxpool(x))
        o2 = self.cv3(self.cv2(x))
        return torch.cat([o1, o2], dim=1)
2.4 SPPCSPC模块
class SPPCSPC(nn.Module):
    """Spatial pyramid pooling combined with a CSP-style bypass branch.

    Main branch: ``cv1 -> cv3 -> cv4``, then the result is concatenated with
    its max-pooled versions (one per kernel size in ``k``), and reduced by
    ``cv5 -> cv6``. Bypass branch: ``cv2`` applied to the input. Both
    branches are concatenated and projected to ``c2`` channels by ``cv7``.
    """

    def __init__(self, c1, c2, k=(5, 9, 13), e=0.5):
        """
        :param c1: number of input channels
        :param c2: number of output channels
        :param k: kernel sizes of the stride-1 max-pool layers
        :param e: ratio used to derive the hidden width ``c_ = int(2 * c2 * e)``
        """
        super().__init__()
        c_ = int(2 * c2 * e)
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(c_, c_, 3, 1)
        self.cv4 = Conv(c_, c_, 1, 1)
        # stride 1 with padding size // 2 keeps the spatial size for odd kernels.
        self.m = nn.ModuleList(
            [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        )
        # The un-pooled map plus one pooled map per kernel size are
        # concatenated, so the width follows len(k) instead of assuming 3.
        self.cv5 = Conv((len(k) + 1) * c_, c_, 1, 1)
        self.cv6 = Conv(c_, c_, 3, 1)
        self.cv7 = Conv(2 * c_, c2, 1, 1)

    def forward(self, x):
        """Return the pooled and fused feature map with ``c2`` channels."""
        x1 = self.cv4(self.cv3(self.cv1(x)))
        pooled = [x1] + [pool(x1) for pool in self.m]
        y1 = self.cv6(self.cv5(torch.cat(pooled, 1)))
        y2 = self.cv2(x)
        return self.cv7(torch.cat((y1, y2), dim=1))
2.5.2 模块实现
class RepConv(nn.Module):
    """Re-parameterizable 3x3 convolution (RepVGG-style block).

    Training form (``deploy=False``): the outputs of ``rbr_dense``
    (3x3 conv + BN), ``rbr_1x1`` (1x1 conv + BN) and, when ``c1 == c2`` and
    ``s == 1``, ``rbr_identity`` (BN only) are summed and activated;
    ``rbr_reparam`` does not exist.

    Inference form (``deploy=True`` or after ``fuse_repvgg_block``): a
    single biased 3x3 convolution ``rbr_reparam`` followed by the
    activation; ``rbr_dense``, ``rbr_1x1`` and ``rbr_identity`` are ``None``
    after fusing.
    """

    def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False):
        """
        :param c1: number of input channels
        :param c2: number of output channels
        :param k: kernel size; must be 3
        :param s: stride
        :param p: padding passed to ``autopad``; the result must be 1
        :param g: number of groups
        :param act: ``True`` selects ``nn.SiLU``; an ``nn.Module`` is used
            as given; anything else disables the activation
        :param deploy: build the fused inference form directly
        """
        super().__init__()
        self.deploy = deploy
        self.groups = g
        self.in_channels = c1
        self.out_channels = c2

        assert k == 3
        assert autopad(k, p) == 1
        # Padding of the 1x1 branch so that it lines up with the 3x3 branch.
        padding_11 = autopad(k, p) - k // 2

        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

        if self.deploy:
            # Inference: a single 3x3 convolution with bias.
            self.rbr_reparam = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=True)
        else:
            # Identity branch (BN only) exists when the shape is preserved.
            self.rbr_identity = (nn.BatchNorm2d(num_features=c1) if c2 == c1 and s == 1 else None)
            # 3x3 convolution + BN.
            self.rbr_dense = nn.Sequential(
                nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False),
                nn.BatchNorm2d(num_features=c2),
            )
            # 1x1 convolution + BN.
            self.rbr_1x1 = nn.Sequential(
                nn.Conv2d(c1, c2, 1, s, padding_11, groups=g, bias=False),
                nn.BatchNorm2d(num_features=c2),
            )

    def forward(self, x):
        """Run the fused convolution if present, else sum the training branches."""
        if hasattr(self, 'rbr_reparam'):
            return self.act(self.rbr_reparam(x))

        if self.rbr_identity is None:
            id_out = 0
        else:
            id_out = self.rbr_identity(x)
        return self.act(self.rbr_dense(x) + self.rbr_1x1(x) + id_out)

    def fuse_conv_bn(self, conv, bn):
        """Fold a BatchNorm layer into the preceding convolution.

        Uses the BN running statistics, so the result matches ``bn(conv(x))``
        with ``bn`` in eval mode.

        :param conv: the ``nn.Conv2d`` to fuse (its bias, if any, is folded in)
        :param bn: the batch-norm layer applied after ``conv``
        :return: a new ``nn.Conv2d`` with bias that replaces both layers
        """
        std = (bn.running_var + bn.eps).sqrt()
        scale = bn.weight / std
        bias = bn.bias - bn.running_mean * scale
        if conv.bias is not None:
            # bn(W x + b) = scale * W x + scale * (b - mean) + beta
            bias = bias + conv.bias * scale

        fused = nn.Conv2d(in_channels=conv.in_channels,
                          out_channels=conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          dilation=conv.dilation,
                          groups=conv.groups,
                          bias=True,
                          padding_mode=conv.padding_mode)
        fused.weight = torch.nn.Parameter(conv.weight * scale.reshape(-1, 1, 1, 1))
        fused.bias = torch.nn.Parameter(bias)
        return fused

    def _fuse_identity_bn(self, bn, like):
        """Express the BN-only identity branch as a 3x3 kernel and a bias.

        :param bn: the identity-branch batch-norm layer
        :param like: tensor with the target kernel shape, dtype and device
            (the fused 3x3 weight; valid because the identity branch only
            exists when input and output channel counts are equal)
        """
        per_group = self.in_channels // self.groups
        kernel = torch.zeros_like(like)
        # In a grouped convolution output channel i reads input channel i as
        # local index i % per_group of its own group; the centre tap of the
        # 3x3 kernel passes that value through unchanged.
        for i in range(self.in_channels):
            kernel[i, i % per_group, 1, 1] = 1.0

        std = (bn.running_var + bn.eps).sqrt()
        scale = bn.weight / std
        return kernel * scale.reshape(-1, 1, 1, 1), bn.bias - bn.running_mean * scale

    def fuse_repvgg_block(self):
        """Collapse the training branches into ``rbr_reparam`` (inference form).

        Does nothing if the block is already in deploy form. Afterwards
        ``deploy`` is ``True`` and the three training branches are ``None``.
        """
        if self.deploy:
            return
        print("RepConv.fuse_repvgg_block")

        # 3x3 conv + BN -> biased 3x3 conv; 1x1 conv + BN -> biased 1x1 conv.
        dense = self.fuse_conv_bn(self.rbr_dense[0], self.rbr_dense[1])
        conv_1x1 = self.fuse_conv_bn(self.rbr_1x1[0], self.rbr_1x1[1])

        # Zero-pad the 1x1 kernel to 3x3 so the kernels can be summed.
        weight = dense.weight + torch.nn.functional.pad(conv_1x1.weight, [1, 1, 1, 1])
        bias = dense.bias + conv_1x1.bias

        if isinstance(self.rbr_identity, (nn.BatchNorm2d, nn.modules.batchnorm.SyncBatchNorm)):
            id_weight, id_bias = self._fuse_identity_bn(self.rbr_identity, dense.weight)
            weight = weight + id_weight
            bias = bias + id_bias

        dense.weight = torch.nn.Parameter(weight)
        dense.bias = torch.nn.Parameter(bias)

        self.rbr_reparam = dense
        self.deploy = True

        # Drop the training branches; the attributes remain, set to None.
        for branch in ("rbr_identity", "rbr_1x1", "rbr_dense"):
            if getattr(self, branch) is not None:
                delattr(self, branch)
            setattr(self, branch, None)