改进主要分为三步:
1.将主代码添加至common文件中;
2.将主代码中新增的模块(类)名注册到yolo.py文件的parse_model函数中;
3.重新配置相关.yaml文件
1.MobileNetv3
1.1主代码
############################################### MobileNet V3 #########################################################
# ---------------------------- MobileBlock start -------------------------------
class h_sigmoid(nn.Module):
    """Hard-sigmoid activation computed as ``ReLU6(x + 3) / 6``.

    Args:
        inplace: Forwarded to ``nn.ReLU6``.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        # ``x + 3`` allocates a new tensor, so the (possibly in-place) ReLU6
        # operates on that temporary rather than on the caller's tensor.
        shifted = x + 3
        return self.relu(shifted) / 6
class h_swish(nn.Module):
    """Hard-swish activation computed as ``x * h_sigmoid(x)``.

    Args:
        inplace: Forwarded to the wrapped ``h_sigmoid``.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        gate = self.sigmoid(x)
        return x * gate
class SELayer(nn.Module):
    """Squeeze-and-Excitation layer that rescales each channel of its input.

    Args:
        channel: Number of channels of the input feature map.
        reduction: Divisor applied to ``channel`` to get the width of the
            hidden fully-connected layer.
    """

    def __init__(self, channel, reduction=4):
        super().__init__()
        squeezed = channel // reduction
        # Squeeze: global average pooling down to 1x1 per channel.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: FC -> ReLU -> FC -> hard-sigmoid.
        self.fc = nn.Sequential(
            nn.Linear(channel, squeezed),
            nn.ReLU(inplace=True),
            nn.Linear(squeezed, channel),
            h_sigmoid(),
        )

    def forward(self, x):
        batch, channels, _height, _width = x.size()
        pooled = self.avg_pool(x).view(batch, channels)
        # Learned per-channel weights, reshaped so they broadcast over H and W.
        weights = self.fc(pooled).view(batch, channels, 1, 1)
        return x * weights
class conv_bn_hswish(nn.Module):
    """3x3 convolution followed by batch norm and hard-swish.

    Equivalent to::

        def conv_3x3_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                h_swish()
            )

    Args:
        c1: Number of input channels.
        c2: Number of output channels.
        stride: Stride of the 3x3 convolution.
    """

    def __init__(self, c1, c2, stride):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, 3, stride, 1, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = h_swish()

    def forward(self, x):
        normed = self.bn(self.conv(x))
        return self.act(normed)

    def fuseforward(self, x):
        # Forward path that skips the batch-norm layer (conv -> activation only).
        convolved = self.conv(x)
        return self.act(convolved)
class MobileNet_Block(nn.Module):
    """Inverted-residual block: optional expand, depthwise conv, optional SE, project.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        hidden_dim: Number of channels used by the depthwise convolution.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution; must be 1 or 2.
        use_se: Truthy to insert an ``SELayer``; otherwise an empty
            ``nn.Sequential`` placeholder keeps the layer indices stable.
        use_hs: Truthy to use ``h_swish``; otherwise ``nn.ReLU``.
    """

    def __init__(self, inp, oup, hidden_dim, kernel_size, stride, use_se, use_hs):
        super().__init__()
        assert stride in [1, 2]
        # The residual connection is only used when input and output shapes match.
        self.identity = stride == 1 and inp == oup
        pad = (kernel_size - 1) // 2

        if inp == hidden_dim:
            # Input already has the hidden width: no point-wise expansion.
            layers = [
                # Depthwise convolution.
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, pad,
                          groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # Squeeze-and-Excite.
                SELayer(hidden_dim) if use_se else nn.Sequential(),
                # Point-wise linear projection.
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            ]
        else:
            # Expand the channels first with a point-wise convolution.
            layers = [
                # Point-wise expansion.
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # Depthwise convolution.
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, pad,
                          groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                # Squeeze-and-Excite.
                SELayer(hidden_dim) if use_se else nn.Sequential(),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # Point-wise linear projection.
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        return x + out if self.identity else out
# ---------------------------- MobileBlock end ---------------------------------
1.2 .yaml配置文件
# YOLOv5 v6.0 backbone
backbone:
# MobileNetV3-small 11层
# [from, number, module, args]
# MobileNet_Block: [out_ch, hidden_ch, kernel_size, stride, use_se, use_hs]
# hidden_ch表示在Inverted residuals中的扩张通道数
# use_se 表示是否使用 SELayer, use_hs 表示使用 h_swish 还是 ReLU
[[-1, 1, conv_bn_hswish, [16, 2]], # 0-p1/2
[-1, 1, MobileNet_Block, [16, 16, 3, 2, 1, 0]], # 1-p2/4
[-1, 1, MobileNet_Block, [24, 72, 3, 2, 0, 0]], # 2-p3/8
[-1, 1, MobileNet_Block, [24, 88, 3, 1, 0, 0]], # 3-p3/8
[-1, 1, MobileNet_Block, [40, 96, 5, 2, 1, 1]], # 4-p4/16
[-1, 1, MobileNet_Block, [40, 240, 5, 1, 1, 1]], # 5-p4/16
[-1, 1, MobileNet_Block, [40, 240, 5, 1, 1, 1]], # 6-p4/16
[-1, 1, MobileNet_Block, [48, 120, 5, 1, 1, 1]], # 7-p4/16
[-1, 1, MobileNet_Block, [48, 144, 5, 1, 1, 1]], # 8-p4/16
[-1, 1, MobileNet_Block, [96, 288, 5, 2, 1, 1]], # 9-p5/32
[-1, 1, MobileNet_Block, [96, 576, 5, 1, 1, 1]], # 10-p5/32
[-1, 1, MobileNet_Block, [96, 576, 5, 1, 1, 1]], # 11-p5/32
]
# YOLOv5 v6.0 head
2.ShuffleNetV2
2.1主代码
#############################################ShuffleNet V2############################################
def shuffle_channel(x, num_groups):
    """Standard channel-shuffle: interleave channels across ``num_groups`` groups.

    Args:
        x: Tensor whose ``size()`` unpacks into (batch, channels, height, width).
        num_groups: Number of groups; must divide the channel count.

    Returns:
        A tensor of the same shape as ``x`` with its channels reordered.
    """
    n, c, h, w = x.size()
    assert c % num_groups == 0
    per_group = torch.div(c, num_groups, rounding_mode='trunc')
    # (n, groups, per_group, h, w) -> swap the two channel axes -> flatten back.
    grouped = x.view(n, num_groups, per_group, h, w)
    swapped = grouped.permute(0, 2, 1, 3, 4).contiguous()
    return swapped.view(n, c, h, w)
class CBRM(nn.Module):
    """Conv + BatchNorm + ReLU followed by max pooling.

    Both the 3x3 convolution and the 3x3 max pool use stride 2.

    Args:
        c1: Number of input channels.
        c2: Number of output channels.
    """

    def __init__(self, c1, c2):
        super().__init__()
        conv_layers = [
            nn.Conv2d(c1, c2, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c2),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*conv_layers)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)

    def forward(self, x):
        features = self.conv(x)
        return self.maxpool(features)
class Shuffle_Block(nn.Module):
    """Two-branch block whose outputs are concatenated and channel-shuffled.

    With ``stride == 1`` the input is split in half along the channel axis:
    one half passes through unchanged and the other goes through ``branch2``.
    With ``stride == 2`` the full input is fed to both ``branch1`` and
    ``branch2``.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels (each branch yields ``ch_out // 2``).
        stride: 1 or 2.

    Raises:
        ValueError: If ``stride`` is not within ``[1, 2]``.
    """

    def __init__(self, ch_in, ch_out, stride):
        super().__init__()
        if not 1 <= stride <= 2:
            raise ValueError('illegal stride value')
        self.stride = stride
        branch_features = ch_out // 2
        # With stride 1 the input is split in two, so ch_in must equal 2 * branch_features.
        assert (self.stride != 1) or (ch_in == branch_features << 1)

        if self.stride > 1:
            # branch1 = depthwise conv + point-wise conv (only built when downsampling).
            self.branch1 = nn.Sequential(
                self.depthwise_conv(ch_in, ch_in, kernel_size=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(ch_in),
                nn.Conv2d(ch_in, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(branch_features),
                nn.ReLU(inplace=True),
            )

        # branch2 = point-wise conv + depthwise conv + point-wise conv.
        branch2_in = ch_in if (self.stride > 1) else branch_features
        self.branch2 = nn.Sequential(
            nn.Conv2d(branch2_in, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
            self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        """Build a depthwise ``nn.Conv2d`` (``groups`` equals the input channel count)."""
        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

    def forward(self, x):
        if self.stride == 1:
            # Split along the channel axis; only the second half is transformed.
            untouched, to_transform = x.chunk(2, dim=1)
            pieces = (untouched, self.branch2(to_transform))
        else:
            pieces = (self.branch1(x), self.branch2(x))
        merged = torch.cat(pieces, dim=1)
        return shuffle_channel(merged, 2)
2.2 .yaml配置文件
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
# Shuffle_Block: [out, stride]
[[ -1, 1, CBRM, [ 32 ] ], # 0-P2/4
[ -1, 1, Shuffle_Block, [ 128, 2 ] ], # 1-P3/8
[ -1, 1, Shuffle_Block, [ 128, 1 ] ], # 2
[ -1, 1, Shuffle_Block, [ 256, 2 ] ], # 3-P4/16
[ -1, 1, Shuffle_Block, [ 256, 1 ] ], # 4
[ -1, 1, Shuffle_Block, [ 512, 2 ] ], # 5-P5/32
[ -1, 1, Shuffle_Block, [ 512, 1 ] ], # 6
]
# YOLOv5 v6.0 head
3.EfficientNetv2
3.1主代码
class stem(nn.Module):
    """Convolution + BatchNorm + SiLU building block.

    Args:
        c1: Number of input channels.
        c2: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        groups: Number of convolution groups.
    """

    def __init__(self, c1, c2, kernel_size=3, stride=1, groups=1):
        super().__init__()
        # padding is 1 for a 3x3 kernel and 0 for a 1x1 kernel.
        same_padding = (kernel_size - 1) // 2
        # No bias: the convolution is followed by a batch-norm layer.
        self.conv = nn.Conv2d(c1, c2, kernel_size, stride, padding=same_padding, groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(c2, eps=1e-3, momentum=0.1)
        self.act = nn.SiLU(inplace=True)

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Randomly zero whole samples of ``x`` (stochastic depth).

    One random keep/drop decision is drawn per entry along the first
    dimension and broadcast over all remaining dimensions. Kept samples are
    scaled by ``1 / (1 - drop_prob)``.

    Args:
        x: Input tensor; the first dimension is treated as the sample axis.
        drop_prob: Probability of dropping a sample. ``None`` and ``0`` both
            disable dropping.
        training: Dropping is only applied when this is true.

    Returns:
        ``x`` itself when dropping is disabled, otherwise a new tensor.
    """
    # ``None`` is what ``DropPath()`` passes by default; treat it as "disabled"
    # instead of failing on ``1 - None`` below.
    if drop_prob is None or drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
    if keep_prob > 0:
        return x.div(keep_prob) * random_tensor
    # keep_prob == 0: every sample is dropped. Skip the rescale, because
    # dividing by zero would give inf and inf * 0 is NaN.
    return x * random_tensor
class DropPath(nn.Module):
    """Module wrapper around ``drop_path`` (per-sample stochastic depth).

    Args:
        drop_prob: Probability of dropping a sample. ``None`` (the default)
            and ``0`` both make the module a no-op.
    """

    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # Short-circuit when nothing can be dropped. This also keeps the
        # default ``drop_prob=None`` from reaching arithmetic in training mode.
        if not self.training or not self.drop_prob:
            return x
        return drop_path(x, self.drop_prob, self.training)
class SqueezeExcite_efficientv2(nn.Module):
    """Squeeze-and-Excitation gate built from 1x1 convolutions.

    Args:
        c1: Number of input channels.
        c2: Number of channels produced by the expanding 1x1 convolution.
        se_ratio: The reduced width is ``int(c1 * se_ratio)``.
        act_layer: Activation class used between the two 1x1 convolutions;
            it is instantiated with ``inplace=True``.
    """

    def __init__(self, c1, c2, se_ratio=0.25, act_layer=nn.ReLU):
        super().__init__()
        self.gate_fn = nn.Sigmoid()
        reduced_chs = int(c1 * se_ratio)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_reduce = nn.Conv2d(c1, reduced_chs, 1, bias=True)
        self.act1 = act_layer(inplace=True)
        self.conv_expand = nn.Conv2d(reduced_chs, c2, 1, bias=True)

    def forward(self, x):
        # Global average pool -> 1x1 conv (acts as an FC layer) -> activation
        # -> 1x1 conv -> sigmoid gate.
        squeezed = self.avg_pool(x)
        hidden = self.act1(self.conv_reduce(squeezed))
        gate = self.gate_fn(self.conv_expand(hidden))
        # Expand the gate to the shape of x before scaling.
        return x * gate.expand_as(x)
# Fused-MBConv 将 MBConv 中的 depthwise conv3×3 和扩展 conv1×1 替换为单个常规 conv3×3。
class FusedMBConv(nn.Module):
    """Fused-MBConv block: a regular k x k conv, optionally followed by a 1x1 projection.

    Args:
        c1: Number of input channels.
        c2: Number of output channels.
        k: Kernel size of the main convolution.
        s: Stride of the main convolution.
        expansion: Channel multiplier for the hidden width. With ``1`` the
            block is a single ``stem(c1, c2, k, s)``.
        se_ration: Accepted for signature parity; not used by this block.
        dropout_rate: Accepted for signature parity; not used by this block.
        drop_connect_rate: Drop-path probability applied on the residual
            branch when the shortcut is active.
    """

    def __init__(self, c1, c2, k=3, s=1, expansion=1, se_ration=0, dropout_rate=0.2, drop_connect_rate=0.2):
        super().__init__()
        # The residual shortcut needs stride 1 and matching channel counts.
        self.has_shortcut = (s == 1 and c1 == c2)
        # With expansion != 1 the hidden width becomes expansion * c1.
        self.has_expansion = expansion != 1
        hidden_c = c1 * expansion

        if self.has_expansion:
            self.expansion_conv = stem(c1, hidden_c, kernel_size=k, stride=s)
            self.project_conv = stem(hidden_c, c2, kernel_size=1, stride=1)
        else:
            self.project_conv = stem(c1, c2, kernel_size=k, stride=s)

        self.drop_connect_rate = drop_connect_rate
        if self.has_shortcut and drop_connect_rate > 0:
            self.dropout = DropPath(drop_connect_rate)

    def forward(self, x):
        out = self.expansion_conv(x) if self.has_expansion else x
        out = self.project_conv(out)
        if not self.has_shortcut:
            return out
        if self.drop_connect_rate > 0:
            out = self.dropout(out)
        out += x
        return out
class MBConv(nn.Module):
    """MBConv block: 1x1 expand, depthwise k x k conv, optional SE, 1x1 project.

    Args:
        c1: Number of input channels.
        c2: Number of output channels.
        k: Kernel size of the depthwise convolution.
        s: Stride of the depthwise convolution.
        expansion: Channel multiplier for the hidden width.
        se_ration: Squeeze ratio handed to ``SqueezeExcite_efficientv2``;
            values ``<= 0`` replace the SE module with ``nn.Identity``.
        dropout_rate: Accepted for signature parity; not used by this block.
        drop_connect_rate: Drop-path probability applied on the residual
            branch when the shortcut is active.
    """

    def __init__(self, c1, c2, k=3, s=1, expansion=1, se_ration=0, dropout_rate=0.2, drop_connect_rate=0.2):
        super().__init__()
        self.has_shortcut = (s == 1 and c1 == c2)
        hidden_c = c1 * expansion
        self.expansion_conv = stem(c1, hidden_c, kernel_size=1, stride=1)
        self.dw_conv = stem(hidden_c, hidden_c, kernel_size=k, stride=s, groups=hidden_c)
        if se_ration > 0:
            self.se = SqueezeExcite_efficientv2(hidden_c, hidden_c, se_ration)
        else:
            self.se = nn.Identity()
        self.project_conv = stem(hidden_c, c2, kernel_size=1, stride=1)
        self.drop_connect_rate = drop_connect_rate
        if self.has_shortcut and drop_connect_rate > 0:
            self.dropout = DropPath(drop_connect_rate)

    def forward(self, x):
        # 1x1 expand -> depthwise conv -> SE (or identity) -> 1x1 project.
        out = self.project_conv(self.se(self.dw_conv(self.expansion_conv(x))))
        if not self.has_shortcut:
            return out
        # Drop-path is applied to the residual branch only.
        if self.drop_connect_rate > 0:
            out = self.dropout(out)
        # Input and output channel counts match here, so they can be added.
        out += x
        return out
# ------------------------------Efficientnetv2 end--------------------------------------
3.2 .yaml配置文件
# YOLOv5 v6.0 backbone
backbone:
[[-1, 1, stem, [24, 3, 2]], # 0-P1/2 efficientnetv2 一开始是Stem = 普通的卷积+bn+激活 640*640*3 --> 320*320*24
# # [out_channel,kernel_size,stride,expansion,se_ration]
[-1, 2, FusedMBConv, [24, 3, 1, 1, 0]], # 1 2个FusedMBConv(expansion=1时仅为3*3conv+bn+SiLU,不含se) 320*320*24-->320*320*24
[-1, 1, FusedMBConv, [48, 3, 2, 4, 0]], # 2 这里stride=2,特征图尺寸缩小一半,expansion=4表示中间特征图的通道数先扩展为输入的4倍,再降维到输出通道数 320*320*24-->160*160*48
[-1, 3, FusedMBConv, [48, 3, 1, 4, 0]], # 3 三个FusedMBConv
[-1, 1, FusedMBConv, [64, 3, 2, 4, 0]], # 4 160*160*48-->80*80*64
[-1, 3, FusedMBConv, [64, 3, 1, 4, 0]], # 5
[-1, 1, MBConv, [128, 3, 2, 4, 0.25]], # 6 这里stride=2,特征图尺寸缩小一半, 40*40*128
[-1, 5, MBConv, [128, 3, 1, 4, 0.25]], # 7
[-1, 1, MBConv, [160, 3, 2, 6, 0.25]], # 8 这里stride=2,特征图尺寸缩小一半,20*20*160
[-1, 8, MBConv, [160, 3, 1, 6, 0.25]], # 9
[-1, 1, MBConv, [256, 3, 2, 4, 0.25]], # 10 这里stride=2,特征图尺寸缩小一半,10*10*256
[-1, 14, MBConv, [256, 3, 1, 4, 0.25]], # 11
[-1, 1, SPPF, [1024, 5]], #12
]
# YOLOv5 v6.0 head
如果喜欢,请给我点赞、关注哦