多种轻量化算法改进YOLOv5模型

yolo我不会

已于 2023-08-31 16:45:07 修改

阅读量1.6k

点赞数 6

文章标签： YOLO python pycharm

于 2023-08-30 20:44:19 首次发布

本文链接：https://blog.csdn.net/m0_61348166/article/details/132590076

版权

改进主要分为三步:

1.将主代码添加至models/common.py文件中；

2.将主代码中新增的模块类名注册到models/yolo.py文件的parse_model函数中；

3.重新配置相关.yaml文件

1.MobileNetv3

1.1主代码

###############################################  MobileNet V3  #########################################################
# ---------------------------- MobileBlock start -------------------------------
class h_sigmoid(nn.Module):
    """Hard-sigmoid activation computed as ``ReLU6(x + 3) / 6``.

    Args:
        inplace: Forwarded to the underlying ``nn.ReLU6``.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        """Return the hard-sigmoid of ``x`` (values end up in ``[0, 1]``)."""
        # ``x + 3`` is a fresh tensor, so an in-place ReLU6 never touches the
        # caller's ``x``.
        shifted = x + 3
        clipped = self.relu(shifted)
        return clipped / 6


class h_swish(nn.Module):
    """Hard-swish activation: the input multiplied by its hard-sigmoid gate.

    Args:
        inplace: Forwarded to the wrapped ``h_sigmoid`` module.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        """Return ``x * h_sigmoid(x)``."""
        gate = self.sigmoid(x)
        return x * gate


class SELayer(nn.Module):
    """Squeeze-and-Excitation channel attention with a hard-sigmoid gate.

    Args:
        channel: Number of input (and output) channels.
        reduction: Divisor applied to ``channel`` for the bottleneck width.
    """

    def __init__(self, channel, reduction=4):
        super().__init__()
        bottleneck = channel // reduction
        # Squeeze: global average pooling down to one value per channel.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: FC -> ReLU -> FC -> hard-sigmoid.
        excitation = [
            nn.Linear(channel, bottleneck),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channel),
            h_sigmoid(),
        ]
        self.fc = nn.Sequential(*excitation)

    def forward(self, x):
        """Scale every channel of the 4-D input ``x`` by its learned weight."""
        batch, channels, _h, _w = x.size()
        pooled = self.avg_pool(x).view(batch, channels)
        # One learned weight per channel, reshaped so it broadcasts over H x W.
        weights = self.fc(pooled).view(batch, channels, 1, 1)
        return x * weights


class conv_bn_hswish(nn.Module):
    """3x3 convolution (padding 1, no bias) + BatchNorm + hard-swish.

    Functionally the same as::

        nn.Sequential(
            nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
            nn.BatchNorm2d(oup),
            h_swish()
        )

    but the layers are kept as the named attributes ``conv``, ``bn`` and
    ``act``.

    Args:
        c1: Input channels.
        c2: Output channels.
        stride: Stride of the 3x3 convolution.
    """

    def __init__(self, c1, c2, stride):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, 3, stride, 1, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = h_swish()

    def forward(self, x):
        """Apply conv -> BN -> hard-swish."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def fuseforward(self, x):
        """Apply conv -> hard-swish, skipping ``self.bn``.

        NOTE(review): presumably intended for use after BatchNorm has been
        folded into ``self.conv`` by the model-fusing code -- confirm against
        the caller.
        """
        convolved = self.conv(x)
        return self.act(convolved)


class MobileNet_Block(nn.Module):
    """MobileNetV3 inverted-residual block.

    Two layouts are built, depending on whether channel expansion is needed:

    * ``inp == hidden_dim``: depthwise conv -> BN -> activation -> SE ->
      1x1 projection -> BN.
    * otherwise: 1x1 expansion -> BN -> activation -> depthwise conv -> BN ->
      SE -> activation -> 1x1 projection -> BN.

    Args:
        inp: Input channels.
        oup: Output channels.
        hidden_dim: Expanded (hidden) channel count.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution; must be 1 or 2.
        use_se: Truthy to insert an ``SELayer``; otherwise an empty
            ``nn.Sequential`` placeholder keeps the layer indices stable.
        use_hs: Truthy to use ``h_swish``; otherwise ``nn.ReLU``.
    """

    def __init__(self, inp, oup, hidden_dim, kernel_size, stride, use_se, use_hs):
        super().__init__()
        assert stride in [1, 2]

        # The residual connection is only valid when the shape is preserved.
        self.identity = stride == 1 and inp == oup

        def make_act():
            return h_swish() if use_hs else nn.ReLU(inplace=True)

        def make_se():
            return SELayer(hidden_dim) if use_se else nn.Sequential()

        needs_expansion = inp != hidden_dim
        layers = []

        if needs_expansion:
            # pw: expand the channels first.
            layers += [
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                make_act(),
            ]

        # dw: depthwise convolution (one group per channel).
        layers += [
            nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2,
                      groups=hidden_dim, bias=False),
            nn.BatchNorm2d(hidden_dim),
        ]

        # Squeeze-and-Excite sits after the activation when there is no
        # expansion, and before it when there is.
        if needs_expansion:
            layers += [make_se(), make_act()]
        else:
            layers += [make_act(), make_se()]

        # pw-linear: project to the output channels without an activation.
        layers += [
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]

        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Run the block, adding the input back when the shortcut applies."""
        out = self.conv(x)
        return x + out if self.identity else out
# ---------------------------- MobileBlock end ---------------------------------

1.2 .yaml配置文件

# YOLOv5 v6.0 backbone
backbone:
  # MobileNetV3-small：1个conv_bn_hswish + 11个MobileNet_Block（层索引0-11）
  # [from, number, module, args]
  # MobileNet_Block: [out_ch, hidden_ch, kernel_size, stride, use_se, use_hs]
  # hidden_ch表示在Inverted residuals中的扩张通道数
  # use_se 表示是否使用 SELayer, use_hs 表示使用 h_swish 还是 ReLU
  [[-1, 1, conv_bn_hswish, [16, 2]],                 # 0-p1/2
   [-1, 1, MobileNet_Block, [16,  16, 3, 2, 1, 0]],  # 1-p2/4
   [-1, 1, MobileNet_Block, [24,  72, 3, 2, 0, 0]],  # 2-p3/8
   [-1, 1, MobileNet_Block, [24,  88, 3, 1, 0, 0]],  # 3-p3/8
   [-1, 1, MobileNet_Block, [40,  96, 5, 2, 1, 1]],  # 4-p4/16
   [-1, 1, MobileNet_Block, [40, 240, 5, 1, 1, 1]],  # 5-p4/16
   [-1, 1, MobileNet_Block, [40, 240, 5, 1, 1, 1]],  # 6-p4/16
   [-1, 1, MobileNet_Block, [48, 120, 5, 1, 1, 1]],  # 7-p4/16
   [-1, 1, MobileNet_Block, [48, 144, 5, 1, 1, 1]],  # 8-p4/16
   [-1, 1, MobileNet_Block, [96, 288, 5, 2, 1, 1]],  # 9-p5/32
   [-1, 1, MobileNet_Block, [96, 576, 5, 1, 1, 1]],  # 10-p5/32
   [-1, 1, MobileNet_Block, [96, 576, 5, 1, 1, 1]],  # 11-p5/32
  ]

# YOLOv5 v6.0 head

2.ShuffleNetV2

2.1主代码

#############################################ShuffleNet V2############################################

def shuffle_channel(x, num_groups):
    """Channel shuffle: interleave the channels of ``num_groups`` groups.

    The channel axis is viewed as ``(num_groups, channels_per_group)``, the two
    axes are swapped, and the result is flattened back to a single channel
    axis.

    Args:
        x: 4-D tensor whose size unpacks as (batch, channels, height, width).
        num_groups: Number of channel groups; must divide the channel count.

    Returns:
        A tensor with the same shape as ``x`` and reordered channels.

    Raises:
        AssertionError: If the channel count is not divisible by
            ``num_groups``.
    """
    batch_size, num_channels, height, width = x.size()
    assert num_channels % num_groups == 0, \
        'number of channels must be divisible by num_groups'
    # The sizes are plain Python ints, so integer floor division is exact and
    # no tensor has to be created just to compute a view dimension.
    channels_per_group = num_channels // num_groups

    x = x.view(batch_size, num_groups, channels_per_group, height, width)
    x = x.permute(0, 2, 1, 3, 4)
    # permute() yields a non-contiguous tensor; view() needs contiguous memory.
    return x.contiguous().view(batch_size, num_channels, height, width)

class CBRM(nn.Module):
    """Conv + BatchNorm + ReLU + MaxPool2d stem.

    Both the 3x3 convolution and the 3x3 max pooling use stride 2.

    Args:
        c1: Input channels.
        c2: Output channels.
    """

    def __init__(self, c1, c2):
        super().__init__()
        conv_bn_relu = [
            nn.Conv2d(c1, c2, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c2),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*conv_bn_relu)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)

    def forward(self, x):
        """Apply conv -> BN -> ReLU, then max pooling."""
        features = self.conv(x)
        return self.maxpool(features)


class Shuffle_Block(nn.Module):
    """ShuffleNetV2 unit.

    With ``stride == 1`` the input is split in half along the channel axis:
    one half passes through untouched and the other goes through ``branch2``.
    With ``stride > 1`` the whole input goes through both ``branch1`` and
    ``branch2``. The two results are concatenated and channel-shuffled with
    two groups.

    Args:
        ch_in: Input channels.
        ch_out: Output channels; each branch produces ``ch_out // 2``.
        stride: Stride of the depthwise convolutions; must lie in ``[1, 2]``.

    Raises:
        ValueError: If ``stride`` is outside ``[1, 2]``.
        AssertionError: If ``stride == 1`` and ``ch_in`` differs from twice
            the per-branch width.
    """

    def __init__(self, ch_in, ch_out, stride):
        super().__init__()

        stride_ok = 1 <= stride <= 2
        if not stride_ok:
            raise ValueError('illegal stride value')
        self.stride = stride

        half = ch_out // 2
        downsample = self.stride > 1
        # A stride-1 unit splits its input in two, so the channel count must
        # already equal twice the per-branch width.
        assert downsample or (ch_in == half << 1)

        # Each branch is a depthwise-separable convolution: a depthwise conv
        # plus pointwise (1x1) convs.
        if downsample:
            self.branch1 = nn.Sequential(
                self.depthwise_conv(ch_in, ch_in, kernel_size=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(ch_in),
                nn.Conv2d(ch_in, half, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(half),
                nn.ReLU(inplace=True),
            )

        # branch2 sees the full input when downsampling, otherwise one half.
        branch2_in = ch_in if downsample else half
        self.branch2 = nn.Sequential(
            nn.Conv2d(branch2_in, half, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(half),
            nn.ReLU(inplace=True),
            self.depthwise_conv(half, half, kernel_size=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(half),
            nn.Conv2d(half, half, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(half),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        """Build a depthwise ``nn.Conv2d`` (``groups`` equals input channels)."""
        return nn.Conv2d(i, o, kernel_size, stride=stride, padding=padding, groups=i, bias=bias)

    def forward(self, x):
        """Run both branches, concatenate on channels, then shuffle."""
        if self.stride == 1:
            # Split along dim 1; only the second half is transformed.
            passthrough, active = x.chunk(2, dim=1)
            pieces = (passthrough, self.branch2(active))
        else:
            pieces = (self.branch1(x), self.branch2(x))

        merged = torch.cat(pieces, dim=1)
        return shuffle_channel(merged, 2)

2.2 .yaml配置文件

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  # Shuffle_Block: [out, stride]
  [[ -1, 1, CBRM, [ 32 ] ], # 0-P2/4
   [ -1, 1, Shuffle_Block, [ 128, 2 ] ],  # 1-P3/8
   [ -1, 1, Shuffle_Block, [ 128, 1 ] ],  # 2
   [ -1, 1, Shuffle_Block, [ 256, 2 ] ],  # 3-P4/16
   [ -1, 1, Shuffle_Block, [ 256, 1 ] ],  # 4
   [ -1, 1, Shuffle_Block, [ 512, 2 ] ],  # 5-P5/32
   [ -1, 1, Shuffle_Block, [ 512, 1 ] ],  # 6
  ]

# YOLOv5 v6.0 head

3.EfficientNetv2

3.1主代码

class stem(nn.Module):
    """Convolution + BatchNorm + SiLU building block.

    Args:
        c1: Input channels.
        c2: Output channels.
        kernel_size: Convolution kernel size; padding is
            ``(kernel_size - 1) // 2`` (1 for a 3x3 kernel, 0 for 1x1).
        stride: Convolution stride.
        groups: Convolution groups.
    """

    def __init__(self, c1, c2, kernel_size=3, stride=1, groups=1):
        super().__init__()
        same_padding = (kernel_size - 1) // 2
        # A BatchNorm layer follows, so the convolution has no bias.
        self.conv = nn.Conv2d(c1, c2, kernel_size, stride, padding=same_padding, groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(c2, eps=1e-3, momentum=0.1)
        self.act = nn.SiLU(inplace=True)

    def forward(self, x):
        """Apply conv -> BN -> SiLU."""
        return self.act(self.bn(self.conv(x)))


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Stochastic depth: randomly zero whole samples of ``x`` during training.

    One keep/drop decision is drawn per sample (first dimension) and broadcast
    over the remaining dimensions. Kept samples are scaled by
    ``1 / (1 - drop_prob)``.

    Args:
        x: Input tensor; the first dimension indexes samples.
        drop_prob: Probability of dropping a sample, in ``[0, 1]``. ``0`` and
            ``None`` both disable dropping.
        training: Dropping is only applied when this is true.

    Returns:
        ``x`` itself when dropping is disabled, otherwise a new tensor of the
        same shape.

    Raises:
        ValueError: If ``drop_prob`` is outside ``[0, 1]``.
    """
    # ``not drop_prob`` also covers ``None`` so that "no probability
    # configured" behaves like 0 instead of failing on ``1 - None``.
    if not drop_prob or not training:
        return x
    if not 0. <= drop_prob <= 1.:
        raise ValueError(f'drop_prob must be in [0, 1], got {drop_prob}')

    keep_prob = 1 - drop_prob
    # Shape (batch, 1, 1, ...) so one draw applies to an entire sample.
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0

    # With keep_prob == 0 every sample is dropped; skip the rescale so the
    # result is zeros rather than the NaN produced by dividing by zero.
    scaled = x.div(keep_prob) if keep_prob > 0. else x
    return scaled * random_tensor


class DropPath(nn.Module):
    """Module wrapper around ``drop_path`` (per-sample stochastic depth).

    Args:
        drop_prob: Probability of dropping a sample. ``None`` (the default)
            and ``0`` both disable dropping.
    """

    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        """Return ``x`` unchanged when disabled or in eval mode, else drop paths."""
        # The default ``drop_prob=None`` must act as "disabled"; forwarding it
        # would fail on ``1 - None`` as soon as the module is in training mode.
        if not self.drop_prob or not self.training:
            return x
        return drop_path(x, self.drop_prob, self.training)


class SqueezeExcite_efficientv2(nn.Module):
    """Squeeze-and-Excitation gate built from 1x1 convolutions.

    Args:
        c1: Input channels; the bottleneck width is ``int(c1 * se_ratio)``.
        c2: Channels produced by the expanding 1x1 convolution (the gate).
        se_ratio: Bottleneck ratio relative to ``c1``.
        act_layer: Activation class used between the two convolutions; it is
            instantiated with ``inplace=True``.
    """

    def __init__(self, c1, c2, se_ratio=0.25, act_layer=nn.ReLU):
        super().__init__()
        self.gate_fn = nn.Sigmoid()
        squeezed = int(c1 * se_ratio)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_reduce = nn.Conv2d(c1, squeezed, 1, bias=True)
        self.act1 = act_layer(inplace=True)
        self.conv_expand = nn.Conv2d(squeezed, c2, 1, bias=True)

    def forward(self, x):
        """Multiply ``x`` by its per-channel sigmoid gate."""
        # Squeeze: global average pooling.
        gate = self.avg_pool(x)
        # Excite: 1x1 convs play the role of the two fully connected layers.
        gate = self.act1(self.conv_reduce(gate))
        gate = self.gate_fn(self.conv_expand(gate))
        # Expand the gate to the shape of x before scaling.
        return x * gate.expand_as(x)

# Fused-MBConv replaces MBConv's depthwise 3x3 conv and expansion 1x1 conv with
# a single regular 3x3 conv.
class FusedMBConv(nn.Module):
    """Fused-MBConv block built from ``stem`` (conv + BN + SiLU) layers.

    With ``expansion != 1`` a ``k x k`` conv expands to ``c1 * expansion``
    channels and a 1x1 conv projects to ``c2``; otherwise a single ``k x k``
    conv maps ``c1`` straight to ``c2``.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size of the main convolution.
        s: Stride of the main convolution.
        expansion: Channel expansion factor; ``1`` disables expansion.
        se_ration: Accepted for signature parity with ``MBConv``; not used by
            this block.
        dropout_rate: Accepted but not used by this block.
        drop_connect_rate: Drop-path probability on the residual branch; only
            takes effect when the shortcut is active.
    """

    def __init__(self, c1, c2, k=3, s=1, expansion=1, se_ration=0, dropout_rate=0.2, drop_connect_rate=0.2):
        super().__init__()
        # The residual shortcut needs matching shapes: stride 1 and c1 == c2.
        self.has_shortcut = (s == 1 and c1 == c2)
        self.has_expansion = expansion != 1
        expanded_c = c1 * expansion

        if not self.has_expansion:
            self.project_conv = stem(c1, c2, kernel_size=k, stride=s)
        else:
            self.expansion_conv = stem(c1, expanded_c, kernel_size=k, stride=s)
            self.project_conv = stem(expanded_c, c2, kernel_size=1, stride=1)

        self.drop_connect_rate = drop_connect_rate
        if self.has_shortcut and drop_connect_rate > 0:
            self.dropout = DropPath(drop_connect_rate)

    def forward(self, x):
        """Run the block and add the input back when the shortcut applies."""
        out = self.expansion_conv(x) if self.has_expansion else x
        out = self.project_conv(out)

        if not self.has_shortcut:
            return out

        if self.drop_connect_rate > 0:
            out = self.dropout(out)
        out += x
        return out


class MBConv(nn.Module):
    """MBConv block: 1x1 expansion -> depthwise conv -> optional SE -> 1x1 projection.

    Every convolution is a ``stem`` (conv + BN + SiLU).

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size of the depthwise convolution.
        s: Stride of the depthwise convolution.
        expansion: Channel expansion factor applied to ``c1``.
        se_ration: Squeeze-and-Excitation ratio; values ``<= 0`` replace the
            SE module with ``nn.Identity``.
        dropout_rate: Accepted but not used by this block.
        drop_connect_rate: Drop-path probability on the residual branch; only
            takes effect when the shortcut is active.
    """

    def __init__(self, c1, c2, k=3, s=1, expansion=1, se_ration=0, dropout_rate=0.2, drop_connect_rate=0.2):
        super().__init__()
        # The residual shortcut needs matching shapes: stride 1 and c1 == c2.
        self.has_shortcut = (s == 1 and c1 == c2)
        expanded_c = c1 * expansion

        self.expansion_conv = stem(c1, expanded_c, kernel_size=1, stride=1)
        # groups == channels makes this a depthwise convolution.
        self.dw_conv = stem(expanded_c, expanded_c, kernel_size=k, stride=s, groups=expanded_c)
        if se_ration > 0:
            self.se = SqueezeExcite_efficientv2(expanded_c, expanded_c, se_ration)
        else:
            self.se = nn.Identity()
        self.project_conv = stem(expanded_c, c2, kernel_size=1, stride=1)

        self.drop_connect_rate = drop_connect_rate
        if self.has_shortcut and drop_connect_rate > 0:
            self.dropout = DropPath(drop_connect_rate)

    def forward(self, x):
        """Run the block and add the input back when the shortcut applies."""
        # Expand with a 1x1 conv, extract features with the depthwise conv.
        out = self.dw_conv(self.expansion_conv(x))
        # Channel attention (identity when SE is disabled), then 1x1 projection.
        out = self.project_conv(self.se(out))

        if not self.has_shortcut:
            return out

        # Drop-path is applied to the residual branch only, before the
        # shortcut addition (input and output channels match here).
        if self.drop_connect_rate > 0:
            out = self.dropout(out)
        out += x
        return out

# ------------------------------Efficientnetv2  end--------------------------------------

3.2 .yaml配置文件

# YOLOv5 v6.0 backbone
backbone:
  [[-1, 1, stem, [24, 3, 2]],  # 0-P1/2 efficientnetv2 一开始是Stem = 普通的卷积+bn+激活  640*640*3 --> 320*320*24
#                    # [out_channel,kernel_size,stride,expansion,se_ration]
   [-1, 2, FusedMBConv, [24, 3, 1, 1, 0]], # 1 2个FusedMBConv（expansion=1时仅为单个3*3conv+bn+SiLU，不含se）   320*320*24-->320*320*24

   [-1, 1, FusedMBConv, [48, 3, 2, 4, 0]], # 2 这里stride=2，特征图尺寸缩小一半，expansion=4表示中间特征图的通道数先扩展为输入的4倍，再投影到输出通道 320*320*24-->160*160*48
   [-1, 3, FusedMBConv, [48, 3, 1, 4, 0]], # 3 三个FusedMBConv

   [-1, 1, FusedMBConv, [64, 3, 2, 4, 0]], # 4 160*160*48-->80*80*64
   [-1, 3, FusedMBConv, [64, 3, 1, 4, 0]], # 5

   [-1, 1, MBConv, [128, 3, 2, 4, 0.25]], # 6  这里stride=2，特征图尺寸缩小一半， 80*80*64-->40*40*128
   [-1, 5, MBConv, [128, 3, 1, 4, 0.25]], # 7

   [-1, 1, MBConv, [160, 3, 2, 6, 0.25]], # 8 这里stride=2，特征图尺寸缩小一半，40*40*128-->20*20*160
   [-1, 8, MBConv, [160, 3, 1, 6, 0.25]], # 9

   [-1, 1, MBConv, [256, 3, 2, 4, 0.25]], # 10 这里stride=2，特征图尺寸缩小一半，20*20*160-->10*10*256
   [-1, 14, MBConv, [256, 3, 1, 4, 0.25]], # 11

   [-1, 1, SPPF, [1024, 5]], #12
  ]
# YOLOv5 v6.0 head

如果喜欢，请给我点赞、关注哦