Switching the YOLOv5 feature-extraction network to the FasterNet structure

Adding the FasterNet structure to YOLOv5

Paper: https://arxiv.org/abs/2303.03667
timm must be installed first:
pip install timm
Then import DropPath at the top of models/common.py:
from timm.models.layers import DropPath

Option 1: Replace the YOLOv5 backbone with the FasterNet backbone

Create a model configuration file, yolov5-pconv.yaml:

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 
# Parameters
nc: 80  # number of classes
depth_multiple: 1  # model depth multiple
width_multiple: 1  # layer channel multiple
 
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32
 
# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, PatchEmbed, [40, 4, 4, nn.BatchNorm2d]],  # 0-P2/4
   [-1, 1, BasicStage, [40, 1]],  # 1
   [-1, 1, PatchMerging, [80,2,2]],  # 2-P3/8
   [-1, 1, BasicStage, [80, 1]],  # 3
   [-1, 1, BasicStage, [80, 1]],  # 4
   [-1, 1, PatchMerging, [160,2,2]],  # 5-P4/16
   [-1, 1, BasicStage, [160, 1]],  # 6
   [-1, 1, BasicStage, [160, 1]],  # 7
   [-1, 1, BasicStage, [160, 1]],  # 8
   [-1, 1, BasicStage, [160, 1]],  # 9
   [-1, 1, BasicStage, [160, 1]],  # 10
   [-1, 1, BasicStage, [160, 1]],  # 11
   [-1, 1, BasicStage, [160, 1]],  # 12
   [-1, 1, BasicStage, [160, 1]],  # 13
   [-1, 1, PatchMerging, [320,2,2]], # 14-P5/32
   [-1, 1, BasicStage, [320, 1]], # 15
   [-1, 1, BasicStage, [320, 1]], # 16
   [-1, 1, SPPF, [320, 5]],  # 17
  ]
 
# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 13], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3, [256, False]],  # 21
 
   [-1, 1, Conv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3, [128, False]],  # 25 (P3/8-small)
 
   [-1, 1, Conv, [128, 3, 2]],
   [[-1, 22], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3, [256, False]],  # 28 (P4/16-medium)
 
   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 18], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3, [512, False]],  # 31 (P5/32-large)
 
   [[25, 28, 31], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

Add the following classes to models/common.py:

class Partial_conv3(nn.Module):
    def __init__(self, dim, n_div, forward):
        super().__init__()
        self.dim_conv3 = dim // n_div
        self.dim_untouched = dim - self.dim_conv3
        self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False)
 
        if forward == 'slicing':
            self.forward = self.forward_slicing
        elif forward == 'split_cat':
            self.forward = self.forward_split_cat
        else:
            raise NotImplementedError
 
    def forward_slicing(self, x):
        # only for inference
        x = x.clone()  # !!! Keep the original input intact for the residual connection later
        x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :])
 
        return x
 
    def forward_split_cat(self, x):
        # for training/inference
        x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1)
        x1 = self.partial_conv3(x1)
        x = torch.cat((x1, x2), 1)
        return x
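
Partial_conv3 implements the paper's PConv: only the first dim // n_div channels go through the 3x3 convolution, and the remaining channels are passed through untouched, which is where the FLOPs saving comes from. A minimal sanity check (a sketch, runnable once the class above is defined):

import torch

pconv = Partial_conv3(dim=64, n_div=4, forward='split_cat')
x = torch.randn(1, 64, 32, 32)
y = pconv(x)
print(y.shape)  # torch.Size([1, 64, 32, 32]) -- shape is preserved
assert torch.equal(y[:, 16:], x[:, 16:])  # the 48 untouched channels pass through as-is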
 
 
class MLPBlock(nn.Module):
    def __init__(self,
                 dim,
                 n_div,
                 mlp_ratio,
                 drop_path,
                 layer_scale_init_value,
                 act_layer,
                 norm_layer,
                 pconv_fw_type
                 ):
 
        super().__init__()
        self.dim = dim
        self.mlp_ratio = mlp_ratio
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.n_div = n_div
 
        mlp_hidden_dim = int(dim * mlp_ratio)
 
        mlp_layer = [
            nn.Conv2d(dim, mlp_hidden_dim, 1, bias=False),
            norm_layer(mlp_hidden_dim),
            act_layer(),
            nn.Conv2d(mlp_hidden_dim, dim, 1, bias=False)
        ]
 
        self.mlp = nn.Sequential(*mlp_layer)
 
        self.spatial_mixing = Partial_conv3(
            dim,
            n_div,
            pconv_fw_type
        )
 
        if layer_scale_init_value > 0:
            self.layer_scale = nn.Parameter(layer_scale_init_value * torch.ones((dim)), requires_grad=True)
            self.forward = self.forward_layer_scale
        # otherwise the plain forward defined below is used
 
    def forward(self, x):
        shortcut = x
        x = self.spatial_mixing(x)
        x = shortcut + self.drop_path(self.mlp(x))
        return x
 
    def forward_layer_scale(self, x):
        shortcut = x
        x = self.spatial_mixing(x)
        x = shortcut + self.drop_path(
            self.layer_scale.unsqueeze(-1).unsqueeze(-1) * self.mlp(x))
        return x
 
 
class BasicStage(nn.Module):
    def __init__(self,
                 dim,
                 depth=1,
                 n_div=4,
                 mlp_ratio=2,
                 # drop_path=[0.0],
                 layer_scale_init_value=0,
                 norm_layer=nn.BatchNorm2d,
                 act_layer=nn.ReLU,
                 pconv_fw_type='split_cat'
                 ):
        super().__init__()
        # stochastic depth is disabled here, so give every block a zero drop-path rate
        dpr = [0.0] * depth
        blocks_list = [
            MLPBlock(
                dim=dim,
                n_div=n_div,
                mlp_ratio=mlp_ratio,
                drop_path=dpr[i],
                layer_scale_init_value=layer_scale_init_value,
                norm_layer=norm_layer,
                act_layer=act_layer,
                pconv_fw_type=pconv_fw_type
            )
            for i in range(depth)
        ]
 
        self.blocks = nn.Sequential(*blocks_list)
 
    def forward(self, x):
        x = self.blocks(x)
        return x
 
 
class PatchEmbed(nn.Module):
 
    def __init__(self, in_chans, embed_dim, patch_size, patch_stride, norm_layer):
        super().__init__()
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_stride, bias=False)
        if norm_layer is not None:
            self.norm = norm_layer(embed_dim)
        else:
            self.norm = nn.Identity()
 
    def forward(self, x):
        x = self.norm(self.proj(x))
        return x
 
    def fuseforward(self, x):
        x = self.proj(x)
        return x
 
 
class PatchMerging(nn.Module):
 
    def __init__(self, dim, out_dim, k, patch_stride2, norm_layer=nn.BatchNorm2d):
        super().__init__()
        self.reduction = nn.Conv2d(dim, out_dim, kernel_size=k, stride=patch_stride2, bias=False)
        if norm_layer is not None:
            self.norm = norm_layer(out_dim)
        else:
            self.norm = nn.Identity()
 
    def forward(self, x):
        x = self.norm(self.reduction(x))
        return x
 
    def fuseforward(self, x):
        x = self.reduction(x)
        return x
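
Before wiring these modules into the YAML, a quick shape check confirms the stem/stage/merging chain downsamples as the backbone comments claim (a sketch following the first three layers of yolov5-pconv.yaml):

import torch
import torch.nn as nn

stem = PatchEmbed(3, 40, 4, 4, nn.BatchNorm2d)      # 0-P2/4
stage = BasicStage(40, depth=1)                     # 1
merge = PatchMerging(40, 80, 2, 2, nn.BatchNorm2d)  # 2-P3/8

x = torch.randn(1, 3, 640, 640)
print(merge(stage(stem(x))).shape)  # expected: torch.Size([1, 80, 80, 80])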

Make the following modifications to parse_model in models/yolo.py:

def parse_model(d, ch):  # model_dict, input_channels(3)
    # Parse a YOLOv5 model.yaml dictionary
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
 
    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
 
        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
            Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
            BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, StemBlock,
            BlazeBlock, DoubleBlazeBlock, ShuffleV2Block, MobileBottleneck, InvertedResidual, ConvBNReLU,
            RepVGGBlock, SEBlock, RepBlock, SimCSPSPPF, BasicStage, PatchEmbed, PatchMerging}:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)
            if m == InvertedResidual:
                c2 = make_divisible(c2 * gw, 4 if gw == 0.1 else 8)
                
            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
            if m in {BasicStage}:
                args.pop(1)
 
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in {Detect}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]
 
        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f}  {t:<40}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
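
With parse_model updated, the model can be built straight from the new config to confirm everything wires up (a sketch using YOLOv5's Model class; adjust the YAML path to wherever you saved it):

import torch
from models.yolo import Model

model = Model('models/yolov5-pconv.yaml')  # prints the parsed layer table
model.eval()
with torch.no_grad():
    pred = model(torch.zeros(1, 3, 640, 640))[0]  # dummy forward pass
print(pred.shape)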

Modify the fuse function of BaseModel in models/yolo.py as follows:

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        LOGGER.info('Fusing layers... ')
        for m in self.model.modules():
            if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.forward_fuse  # update forward
                
            if type(m) is PatchEmbed and isinstance(m.norm, nn.BatchNorm2d):
                m.proj = fuse_conv_and_bn(m.proj, m.norm)
                delattr(m, 'norm')  # remove batchnorm
                m.forward = m.fuseforward
            if type(m) is PatchMerging and isinstance(m.norm, nn.BatchNorm2d):
                m.reduction = fuse_conv_and_bn(m.reduction, m.norm)
                delattr(m, 'norm')  # remove batchnorm
                m.forward = m.fuseforward
            if type(m) is MLPBlock and hasattr(m, 'mlp'):
                # fold the BatchNorm into the first 1x1 conv; reuse the original
                # activation and second conv so their trained weights are kept
                # (rebuilding the second conv from scratch would discard its weights)
                m.mlp = nn.Sequential(
                    fuse_conv_and_bn(m.mlp[0], m.mlp[1]),
                    m.mlp[2],
                    m.mlp[3],
                )
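
Fusing should be numerically transparent, so it is worth checking that the fused model reproduces the unfused outputs (a sketch; eval mode matters so that BatchNorm uses running statistics and DropPath acts as an identity):

import torch
from models.yolo import Model

model = Model('models/yolov5-pconv.yaml').eval()
x = torch.randn(1, 3, 640, 640)
with torch.no_grad():
    y0 = model(x)[0]
    y1 = model.fuse()(x)[0]  # fuse() modifies the model in place and returns it
print(torch.allclose(y0, y1, atol=1e-4))  # should print True up to numerical tolerance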

Option 2: Replace the convolutions inside C3 with PConv (the C3_P module)

Configuration file (yolov5s with the C3 modules swapped for C3_P):

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 
# Parameters
nc: 80  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32
 
# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3_P, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 6, C3_P, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3_P, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 3, C3_P, [1024]],
   [-1, 1, SPPF, [1024, 5]],  # 9
  ]
 
# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3_P, [512, False]],  # 13
 
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3_P, [256, False]],  # 17 (P3/8-small)
 
   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3_P, [512, False]],  # 20 (P4/16-medium)
 
   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3_P, [1024, False]],  # 23 (P5/32-large)
 
   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

Add the following to models/common.py. Partial_conv3 is identical to the Option 1 definition above, so it is not repeated here. Note that this MLPBlock intentionally has the same class name as the Option 1 version but a different signature; the two options are alternatives, so keep only one MLPBlock in common.py:

 
 
class MLPBlock(nn.Module):
    def __init__(self,
                 c1,  # unused; kept so the block presents a (c1, c2) Conv-style signature
                 c2,
                 n_div=4,
                 mlp_ratio=2,
                 drop_path=0.1,
                 layer_scale_init_value=0.0,
                 pconv_fw_type='split_cat'
                 ):
        super().__init__()
        self.dim = c2
        self.mlp_ratio = mlp_ratio
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.n_div = n_div
 
        mlp_hidden_dim = int(c2 * mlp_ratio)
        mlp_layer = [
            Conv(c2, mlp_hidden_dim, 1),
            nn.Conv2d(mlp_hidden_dim, c2, 1, bias=False)
        ]
 
        self.mlp = nn.Sequential(*mlp_layer)
        self.spatial_mixing = Partial_conv3(c2, n_div, pconv_fw_type)
 
        if layer_scale_init_value > 0:
            self.layer_scale = nn.Parameter(layer_scale_init_value * torch.ones((c2)), requires_grad=True)
            self.forward = self.forward_layer_scale
        # otherwise the plain forward defined below is used
 
    def forward(self, x):
        shortcut = x
        x = self.spatial_mixing(x)
        x = shortcut + self.drop_path(self.mlp(x))
        return x
 
    def forward_layer_scale(self, x):
        shortcut = x
        x = self.spatial_mixing(x)
        x = shortcut + self.drop_path(
            self.layer_scale.unsqueeze(-1).unsqueeze(-1) * self.mlp(x))
        return x
 
 
class C3_P(C3):
    # C3 module with FasterNet PConv blocks (MLPBlock) in place of the Bottleneck stack
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = nn.Sequential(*(MLPBlock(c_, c_) for _ in range(n)))
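
C3_P keeps C3's two-branch structure (cv1/cv2, the concat, and cv3) and only swaps the Bottleneck stack for MLPBlocks, so it is a drop-in replacement wherever C3 appears. A quick shape check (a sketch):

import torch

m = C3_P(64, 64, n=2).eval()
print(m(torch.randn(1, 64, 80, 80)).shape)  # torch.Size([1, 64, 80, 80])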

In parse_model in models/yolo.py, add C3_P to the module set and to the repeat-insertion set:

        if m in {
            Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
            BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, StemBlock,
            BlazeBlock, DoubleBlazeBlock, ShuffleV2Block, MobileBottleneck, InvertedResidual, ConvBNReLU,
            RepVGGBlock, SEBlock, RepBlock, SimCSPSPPF, C3_P, SPPCSPC, RepConv}:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)
            if m == InvertedResidual:
                c2 = make_divisible(c2 * gw, 4 if gw == 0.1 else 8)
                
            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x, C3_P}:
                args.insert(2, n)  # number of repeats
                n = 1
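
With C3_P registered, a dummy build confirms the config parses; yolov5's models/yolo.py can also be run directly with --cfg for a quick layer-table check (the filename below is arbitrary -- use whatever you named the Option 2 config):

python models/yolo.py --cfg models/yolov5s-pconv.yaml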