yolov11改进|特征融合金字塔HSFPN|小目标检测

AI白导

已于 2024-11-02 10:32:13 修改

阅读量2.2k

点赞数 20

分类专栏： yolov11 文章标签： YOLO 目标检测深度学习

于 2024-10-31 16:08:49 首次发布

本文链接：https://blog.csdn.net/qq_58467323/article/details/143403856

版权

yolov11 专栏收录该内容

15 篇文章

订阅专栏

一、创建HSFPN.py代码

论文： https://arxiv.org/pdf/2401.00926.pdf

import torch
import torch.nn as nn


class MyChannelAttention_HSFPN(nn.Module):
    def __init__(self, in_planes, ratio=4, flag=True):
        super(MyChannelAttention_HSFPN, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.conv1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
        self.flag = flag
        self.sigmoid = nn.Sigmoid()

        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.xavier_uniform_(self.conv2.weight)

    def forward(self, x):
        avg_out = self.conv2(self.relu(self.conv1(self.avg_pool(x))))
        max_out = self.conv2(self.relu(self.conv1(self.max_pool(x))))
        out = avg_out + max_out
        return self.sigmoid(out) * x if self.flag else self.sigmoid(out)


class Multiply(nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        return x[0] * x[1]


class Add(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return torch.sum(torch.stack(x, dim=0), dim=0)

二、更改task.py(def parse_model(d, ch, verbose=True))

1.

from ultralytics.nn.modules.HSFPN import MyChannelAttention_HSFPN, Multiply, Add

2.加入nn.conv2d

wAAACH5BAEKAAAALAAAAAABAAEAAAICRAEAOw==

编辑
3.

elif m is MyChannelAttention_HSFPN:
    c2 = ch[f]
    args = [c2, *args]
elif m is Multiply:
    c2 = ch[f[0]]
elif m is Add:
    c2 = ch[f[-1]]

三、配置网络

# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs

# YOLO11n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 2, C3k2, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 2, C2PSA, [1024]] # 10

# YOLO11n head
head:
  - [-1, 1, MyChannelAttention_HSFPN, [4]] # 10
  - [-1, 1, nn.Conv2d, [256, 1]] # 11
  - [-1, 1, nn.ConvTranspose2d, [256, 3, 2, 1, 1]] # 12

  - [6, 1, MyChannelAttention_HSFPN, [4]] # 13
  - [-1, 1, nn.Conv2d, [256, 1]] # 14
  - [13, 1, MyChannelAttention_HSFPN, [4, False]] # 15
  - [[-1, -2], 1, Multiply, []] # 16
  - [[-1, 13], 1, Add, []] # 17
  - [-1, 2, C3k2, [256]] # 18 P4/16

  - [13, 1, nn.ConvTranspose2d, [256, 3, 2, 1, 1]] # 19
  - [4, 1, MyChannelAttention_HSFPN, [4]] # 20
  - [-1, 1, nn.Conv2d, [256, 1]] # 21
  - [20, 1, MyChannelAttention_HSFPN, [4, False]] # 22
  - [[-1, -2], 1, Multiply, []] # 23
  - [[-1, 20], 1, Add, []] # 24
  - [-1, 2, C3k2, [256]] # 25 P3/16

  - [[26, 19, 12], 1, Detect, [nc]] # Detect(P3, P4, P5)