Adding Dynamic Head to YOLOv8

1. Adding the DyHead module code

Add the module code below to modules.py. The DyDetect class inherits from Detect and wires the DyHead module into the detection head; DyHeadBlock follows the official DyHead code.

class DyDetect(Detect):
    def __init__(self, nc=10, ch=()):  # detection layer
        super().__init__(nc, ch)
        # two stacked DyHead blocks; assumes every input level carries ch[0] channels
        self.dyhead = nn.Sequential(*[DyHeadBlock(ch[0]) for _ in range(2)])
        # lighter box/class branches: a single 1x1 conv each instead of the default stacks
        self.cv2 = nn.ModuleList(nn.Sequential(nn.Conv2d(x, 4 * self.reg_max, 1)) for x in ch)
        self.cv3 = nn.ModuleList(nn.Sequential(nn.Conv2d(x, self.nc, 1)) for x in ch)

    def forward(self, x):
        shape = x[0].shape  # BCHW
        # each DyHeadBlock consumes and returns the full list of pyramid features
        for layer in self.dyhead:
            x = layer(x)
        for i in range(self.nl):
            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
        if self.training:
            return x
        elif self.dynamic or self.shape != shape:
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape
        box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        y = torch.cat((dbox, cls.sigmoid()), 1)
        return y if self.export else (y, x)
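As a quick sanity check on the shapes produced by the forward pass above, the following stdlib-only sketch computes the per-anchor channel count and the total anchor count. It assumes reg_max=16 (the stock YOLOv8 value), a 640×640 input, and strides 8/16/32 — none of which appear in the snippet itself.

```python
# Output dimensions of DyDetect at inference time (assumed stock YOLOv8 settings).
nc = 10                      # number of classes, matching DyDetect's default
reg_max = 16                 # DFL bins per box side (assumption: stock value)
no = nc + reg_max * 4        # channels per anchor: box distribution + class scores

imgsz = 640
strides = (8, 16, 32)
# make_anchors lays out one anchor per feature-map cell across all levels
num_anchors = sum((imgsz // s) ** 2 for s in strides)

print(no)           # 74
print(num_anchors)  # 8400 -> the inference output y has shape (batch, 74, 8400)
```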

The DyHeadBlock code is as follows (it depends on the mmcv/mmdetection helpers build_activation_layer, normal_init, constant_init, DyDCNv2 and DyReLU):

class DyHeadBlock(nn.Module):
    """DyHead Block with three types of attention.
    HSigmoid arguments in default act_cfg follow official code, not paper.
    https://github.com/microsoft/DynamicHead/blob/master/dyhead/dyrelu.py
    """

    def __init__(self,
                 in_channels,
                 norm_type='GN',
                 zero_init_offset=True,
                 act_cfg=dict(type='HSigmoid', bias=3.0, divisor=6.0)):
        super().__init__()
        self.zero_init_offset = zero_init_offset
        # (offset_x, offset_y, mask) * kernel_size_y * kernel_size_x
        self.offset_and_mask_dim = 3 * 3 * 3
        self.offset_dim = 2 * 3 * 3

        if norm_type == 'GN':
            norm_dict = dict(type='GN', num_groups=16, requires_grad=True)
        elif norm_type == 'BN':
            norm_dict = dict(type='BN', requires_grad=True)
        else:
            raise ValueError(f'unsupported norm_type: {norm_type}')

        self.spatial_conv_high = DyDCNv2(in_channels, in_channels, norm_cfg=norm_dict)

        # the official code uses DyDCNv2 for the mid level; here it is swapped
        # for a plain 3x3 Conv (a CBAM variant was also tried and left commented out)
        # self.spatial_conv_mid = DyDCNv2(in_channels, in_channels)
        # self.spatial_conv_mid = CBAM(in_channels)
        self.spatial_conv_mid = Conv(in_channels, in_channels, 3, 1)

        self.spatial_conv_low = DyDCNv2(in_channels, in_channels, stride=2)

        self.spatial_conv_offset = nn.Conv2d(
            in_channels, self.offset_and_mask_dim, 3, padding=1)
        self.scale_attn_module = nn.Sequential(
            nn.AdaptiveAvgPool2d(1), nn.Conv2d(in_channels, 1, 1),
            nn.ReLU(inplace=True), build_activation_layer(act_cfg))
        self.task_attn_module = DyReLU(in_channels)
        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                normal_init(m, 0, 0.01)
        if self.zero_init_offset:
            constant_init(self.spatial_conv_offset, 0)

    def forward(self, x):
        """Forward function."""
        outs = []
        for level in range(len(x)):
            # calculate offset and mask of DCNv2 from middle-level feature
            offset_and_mask = self.spatial_conv_offset(x[level])
            offset = offset_and_mask[:, :self.offset_dim, :, :]
            mask = offset_and_mask[:, self.offset_dim:, :, :].sigmoid()

            # official version: mid_feat = self.spatial_conv_mid(x[level], offset, mask)
            mid_feat = self.spatial_conv_mid(x[level])

            sum_feat = mid_feat * self.scale_attn_module(mid_feat)
            summed_levels = 1
            if level > 0:
                low_feat = self.spatial_conv_low(x[level - 1], offset, mask)
                sum_feat += low_feat * self.scale_attn_module(low_feat)
                summed_levels += 1
            if level < len(x) - 1:
                # this upsample order is weird, but faster than natural order
                # https://github.com/microsoft/DynamicHead/issues/25
                high_feat = F.interpolate(
                    self.spatial_conv_high(x[level + 1], offset, mask),
                    size=x[level].shape[-2:],
                    mode='bilinear',
                    align_corners=True)
                sum_feat += high_feat * self.scale_attn_module(high_feat)
                summed_levels += 1
            outs.append(self.task_attn_module(sum_feat / summed_levels))

        return outs
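The division by summed_levels at the end of forward averages only the levels that actually contributed: the current level always does, while the lower and higher neighbours contribute only when they exist. A stdlib-only sketch of that bookkeeping (the helper name is ours, not part of the module):

```python
def contributing_levels(num_levels):
    """For each pyramid level, count how many feature maps DyHeadBlock fuses:
    the level itself, plus the level below (if any) and the level above (if any)."""
    counts = []
    for level in range(num_levels):
        summed = 1                      # mid-level feature is always used
        if level > 0:                   # lower (higher-resolution) neighbour exists
            summed += 1
        if level < num_levels - 1:      # higher (lower-resolution) neighbour exists
            summed += 1
        counts.append(summed)
    return counts

print(contributing_levels(3))  # [2, 3, 2]
print(contributing_levels(1))  # [1] -- a single-level input fuses nothing extra
```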

2. Importing DyDetect when the model structure is built

In parse_model, handle DyDetect the same way as Detect:

        ...
        elif m in {Detect, DyDetect}:
            args.append([ch[x] for x in f])
        ...
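That branch simply collects the output channel counts of the layers named in the from field f and appends them as the ch argument of the head. A stdlib-only illustration with made-up channel values:

```python
# Hypothetical values for illustration: ch[i] holds the output channels of layer i.
ch = {15: 64, 18: 128, 21: 256}    # e.g. P3/P4/P5 outputs of a YOLOv8 head

for f in ([15], [15, 18, 21]):     # single-head vs. three-head 'from' field
    head_ch = [ch[x] for x in f]   # what gets appended to args
    print(head_ch)
# [64]
# [64, 128, 256]
```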

Then register the new head name in ultralytics/yolo/utils/torch_utils.py:

def guess_task_from_head(head):
    task = None
    if head.lower() in ["classify", "classifier", "cls", "fc"]:
        task = "classify"
    if head.lower() in ["detect", "dydetect"]:
        task = "detect"
    if head.lower() in ["segment"]:
        task = "segment"

    if not task:
        raise SyntaxError("task or model not recognized! Please refer the docs at : ")  # TODO: add docs links

    return task

Finally, add the detection head to the model config file. Using a single detection head as an example:

# Ultralytics YOLO 🚀, GPL-3.0 license

# Parameters
nc: 10  # number of classes
depth_multiple: 0.33  # scales module repeats
width_multiple: 1.00  # scales convolution channels

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [16, 3, 2]]  # 0-P1/2
  - [-1, 1, Conv, [32, 3, 2]]  # 1-P2/4
  - [-1, 3, C2f, [32, True]]
  - [-1, 1, Conv, [64, 3, 2]]  # 3-P3/8
  - [-1, 6, C2f, [64, True]]
  - [-1, 1, Conv, [128, 3, 2]]  # 5-P4/16
  - [-1, 6, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]]  # 7-P5/32
  - [-1, 3, C2f, [256, True]]
  - [-1, 1, SPPF, [256, 5]]  # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
  - [-1, 3, C2f, [128]]  # 12

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
  - [-1, 3, C2f, [64]]  # 15 (P3/8-small)

  - [[15], 1, DyDetect, [nc]]  # DyDetect(P3/8-small)
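With depth_multiple: 0.33 and width_multiple: 1.00, the repeat and channel numbers in the comments above can be checked by hand. The following sketch follows the stock YOLOv8 scaling rules (the helper names are ours, for illustration only):

```python
import math

depth_multiple, width_multiple = 0.33, 1.00

def scaled_repeats(n):
    # stock YOLOv8 rule: module repeats are scaled by depth_multiple, minimum 1
    return max(round(n * depth_multiple), 1) if n > 1 else n

def scaled_channels(c):
    # channels are scaled by width_multiple and kept divisible by 8
    return math.ceil(c * width_multiple / 8) * 8

print(scaled_repeats(6))    # 2  -> the '6, C2f' entries run 2 blocks
print(scaled_repeats(3))    # 1  -> the '3, C2f' entries run 1 block
print(scaled_channels(64))  # 64 -> layer 15 feeds DyDetect with ch=(64,)
```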