YOLO/RtDETR改进创新

辛勤的程序猿

已于 2024-08-11 16:15:51 修改

阅读量188

点赞数 3

文章标签： YOLO 人工智能 计算机视觉

于 2024-07-22 17:00:00 首次发布

本文链接：https://blog.csdn.net/llz19670/article/details/140592802

版权

C2f-SSD模块

随着Mamba在序列建模上取得了良好的效果，Mamba也被引入到视觉任务中，在视觉任务中主要采用双向的SSM结构。YOLO和RT-DETR中都有C2f结构，YOLOv10将其修改为C2f-CIB结构，其创新点是用CIB替换了原先的Bottleneck块。同样地，我们的工作是使用Mamba2中的SSD模块替换CIB，从而将C2fCIB改为我们的C2f-SSD模块（下方代码中对应从vmamba导入的SS2D）。

即插即用代码：

import torch
import torch.nn as nn
from vmamba import SS2D  #见我mamba即插即用模块

import torch
from torch import nn
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Return the padding to use for a convolution, defaulting to 'same'-style padding.

    Args:
        k: Kernel size, either a single int or a sequence of ints (one per dimension).
        p: Explicit padding. When not None it is returned unchanged.
        d: Dilation factor. Values greater than 1 enlarge the effective kernel size.

    Returns:
        ``p`` if it was supplied; otherwise half of the (dilation-adjusted) kernel
        size, as an int for an int ``k`` or as a list for a sequence ``k``.
    """
    if d > 1:
        # Effective kernel extent once the taps are spread out by the dilation.
        if isinstance(k, int):
            k = d * (k - 1) + 1
        else:
            k = [d * (size - 1) + 1 for size in k]
    if p is None:
        # Half the kernel size keeps the spatial size unchanged for odd kernels at stride 1.
        if isinstance(k, int):
            p = k // 2
        else:
            p = [size // 2 for size in k]
    return p
class Conv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation block.

    Constructor args: (ch_in, ch_out, kernel, stride, padding, groups, dilation, activation).
    """

    default_act = nn.SiLU()  # activation used when ``act`` is True

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Build the convolution, batch-norm and activation layers.

        Args:
            c1: Number of input channels.
            c2: Number of output channels.
            k: Kernel size.
            s: Stride.
            p: Padding; when None it is derived from ``k`` and ``d`` by ``autopad``.
            g: Number of convolution groups.
            d: Dilation.
            act: True selects ``default_act``; an ``nn.Module`` instance is used
                as given; anything else disables the activation (identity).
        """
        super().__init__()
        padding = autopad(k, p, d)
        # The convolution has no bias term; BatchNorm2d follows it directly.
        self.conv = nn.Conv2d(
            c1,
            c2,
            kernel_size=k,
            stride=s,
            padding=padding,
            dilation=d,
            groups=g,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run convolution, then batch normalization, then the activation on ``x``."""
        out = self.conv(x)
        out = self.bn(out)
        return self.act(out)

    def forward_fuse(self, x):
        """Run convolution and activation only, skipping the batch-norm layer."""
        out = self.conv(x)
        return self.act(out)
class Bottleneck(nn.Module):
    """Two stacked Conv blocks with an optional residual connection."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """Create the two convolutions and decide whether the residual add is used.

        Args:
            c1: Number of input channels.
            c2: Number of output channels.
            shortcut: Request a residual connection (only used when ``c1 == c2``).
            g: Groups for the second convolution.
            k: Pair of kernel sizes, one for each convolution.
            e: Expansion ratio; the hidden width is ``int(c2 * e)``.
        """
        super().__init__()
        hidden = int(c2 * e)  # channels between the two convolutions
        first_kernel, second_kernel = k[0], k[1]
        self.cv1 = Conv(c1, hidden, first_kernel, 1)
        self.cv2 = Conv(hidden, c2, second_kernel, 1, g=g)
        # The input can only be added to the output when the channel counts match.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply both convolutions to ``x``; add ``x`` back when the shortcut is active."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out

class C2f(nn.Module):
    """Faster implementation of the CSP bottleneck with 2 convolutions, followed by an ELA gate.

    The input is projected to ``2 * self.c`` channels and split in two; the second
    half is passed through ``n`` blocks in sequence, every intermediate output is
    kept, and all pieces are concatenated, projected to ``c2`` channels and
    finally re-weighted by ``self.ELA``.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        """Initialize the layer.

        Args:
            c1: Number of input channels.
            c2: Number of output channels. ``ELA`` is built with ``c2`` channels and
                uses ``GroupNorm(16, c2)``, so ``c2`` must be divisible by 16.
            n: Number of inner blocks.
            shortcut: Passed to each ``Bottleneck`` to request a residual connection.
            g: Groups passed to each ``Bottleneck``.
            e: Expansion ratio; the hidden width is ``int(c2 * e)``.
        """
        super().__init__()
        self.c = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
        self.ELA = ELA(c2)

    def _fuse(self, y):
        """Run the inner blocks on the last chunk, then concatenate, project and gate."""
        # Each block consumes the output of the previous one; all outputs are kept.
        y.extend(m(y[-1]) for m in self.m)
        return self.ELA(self.cv2(torch.cat(y, 1)))

    def forward(self, x):
        """Forward pass through the C2f layer, splitting with chunk()."""
        return self._fuse(list(self.cv1(x).chunk(2, 1)))

    def forward_split(self, x):
        """Forward pass using split() instead of chunk().

        The ELA gate is applied here too, so this path returns the same result
        as ``forward``.
        """
        return self._fuse(list(self.cv1(x).split((self.c, self.c), 1)))
class ELA(nn.Module):
    """Axis-wise attention gate (presumably "Efficient Local Attention" — confirm against the source paper).

    The input is average-pooled along the width and along the height, each
    pooled profile goes through the same Conv1d -> GroupNorm -> Sigmoid stack,
    and the two resulting gates multiply the input.
    """

    def __init__(self, channels) -> None:
        """Create the pooling layers and the shared 1D gating stack.

        Args:
            channels: Number of channels of the input feature map. It must be
                divisible by 16 because of ``GroupNorm(16, channels)``.
        """
        super().__init__()
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))  # keep height, collapse width to 1
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))  # collapse height to 1, keep width
        gate_layers = [
            nn.Conv1d(channels, channels, 1),
            nn.GroupNorm(16, channels),
            nn.Sigmoid(),
        ]
        self.conv1x1 = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Scale ``x`` (indexed as batch, channel, height, width) by the two gates.

        Returns:
            A tensor of the same shape as ``x``.
        """
        # Drop the pooled singleton axis so the gating stack sees (b, c, length).
        rows = self.pool_h(x).squeeze(-1)
        cols = self.pool_w(x).squeeze(2)
        # Restore the singleton axis so each gate broadcasts over the other axis.
        gate_h = self.conv1x1(rows).unsqueeze(-1)
        gate_w = self.conv1x1(cols).unsqueeze(2)
        return x * gate_h * gate_w
class C2fSSD(C2f):
    """C2f variant whose inner blocks are ``SS2D`` modules (imported from ``vmamba``)."""

    def __init__(self, c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5):
        """Build a C2f layer, then swap its inner blocks for ``n`` SS2D modules.

        Args:
            c1: Number of input channels.
            c2: Number of output channels.
            n: Number of SS2D blocks.
            shortcut: Forwarded to the parent constructor.
            lk: Accepted but not used by this constructor.
            g: Forwarded to the parent constructor.
            e: Expansion ratio forwarded to the parent constructor.
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        # Overwrite the blocks created by the parent; each SS2D is built with
        # d_model equal to the hidden width self.c.
        # NOTE(review): assumes SS2D accepts and returns (batch, channels, H, W)
        # tensors with self.c channels — confirm against the vmamba implementation.
        ssd_blocks = [SS2D(d_model=self.c) for _ in range(n)]
        self.m = nn.ModuleList(ssd_blocks)
# Smoke test: runs only when the file is executed as a script, not on import.
if __name__ == "__main__":
    # Use the GPU when one is available, otherwise fall back to the CPU.
    # NOTE(review): the SS2D implementation may itself require CUDA — confirm.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    x = torch.rand(16, 32, 3, 4).to(device)
    model = C2fSSD(32, 32).to(device)
    print(model(x).shape)

辛勤的程序猿

关注

3
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
YOLO/RtDETR改进创新

随着Mamba在序列建模上取得了良好的效果，Mamba也被引入到视觉任务中，在视觉任务中主要采用双向的SSM结构。YOLO和RT-DETR中都有C2f结构，YOLOv10将其修改为C2f-CIB结构，其创新点是用CIB替换了原先的Bottleneck块。同样地，我们的工作是使用Mamba2中的SSD模块替换CIB，从而将C2fCIB改为我们的C2f-SSD模块。
复制链接

扫一扫