Camouflaged Object Detection: the SINet Network Model

Preface

Camouflaged Object Detection tackles the detection of objects that blend into their background, and the paper achieves good results; it also introduces a new dataset, COD10K. If you are interested in the details, search for and read the original paper. Here I only document my reproduction of the network model.

Overall model

The paper gives a clear diagram of the overall model, shown below.
[Figure: overall SINet architecture from the paper]
Overall it consists of a search module (SM) and an identification module (IM): the SM searches for the target, and the IM detects (identifies) it.
Let us analyze the model block by block, starting with the backbone in the top-left corner.

- backbone

[Figure: the backbone portion of the architecture diagram]

The backbone is ResNet-50; for details, see any ResNet-50 architecture reference.
Code implementation:

import torch
import torch.nn as nn



# the bottleneck block is used many times, so it is wrapped in a reusable class
class Bottleneck(nn.Module):
    def __init__(self, inchannels, outchannels, stride=1, identity=None):
        """
        :param inchannels: number of input channels
        :param outchannels: number of output channels
        :param stride: stride of the second (3x3) convolution
        :param identity: optional custom shortcut; if None, a 1x1 projection is
                         created automatically when the input and output shapes
                         differ, otherwise the input is added directly
        """
        super(Bottleneck, self).__init__()
        # channel width of the first and second convolutions
        self.mid_channels = outchannels // 4
        # the three convolution layers
        self.conv1 = nn.Conv2d(inchannels, self.mid_channels, kernel_size=1, stride=1)
        self.bn1 = nn.BatchNorm2d(self.mid_channels)

        self.conv2 = nn.Conv2d(self.mid_channels, self.mid_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(self.mid_channels)

        self.conv3 = nn.Conv2d(self.mid_channels, outchannels, kernel_size=1, stride=1)
        self.bn3 = nn.BatchNorm2d(outchannels)

        # inplace=True overwrites the input tensor directly, saving memory
        self.relu = nn.ReLU(inplace=True)

        self.stride = stride
        if identity is not None:
            self.identity = identity
        elif stride != 1 or inchannels != outchannels:
            # 1x1 conv + BN on the shortcut; its stride matches the 3x3 conv
            # so the output shapes stay consistent
            self.identity = nn.Sequential(
                nn.Conv2d(inchannels, outchannels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannels)
            )
        else:
            # shapes already match: plain identity shortcut
            self.identity = None


    def forward(self, x):
        # the shortcut branch
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.identity is not None:
            residual = self.identity(residual)

        # add the shortcut, then the final activation
        out += residual
        out = self.relu(out)

        return out




class resnet_50(nn.Module):
    def __init__(self):
        super(resnet_50, self).__init__()
        # no bias is needed when a BN layer follows
        self.layer_0_conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.layer_0_bn1 = nn.BatchNorm2d(64)
        self.layer_0_relu = nn.ReLU(inplace=True)
        self.layer_0_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer_1 = self.makelayer(3, 64, 256, 1)
        # the first bottleneck of layer2/3/4 downsamples by 2 via its 3x3 conv
        self.layer_2 = self.makelayer(4, 256, 512, 2)
        self.layer_3 = self.makelayer(6, 512, 1024, 2)
        self.layer_4 = self.makelayer(3, 1024, 2048, 2)
        # classification head of the original ResNet-50; unused by SINet, which
        # only consumes the intermediate feature maps
        self.a_pool = nn.AvgPool2d(3, stride=1, padding=1)
        self.fc = nn.Linear(7 * 7 * 2048, 1000)



    def forward(self, x):
        out = self.layer_0_conv1(x)
        out = self.layer_0_bn1(out)
        out = self.layer_0_relu(out)
        out = self.layer_0_pool(out)
        x0 = out                     # 1/4 resolution, 64 channels
        out = self.layer_1(out)
        x1 = out                     # 1/4 resolution, 256 channels
        out = self.layer_2(out)
        x2 = out                     # 1/8 resolution, 512 channels
        out = self.layer_3(out)
        x3 = out                     # 1/16 resolution, 1024 channels
        out = self.layer_4(out)
        x4 = out                     # 1/32 resolution, 2048 channels

        return x0, x1, x2, x3, x4







    # build one layer (a stack of bottleneck blocks)
    def makelayer(self, number, inchannels, last_channels, stride=1):
        """
        :param number: how many bottleneck blocks make up this layer
        :param inchannels: input channels of the first bottleneck
        :param last_channels: output channels of each bottleneck's last conv
        :param stride: stride of the first bottleneck's 3x3 conv
        :return: the layer as an nn.Sequential
        """
        layer = []
        # the first bottleneck of a layer may downsample and change the channel
        # count, so it is constructed separately
        bottle = Bottleneck(inchannels, last_channels, stride=stride)
        layer.append(bottle)

        for i in range(number - 1):
            bottle = Bottleneck(last_channels, last_channels, stride=1)
            layer.append(bottle)
        # *list unpacks each element of the list as a separate argument
        return nn.Sequential(*layer)
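
A quick shape check of the backbone (my own sanity test, not part of the model; the 352x352 input matches the resolution commonly used for SINet):

import torch

net = resnet_50()
for i, t in enumerate(net(torch.randn(1, 3, 352, 352))):
    print(f"x{i}: {tuple(t.shape)}")
# x0: (1, 64, 88, 88)    x1: (1, 256, 88, 88)
# x2: (1, 512, 44, 44)   x3: (1, 1024, 22, 22)
# x4: (1, 2048, 11, 11)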


RF

[Figure: RF module structure]
Bconv stands for basic conv, i.e. conv + BN + ReLU; the code below makes the meaning clear.

class Basic_conv(nn.Module):
    def __init__(self, inchannel, outchannel, kernel_size, stride=1, padding=0, dilation=1):
        super(Basic_conv, self).__init__()
        # no bias is needed since a BN layer follows
        self.conv = nn.Conv2d(inchannel, outchannel, kernel_size=kernel_size,
                              stride=stride, padding=padding, dilation=dilation, bias=False)
        self.bn = nn.BatchNorm2d(outchannel)
        self.relu = nn.ReLU(True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)

        return x

Dilation refers to dilated (atrous) convolution, which inflates the convolution kernel.

An ordinary convolution:
[Figure: ordinary convolution]
A dilated convolution:
[Figure: dilated convolution]
To compute the padding, a dilated convolution can be viewed as a convolution with a larger kernel whose inserted rows and columns are zero. The effective kernel size is (kernel_size - 1) * dilation + 1, and the padding is then computed from that size as usual.
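
A quick check of that padding rule (a standalone sketch, not part of the model): with kernel_size = 3 and dilation = 3 the effective kernel is (3 - 1) * 3 + 1 = 7, so padding = 7 // 2 = 3 preserves the spatial size.

import torch
import torch.nn as nn

conv = nn.Conv2d(32, 32, kernel_size=3, dilation=3, padding=3)
x = torch.randn(1, 32, 44, 44)
print(conv(x).shape)   # torch.Size([1, 32, 44, 44]) -- shape preserved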
Code implementation:

class RF(nn.Module):
    def __init__(self, inchannel, outchannel):
        super(RF, self).__init__()
        # each branch is wrapped in a Sequential so it acts as one sub-network
        self.branch1 = nn.Sequential(
            Basic_conv(inchannel, outchannel, kernel_size=1),
            Basic_conv(outchannel, outchannel, kernel_size=(1, 3), stride=1, padding=(0, 1)),
            Basic_conv(outchannel, outchannel, kernel_size=(3, 1), stride=1, padding=(1, 0)),
            # viewing the dilated kernel as a larger kernel of size
            # (kernel_size - 1) * dilation + 1 = 7, padding=3 keeps the shape
            Basic_conv(outchannel, outchannel, kernel_size=(3, 3), stride=1, padding=3, dilation=3)
        )

        self.branch2 = nn.Sequential(
            Basic_conv(inchannel, outchannel, kernel_size=1),
            Basic_conv(outchannel, outchannel, kernel_size=(1, 5), stride=1, padding=(0, 2)),
            Basic_conv(outchannel, outchannel, kernel_size=(5, 1), stride=1, padding=(2, 0)),
            Basic_conv(outchannel, outchannel, kernel_size=(3, 3), stride=1, padding=5, dilation=5)
        )

        self.branch3 = nn.Sequential(
            Basic_conv(inchannel, outchannel, kernel_size=1),
            Basic_conv(outchannel, outchannel, kernel_size=(1, 7), stride=1, padding=(0, 3)),
            Basic_conv(outchannel, outchannel, kernel_size=(7, 1), stride=1, padding=(3, 0)),
            Basic_conv(outchannel, outchannel, kernel_size=(3, 3), stride=1, padding=7, dilation=7)
        )

        self.branch4 = nn.Sequential(
            Basic_conv(inchannel, outchannel, kernel_size=1)
        )

        # branch5 is the residual connection added after the fusion conv
        self.branch5 = nn.Sequential(
            Basic_conv(inchannel, outchannel, kernel_size=1)
        )

        self.conv = Basic_conv(outchannel * 4, outchannel, kernel_size=1)
        self.relu = nn.ReLU(True)

    def forward(self, x):
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        x4 = self.branch4(x)
        x5 = self.branch5(x)
        # concatenate along the channel dimension
        x_cat = torch.cat((x1, x2, x3, x4), dim=1)

        out = self.conv(x_cat) + x5
        out = self.relu(out)

        return out
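
A quick shape check of RF (my own test; the input mimics x4 from the backbone, 2048 channels at 1/32 resolution of a 352x352 image):

import torch

rf = RF(2048, 32)
x4 = torch.randn(1, 2048, 11, 11)
print(rf(x4).shape)   # torch.Size([1, 32, 11, 11]) -- channels reduced, size kept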

PDC

[Figure: PDC module structure, annotated with the variable names used in my code]
The third branch coming from rf1 is upsampled once in the figure, but rf2 is actually twice the spatial size of rf1, so a single upsampling cannot make it match x2_1 for the element-wise product; two 2x upsamplings are required, and the author's released code confirms this.
PDC is used twice, once with rf4 and once without, so I simply built two separate classes.

# PDC with three inputs
class PDC_3(nn.Module):
    def __init__(self, channel):
        super(PDC_3, self).__init__()
        # 2x upsampling
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        # the leftmost convolutions before the concatenations (shared here for
        # simplicity; the author's released code uses a separate conv per arrow)
        self.conv1 = Basic_conv(channel, channel, kernel_size=3, padding=1)
        # the convolutions around the first concatenation: two separate
        # modules, so the two calls do not share weights
        self.cat2_conv = Basic_conv(channel * 2, channel * 2, kernel_size=3, padding=1)
        self.cat2_conv2 = Basic_conv(channel * 2, channel * 2, kernel_size=3, padding=1)
        # the three convolutions before the output
        self.cat3_conv = Basic_conv(channel * 3, channel * 3, kernel_size=3, padding=1)
        self.cat3_conv2 = Basic_conv(channel * 3, channel * 3, kernel_size=3, padding=1)
        self.cat3_conv3 = nn.Conv2d(channel * 3, 1, kernel_size=1)

    def forward(self, x1, x2, x3):
        x1_1 = self.conv1(self.upsample(x1))
        x1_2 = x1_1 * x2
        x1_3 = torch.cat((x1_2, x1_1), 1)
        x1_4 = self.cat2_conv(x1_3)
        x1_5 = self.cat2_conv2(self.upsample(x1_4))

        x2_1 = self.conv1(self.upsample(x2))
        # element-wise products, so the order of the factors does not matter;
        # the figure upsamples x1's third branch once, but two upsamplings are
        # needed for the shapes to match, and the author's code does two
        x2_2 = x2_1 * self.conv1(self.upsample(self.upsample(x1))) * x3

        x2_3 = torch.cat((x1_5, x2_2), 1)
        out = self.cat3_conv(x2_3)
        out = self.cat3_conv2(out)
        out = self.cat3_conv3(out)

        return out

# PDC with four inputs
class PDC_4(nn.Module):
    def __init__(self, channel):
        super(PDC_4, self).__init__()
        # 2x upsampling
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        # the leftmost convolutions before the concatenations (shared here for
        # simplicity; the author's released code uses a separate conv per arrow)
        self.conv1 = Basic_conv(channel, channel, kernel_size=3, padding=1)
        # the convolutions around the first concatenation
        self.cat2_conv = Basic_conv(channel * 2, channel * 2, kernel_size=3, padding=1)
        self.cat2_conv2 = Basic_conv(channel * 2, channel * 2, kernel_size=3, padding=1)
        # the three convolutions before the output
        self.cat3_conv = Basic_conv(channel * 4, channel * 4, kernel_size=3, padding=1)
        self.cat3_conv2 = Basic_conv(channel * 4, channel * 4, kernel_size=3, padding=1)
        self.cat3_conv3 = nn.Conv2d(channel * 4, 1, kernel_size=1)

    def forward(self, x1, x2, x3, x4):
        x1_1 = self.conv1(self.upsample(x1))
        x1_2 = x1_1 * x2
        x1_3 = torch.cat((x1_2, x1_1), 1)
        x1_4 = self.cat2_conv(x1_3)
        x1_5 = self.cat2_conv2(self.upsample(x1_4))

        x2_1 = self.conv1(self.upsample(x2))
        # same double upsampling of x1 as in PDC_3 so the shapes match
        x2_2 = x2_1 * self.conv1(self.upsample(self.upsample(x1))) * x3

        x2_3 = torch.cat((x1_5, x2_2, x4), 1)
        out = self.cat3_conv(x2_3)
        out = self.cat3_conv2(out)
        out = self.cat3_conv3(out)

        return out
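
A quick shape check of both PDC variants (my own test; the spatial sizes mimic rf1..rf4 for a 352x352 input):

import torch

pdc4 = PDC_4(32)
r1 = torch.randn(1, 32, 11, 11)   # rf1, 1/32 scale
r2 = torch.randn(1, 32, 22, 22)   # rf2, 1/16 scale
r3 = torch.randn(1, 32, 44, 44)   # rf3, 1/8 scale
r4 = torch.randn(1, 32, 44, 44)   # rf4, 1/8 scale
print(pdc4(r1, r2, r3, r4).shape)   # torch.Size([1, 1, 44, 44])

pdc3 = PDC_3(32)
print(pdc3(r1, r2, r3).shape)       # torch.Size([1, 1, 44, 44])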


SA
For the SA part I have quite a few points of confusion, perhaps due to my limited English; I list them here in the hope of getting some answers.

- Gaussian filter kernel size and standard deviation

The paper says the standard deviation is 32 and the kernel size is 4.
[Figure: the relevant passage from the paper]
But in the released source code the author defines a custom function for generating the Gaussian filter kernel, and the resulting kernel has shape (1, 1, 31, 31), which clearly disagrees with a kernel size of 4.

# the author's released code
import numpy as np
import scipy.stats as st

def _get_kernel(kernlen=16, nsig=3):
    interval = (2 * nsig + 1.) / kernlen
    # kernlen + 1 evenly spaced points
    x = np.linspace(-nsig - interval / 2., nsig + interval / 2., kernlen + 1)
    # probability mass of each interval under the normal distribution:
    # st.norm.cdf integrates the normal pdf up to x; diff takes successive differences
    kern1d = np.diff(st.norm.cdf(x))
    # square root of the outer product of kern1d with itself
    kernel_raw = np.sqrt(np.outer(kern1d, kern1d))
    # normalize so the kernel sums to 1
    kernel = kernel_raw / kernel_raw.sum()
    return kernel

Moreover, in forward the padding is 15. Does that not indicate that the kernel size is actually 31?
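
A quick check of that reading (hypothetical test lines, not from the author's repo):

k = _get_kernel(31, 4)
print(k.shape)   # (31, 31); after adding the two new axes it becomes (1, 1, 31, 31)
# a 31x31 kernel with padding = (31 - 1) // 2 = 15 preserves the spatial size,
# matching the padding=15 in the author's forward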

- The object of the Gaussian filtering is inconsistent

In the formula given in the paper,
[Figure: the SA formula from the paper]
the object of the Gaussian filtering is x2, the output of the backbone, but in the actual code what gets convolved is the attention map after the sigmoid.

# the author's released code
import torch
import torch.nn.functional as F
from torch.nn import Parameter

class SA(nn.Module):
    """
        holistic attention src
    """
    def __init__(self):
        super(SA, self).__init__()
        gaussian_kernel = np.float32(_get_kernel(31, 4))
        # add two new axes, giving a 4-D kernel of shape (1, 1, 31, 31)
        gaussian_kernel = gaussian_kernel[np.newaxis, np.newaxis, ...]
        self.gaussian_kernel = Parameter(torch.from_numpy(gaussian_kernel))

    def forward(self, attention, x):
        soft_attention = F.conv2d(attention, self.gaussian_kernel, padding=15)
        # min_max_norm is a helper defined elsewhere in the author's repo
        soft_attention = min_max_norm(soft_attention)       # normalization
        x = torch.mul(x, soft_attention.max(attention))     # element-wise multiplication
        return x

I have not found answers to these questions yet. I am only a beginner practicing by reproducing the model; if I come up with answers later, I will come back and revise this post.

- Code implementation

Below is my own implementation. I set the Gaussian kernel size to 3 and the standard deviation to 32. x2 is Gaussian-filtered and normalized, then merged with the original x2 via an element-wise max and multiplied by the attention map c.

import cv2

class SA(nn.Module):
    def __init__(self):
        super(SA, self).__init__()
        # build a 1-D Gaussian kernel with cv2, then take the outer product to
        # get the 2-D kernel, shape (1, 1, 3, 3)
        kernel = cv2.getGaussianKernel(3, 32) * cv2.getGaussianKernel(3, 32).T
        kernel = torch.Tensor(kernel).unsqueeze(0).unsqueeze(0)
        # register as a buffer so it moves to the right device with the module
        self.register_buffer('gaussian_kernel', kernel)

    def forward(self, c, x):
        # Gaussian-filter x2 channel by channel (a grouped conv, so the output
        # keeps the same number of channels), then normalize; after the sigmoid,
        # c holds the probability of each pixel belonging to the region of interest
        channels = x.size(1)
        kernel = self.gaussian_kernel.expand(channels, 1, 3, 3).contiguous()
        x1 = F.conv2d(x, kernel, padding=1, groups=channels)
        x1 = self.normalize(x1)
        # x1.max(x) keeps the element-wise larger of the filtered and original
        # features; multiplying by c (in (0, 1)) down-weights uninteresting regions
        x = x1.max(x) * c
        return x

    def normalize(self, x):
        # per-channel max and min over the feature map, expanded back to the
        # input shape
        max = x.max(3)[0].max(2)[0].unsqueeze(2).unsqueeze(3).expand_as(x)
        min = x.min(3)[0].min(2)[0].unsqueeze(2).unsqueeze(3).expand_as(x)
        # min-max normalization into [0, 1]
        x = (x - min) / (max - min + 1e-8)

        return x
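
A quick shape check of my SA (my own test; c mimics the sigmoid of the coarse map cs at 1/8 resolution and x mimics the backbone feature x2):

import torch

sa = SA()
c = torch.rand(1, 1, 44, 44)       # sigmoid attention in (0, 1)
x = torch.randn(1, 512, 44, 44)    # backbone feature x2
print(sa(c, x).shape)              # torch.Size([1, 512, 44, 44])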

Finally, wiring all the parts together:

class SINet(nn.Module):
    def __init__(self):
        super(SINet, self).__init__()
        self.sa = SA()

        self.rfs1 = RF(2048, 32)
        self.rfs2 = RF(3072, 32)
        self.rfs3 = RF(3584, 32)
        self.rfs4 = RF(320, 32)

        self.rfi1 = RF(2048, 32)
        self.rfi2 = RF(1024, 32)
        self.rfi3 = RF(512, 32)

        self.pdc4 = PDC_4(32)
        self.pdc3 = PDC_3(32)

        self.resnet = resnet_50()
        self.downsample_2 = nn.MaxPool2d(2, stride=2)
        self.upsample_8 = nn.Upsample(scale_factor=8, mode="bilinear", align_corners=True)
        self.upsample_2 = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)
        self.upsample_4 = nn.Upsample(scale_factor=4, mode="bilinear", align_corners=True)

    def forward(self, x):
        # ---------- ResNet backbone + RF (search module) ----------
        x0, x1, x2, x3, x4 = self.resnet(x)
        x1 = torch.cat((x0, x1), 1)                 # 64 + 256 = 320 channels
        rfs_4 = self.rfs4(self.downsample_2(x1))

        x3_up = self.upsample_2(x3)
        x4_up4 = self.upsample_4(x4)
        rfs_3 = self.rfs3(torch.cat((x2, x3_up, x4_up4), dim=1))        # 512 + 1024 + 2048

        rfs_2 = self.rfs2(torch.cat((x3, self.upsample_2(x4)), dim=1))  # 1024 + 2048

        rfs_1 = self.rfs1(x4)

        cs = self.pdc4(rfs_1, rfs_2, rfs_3, rfs_4)
        ccsm = self.upsample_8(cs)                  # coarse camouflage map, full resolution

        # ---------- SA + identification module ----------
        ch = self.sa(cs.sigmoid(), x2)
        x3_1 = self.resnet.layer_3(ch)
        x4_1 = self.resnet.layer_4(x3_1)

        rfi_1 = self.rfi1(x4_1)
        rfi_2 = self.rfi2(x3_1)
        rfi_3 = self.rfi3(ch)
        ci = self.pdc3(rfi_1, rfi_2, rfi_3)

        ccim = self.upsample_8(ci)                  # refined camouflage map, full resolution

        return ccsm, ccim
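
A minimal end-to-end sanity check of the full model (my own test code; a 352x352 input is assumed, the resolution commonly used for SINet):

import torch

net = SINet()
img = torch.randn(2, 3, 352, 352)
ccsm, ccim = net(img)
print(ccsm.shape, ccim.shape)   # both torch.Size([2, 1, 352, 352])

ccsm is the coarse map from the search branch and ccim the refined map from the identification branch; during training both would be supervised against the ground-truth mask.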

"Depth Confidence-aware Camouflaged Object Detection" 是一篇关于深度置信感知伪装目标检测的论文。该论文介绍了一种用于检测伪装目标的新方法,该方法结合了深度信息和置信度感知。 伪装目标指的是那些在外观上与周围环境相似,很难被人眼或传统算法准确识别的目标。传统的目标检测方法在处理伪装目标时往往存在困难,因为伪装目标与背景具有相似的颜色、纹理或形状,导致目标难以被区分。 该论文中提出的方法通过结合深度信息和置信度感知来解决伪装目标检测的问题。具体而言,该方法首先利用深度传感器(如RGB-D相机)获取场景的深度信息,将其与RGB图像进行融合。然后,使用深度信息来提取特征,并通过深度感知模块来增强目标的边缘和轮廓特征。 此外,该方法还引入了置信度感知模块,用于评估每个像素点的置信度。置信度可以根据像素点的深度、颜色、纹理等信息计算得出。通过增强高置信度区域的特征表示,可以提高对伪装目标的检测能力。 最后,该方法使用深度置信感知的特征表示进行目标检测和分割。实验结果表明,该方法在多个公开数据集上取得了较好的性能,并且在伪装目标检测任务上相比其他方法具有明显的优势。 综上所述,"Depth Confidence-aware Camouflaged Object Detection" 这篇论文提出了一种利用深度信息和置信度感知来检测伪装目标的新方法。通过结合深度和置信度信息,该方法能够提高对伪装目标的检测准确性和鲁棒性。