模块出处
[link] [code] [IJCAI 22] Boundary-Guided Camouflaged Object Detection
模块名称
Context Aggregation Module (CAM)
模块作用
增大感受野,提取全局上下文特征(通过多分支空洞卷积聚合多尺度信息)
模块结构
模块代码
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvBNR(nn.Module):
    """Convolution followed by batch normalization and an in-place ReLU.

    Args:
        inplanes: Number of input channels.
        planes: Number of output channels.
        kernel_size: Size of the convolution kernel (default 3).
        stride: Convolution stride (default 1).
        dilation: Spacing between kernel elements (default 1).
        bias: Whether the convolution carries a learnable bias (default False).

    With integer ``kernel_size`` and ``dilation`` the padding is chosen so
    that, at ``stride=1`` and an odd kernel size, the output keeps the input's
    spatial size. For the default 3x3 kernel this is exactly ``dilation``.
    """

    def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False):
        super().__init__()
        if isinstance(kernel_size, int) and isinstance(dilation, int):
            # A dilated kernel spans dilation * (kernel_size - 1) + 1 pixels;
            # half of the extra extent is padded on each side.
            padding = dilation * (kernel_size - 1) // 2
        else:
            # Non-scalar configurations keep the historical padding rule.
            padding = dilation
        self.block = nn.Sequential(
            nn.Conv2d(
                inplanes,
                planes,
                kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                bias=bias,
            ),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x`` and return the result."""
        return self.block(x)
class Conv1x1(nn.Module):
    """Pointwise (1x1) convolution followed by batch normalization and ReLU.

    Args:
        inplanes: Number of input channels.
        planes: Number of output channels.

    The convolution uses ``nn.Conv2d``'s default arguments apart from the
    kernel size, so spatial dimensions are left unchanged.
    """

    def __init__(self, inplanes, planes):
        super().__init__()
        # Attribute names are part of the state-dict layout; keep them stable.
        self.conv = nn.Conv2d(inplanes, planes, 1)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x`` and return the result."""
        return self.relu(self.bn(self.conv(x)))
class CAM(nn.Module):
    """Context Aggregation Module: fuse two feature maps with cross-branch context.

    The two inputs are concatenated and reduced to ``channel`` channels, split
    into four equal channel groups, and passed through four 3x3 branches with
    dilation 1, 2, 3 and 4 (spans of 3, 5, 7 and 9 pixels). Each branch also
    receives the neighbouring group and the previous branch's output. The
    branch outputs are merged by a 1x1 convolution, added back to the reduced
    features, and refined by a final 3x3 convolution.

    Args:
        hchannel: Together with ``channel``, gives the channel count of the
            concatenated input (``hchannel + channel``).
        channel: Channel count of the output; must be a positive multiple
            of 4 because the features are split into four equal groups.

    Raises:
        ValueError: If ``channel`` is not a positive multiple of 4.
    """

    def __init__(self, hchannel, channel):
        super().__init__()
        # Fail early: otherwise torch.chunk yields groups whose width does not
        # match the channel // 4 branches and forward() can never succeed.
        if channel <= 0 or channel % 4 != 0:
            raise ValueError(
                "CAM requires `channel` to be a positive multiple of 4, got {}".format(channel)
            )
        group = channel // 4
        self.conv1_1 = Conv1x1(hchannel + channel, channel)
        self.conv3_1 = ConvBNR(group, group, 3)
        self.dconv5_1 = ConvBNR(group, group, 3, dilation=2)
        self.dconv7_1 = ConvBNR(group, group, 3, dilation=3)
        self.dconv9_1 = ConvBNR(group, group, 3, dilation=4)
        self.conv1_2 = Conv1x1(channel, channel)
        self.conv3_3 = ConvBNR(channel, channel, 3)

    def forward(self, lf, hf):
        """Fuse ``lf`` and ``hf`` and return the aggregated feature map.

        Args:
            lf: Feature map whose spatial size defines the output resolution.
            hf: Feature map that is bilinearly resized to ``lf``'s spatial
                size when the two differ.

        The channel counts of ``lf`` and ``hf`` must sum to
        ``hchannel + channel``.
        """
        # Bring hf onto lf's spatial grid so the two can be concatenated.
        if lf.size()[2:] != hf.size()[2:]:
            hf = F.interpolate(hf, size=lf.size()[2:], mode='bilinear', align_corners=False)
        fused = self.conv1_1(torch.cat((lf, hf), dim=1))

        # Four channel groups; each branch sees its neighbours' information.
        g0, g1, g2, g3 = torch.chunk(fused, 4, dim=1)
        b0 = self.conv3_1(g0 + g1)
        b1 = self.dconv5_1(g1 + b0 + g2)
        b2 = self.dconv7_1(g2 + b1 + g3)
        b3 = self.dconv9_1(g3 + b2)

        merged = self.conv1_2(torch.cat((b0, b1, b2, b3), dim=1))
        # Residual connection around the multi-branch aggregation.
        return self.conv3_3(fused + merged)
if __name__ == '__main__':
    # Smoke test: fuse a 256-channel 16x16 map with a 512-channel 8x8 map.
    # The second input is resized to 16x16 inside CAM before concatenation.
    lf = torch.randn([3, 256, 16, 16])
    hf = torch.randn([3, 512, 8, 8])
    cam = CAM(hchannel=512, channel=256)
    out = cam(lf, hf)
    print(out.shape)  # 3, 256, 16, 16
原文表述
为了将多层次的融合特征整合到伪装物体预测中,我们设计了上下文聚合模块(CAM)来挖掘上下文语义,从而增强物体检测,如图 5 所示。BBSNet 中的全局上下文模块没有考虑各分支之间的语义关联;与之不同,CAM 引入跨尺度交互来增强特征表示。