CBAM Network

18_CBAM Net:

Figures: the overall CBAM module; the CAM (channel attention) and SAM (spatial attention) sub-modules.

Network description:

CBAM stands for Convolutional Block Attention Module. It is an attention module that combines the spatial and channel dimensions, and compared with SENet, which attends only to channels, it generally achieves better results. The authors propose a simple yet effective attention module: given an intermediate feature map, CBAM sequentially infers attention weights along the channel and spatial dimensions and multiplies them with the original feature map to adaptively refine the features. Because CBAM is a lightweight, general-purpose module, it can be integrated seamlessly into any CNN architecture with negligible extra overhead and trained end-to-end together with the base CNN. After integrating CBAM into different models, performance improves consistently on various classification and detection datasets, demonstrating its broad applicability.
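In equations (as given in the CBAM paper), with F the intermediate feature map, M_c and M_s the channel and spatial attention maps, and \otimes denoting broadcast element-wise multiplication:

F'  = M_c(F) \otimes F
F'' = M_s(F') \otimes F'
M_c(F) = \sigma\big(\mathrm{MLP}(\mathrm{AvgPool}(F)) + \mathrm{MLP}(\mathrm{MaxPool}(F))\big)
M_s(F) = \sigma\big(f^{7\times 7}([\mathrm{AvgPool}(F);\ \mathrm{MaxPool}(F)])\big)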

Features and advantages:

(1) Adding CBAM improves accuracy on object detection and image classification, and the mechanism can be dropped into a neural network at a small cost in computation and parameters.

(2) The channel attention and spatial attention modules can be combined either in parallel or sequentially, but the authors found that a sequential arrangement with channel attention first gives better results.

(3) Extensive ablation studies validate the effectiveness of the attention module.

(4) Plugging in this lightweight module substantially improves the performance of various networks on multiple benchmarks (ImageNet-1K, MS COCO, and VOC 2007).

Code:
Keras implementation:
# Imports assumed for this snippet (TensorFlow 2.x / tf.keras)
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.layers import (Input, Conv2D, BatchNormalization, Activation,
                                     GlobalAveragePooling2D, GlobalMaxPool2D, Dense,
                                     Reshape, Concatenate, MaxPooling2D)

# Bottleneck convolution block with CBAM for ResNet-50/101/152 (Keras functional API)
def conv_block(inputs, filter_num, reduction_ratio, stride=1, name=None):
    
    x = inputs
    x = Conv2D(filter_num[0], (1,1), strides=stride, padding='same', name=name+'_conv1')(x)
    x = BatchNormalization(axis=3, name=name+'_bn1')(x)
    x = Activation('relu', name=name+'_relu1')(x)

    x = Conv2D(filter_num[1], (3,3), strides=1, padding='same', name=name+'_conv2')(x)
    x = BatchNormalization(axis=3, name=name+'_bn2')(x)
    x = Activation('relu', name=name+'_relu2')(x)

    x = Conv2D(filter_num[2], (1,1), strides=1, padding='same', name=name+'_conv3')(x)
    x = BatchNormalization(axis=3, name=name+'_bn3')(x)

    # Channel Attention
    avgpool = GlobalAveragePooling2D(name=name+'_channel_avgpool')(x)
    maxpool = GlobalMaxPool2D(name=name+'_channel_maxpool')(x)
    # Shared MLP
    Dense_layer1 = Dense(filter_num[2]//reduction_ratio, activation='relu', name=name+'_channel_fc1')
    Dense_layer2 = Dense(filter_num[2], name=name+'_channel_fc2')  # no activation here; sigmoid is applied after the add
    avg_out = Dense_layer2(Dense_layer1(avgpool))
    max_out = Dense_layer2(Dense_layer1(maxpool))

    channel = layers.add([avg_out, max_out])
    channel = Activation('sigmoid', name=name+'_channel_sigmoid')(channel)
    channel = Reshape((1,1,filter_num[2]), name=name+'_channel_reshape')(channel)
    channel_out = tf.multiply(x, channel)
    
    # Spatial Attention
    avgpool = tf.reduce_mean(channel_out, axis=3, keepdims=True, name=name+'_spatial_avgpool')
    maxpool = tf.reduce_max(channel_out, axis=3, keepdims=True, name=name+'_spatial_maxpool')
    spatial = Concatenate(axis=3)([avgpool, maxpool])

    spatial = Conv2D(1, (7,7), strides=1, padding='same',name=name+'_spatial_conv2d')(spatial)
    spatial_out = Activation('sigmoid', name=name+'_spatial_sigmoid')(spatial)

    CBAM_out = tf.multiply(channel_out, spatial_out)

    # residual connection
    r = Conv2D(filter_num[2], (1,1), strides=stride, padding='same', name=name+'_residual')(inputs)
    x = layers.add([CBAM_out, r])
    x = Activation('relu', name=name+'_relu3')(x)

    return x

def build_block(x, filter_num, blocks, reduction_ratio=16, stride=1, name=None):

    x = conv_block(x, filter_num, reduction_ratio, stride, name=name)

    for i in range(1, blocks):
        x = conv_block(x, filter_num, reduction_ratio, stride=1, name=name+'_block'+str(i))

    return x


# Build ResNet-50/101/152 with CBAM bottleneck blocks
def CBAM_ResNet(Netname, nb_classes):

    ResNet_Config = {'ResNet50':[3,4,6,3],
                    'ResNet101':[3,4,23,3],
                    'ResNet152':[3,8,36,3]}
    layers_dims=ResNet_Config[Netname]

    filter_block1=[64, 64, 256]
    filter_block2=[128,128,512]
    filter_block3=[256,256,1024]
    filter_block4=[512,512,2048]

    # Reduction ratio in four blocks
    SE_reduction=[16,16,16,16]

    img_input = Input(shape=(224,224,3))
    # stem block 
    x = Conv2D(64, (7,7), strides=(2,2),padding='same', name='stem_conv')(img_input)
    x = BatchNormalization(axis=3, name='stem_bn')(x)
    x = Activation('relu', name='stem_relu')(x)
    x = MaxPooling2D((3,3), strides=(2,2), padding='same', name='stem_pool')(x)
    # convolution block
    x = build_block(x, filter_block1, layers_dims[0], SE_reduction[0], name='conv1')
    x = build_block(x, filter_block2, layers_dims[1], SE_reduction[1], stride=2, name='conv2')
    x = build_block(x, filter_block3, layers_dims[2], SE_reduction[2], stride=2, name='conv3')
    x = build_block(x, filter_block4, layers_dims[3], SE_reduction[3], stride=2, name='conv4')
    # top layer
    x = GlobalAveragePooling2D(name='top_layer_pool')(x)
    x = Dense(nb_classes, activation='softmax', name='fc')(x)

    model = models.Model(img_input, x, name=Netname)

    return model
    

if __name__=='__main__':
    model = CBAM_ResNet('ResNet50', 1000)
    model.summary()
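As a quick sanity check of the Keras model (a minimal sketch, assuming the definitions above), build it and run a random batch through it; the softmax output should have shape (batch, nb_classes):

import numpy as np

model = CBAM_ResNet('ResNet50', nb_classes=1000)
dummy = np.random.rand(2, 224, 224, 3).astype('float32')  # random batch of two 224x224 RGB images
probs = model.predict(dummy)                               # forward pass only, no training
print(probs.shape)                                         # expected: (2, 1000)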
PyTorch implementation:
# Imports assumed for this snippet
import torch
import torch.nn as nn
from torchvision.models.resnet import conv1x1, BasicBlock, Bottleneck

# Channel attention module
class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.fc1   = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2   = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return self.sigmoid(out)
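A quick shape check (a sketch, assuming the imports above): the module returns per-channel weights of shape (N, C, 1, 1), which are broadcast-multiplied with the input feature map.

ca = ChannelAttention(in_planes=64, ratio=16)
x = torch.randn(2, 64, 32, 32)       # N x C x H x W
w = ca(x)                            # per-channel weights in (0, 1)
print(w.shape, (w * x).shape)        # torch.Size([2, 64, 1, 1]) torch.Size([2, 64, 32, 32])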
        
# Spatial attention module
class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
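The two modules are meant to be applied sequentially with channel attention first, which is the ordering the authors found best (point (2) above). A minimal wrapper composing them, sketched under the assumption that the two classes above are in scope (the name CBAM here is my own):

class CBAM(nn.Module):
    def __init__(self, in_planes, ratio=16, kernel_size=7):
        super(CBAM, self).__init__()
        self.ca = ChannelAttention(in_planes, ratio)
        self.sa = SpatialAttention(kernel_size)

    def forward(self, x):
        x = self.ca(x) * x   # refine channels first
        x = self.sa(x) * x   # then refine spatial locations
        return x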

# Adding the attention mechanism to ResNet. To keep using pretrained parameters, the original
# ResNet structure must not change, so CBAM is not inserted inside the residual blocks (that would
# alter the architecture and invalidate the pretrained weights). Placing it after the first
# convolution and after the last convolution stage leaves the existing layers untouched, so the
# pretrained parameters can still be loaded.
class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)

        # Add attention after the first convolution layer of the network
        self.ca = ChannelAttention(self.inplanes)
        self.sa = SpatialAttention()

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        # Add attention after the last convolution stage of the network
        self.ca1 = ChannelAttention(self.inplanes)
        self.sa1 = SpatialAttention()

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.ca(x) * x
        x = self.sa(x) * x

        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.ca1(x) * x
        x = self.sa1(x) * x


        x = self.avgpool(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)

        return x
        
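To illustrate the pretrained-weights point made in the comment above, a sketch (assuming torchvision is installed): loading a stock ResNet-50 checkpoint with strict=False leaves only the new ca/sa/ca1/sa1 parameters uninitialized, since every other layer name is unchanged.

import torchvision

model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=1000)   # CBAM-augmented ResNet-50
weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V1  # older torchvision: pretrained=True instead
pretrained = torchvision.models.resnet50(weights=weights)
missing, unexpected = model.load_state_dict(pretrained.state_dict(), strict=False)
print(missing)       # only the ca/sa/ca1/sa1 attention parameters should appear here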