18_CBAM Net:
图:
网络描述:
CBAM表示卷积模块的注意力机制模块。是一种结合了空间(spatial)和通道(channel)的注意力机制模块,相比于senet只关注通道(channel)的注意力机制可以取得更好的效果。 作者提出了一个简单但有效的注意力模块 CBAM,给定一个中间特征图,我们沿着空间和通道两个维度依次推断出注意力权重,然后与原特征图相乘来对特征进行自适应调整。 由于 CBAM 是一个轻量级的通用模块,它可以无缝地集成到任何 CNN 架构中,额外开销忽略不计,并且可以与基本 CNN 一起进行端到端的训练。 在不同的分类和检测数据集上,将 CBAM 集成到不同的模型中后,模型的表现都有了一致的提升,展示了其广泛的可应用性。
特点,优点:
(1) 引入CBAM能提高目标检测和物体分类的精度,可以在神经网络中引入这一机制,而且花费的计算开销和参数大小都比较少
(2) 通道注意力和空间注意力这两个模块可以以并行或者顺序的方式组合在一起,但是作者发现顺序组合并且将通道注意力放在前面可以取得更好的效果。
(3)通过广泛的消融研究来验证注意力模块的有效性
(4)通过插入轻量级模块,验证了在多个基准上(ImageNet-1K、MS COCO和VOC 2007),各种网络的性能得到了极大的提升。
代码:
keras实现:
# Bottleneck convolution block (with CBAM) used by ResNet-50/101/152
def conv_block(inputs, filter_num, reduction_ratio, stride=1, name=None):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1) followed by a CBAM module.

    Channel attention is applied first, then spatial attention, matching the
    sequential ordering recommended in the CBAM paper.

    Args:
        inputs: 4-D NHWC input tensor.
        filter_num: three ints — filters of the 1x1, 3x3 and 1x1 convolutions.
        reduction_ratio: channel-reduction ratio of the shared MLP.
        stride: stride of the first 1x1 conv and of the residual projection.
        name: prefix used for all layer names (must be unique per block).

    Returns:
        Output tensor after the residual add and final ReLU.
    """
    # Bottleneck main path.
    x = inputs
    x = Conv2D(filter_num[0], (1,1), strides=stride, padding='same', name=name+'_conv1')(x)
    x = BatchNormalization(axis=3, name=name+'_bn1')(x)
    x = Activation('relu', name=name+'_relu1')(x)
    x = Conv2D(filter_num[1], (3,3), strides=1, padding='same', name=name+'_conv2')(x)
    x = BatchNormalization(axis=3, name=name+'_bn2')(x)
    x = Activation('relu', name=name+'_relu2')(x)
    x = Conv2D(filter_num[2], (1,1), strides=1, padding='same', name=name+'_conv3')(x)
    x = BatchNormalization(axis=3, name=name+'_bn3')(x)

    # --- Channel attention ---
    avgpool = GlobalAveragePooling2D(name=name+'_channel_avgpool')(x)
    maxpool = GlobalMaxPool2D(name=name+'_channel_maxpool')(x)
    # Shared MLP applied to both pooled descriptors.
    # Bug fix: the second FC layer must be linear (no activation) — in CBAM the
    # sigmoid is applied only AFTER the two branches are summed. The original
    # code used activation='relu' here, which clips negative logits before the
    # sum and changes the attention weights.
    Dense_layer1 = Dense(filter_num[2]//reduction_ratio, activation='relu', name=name+'_channel_fc1')
    Dense_layer2 = Dense(filter_num[2], name=name+'_channel_fc2')
    avg_out = Dense_layer2(Dense_layer1(avgpool))
    max_out = Dense_layer2(Dense_layer1(maxpool))
    channel = layers.add([avg_out, max_out])
    channel = Activation('sigmoid', name=name+'_channel_sigmoid')(channel)
    channel = Reshape((1,1,filter_num[2]), name=name+'_channel_reshape')(channel)
    channel_out = tf.multiply(x, channel)

    # --- Spatial attention ---
    # Channel-wise mean and max produce two HxWx1 maps that are concatenated
    # and squeezed to a single attention map by a 7x7 convolution.
    avgpool = tf.reduce_mean(channel_out, axis=3, keepdims=True, name=name+'_spatial_avgpool')
    maxpool = tf.reduce_max(channel_out, axis=3, keepdims=True, name=name+'_spatial_maxpool')
    spatial = Concatenate(axis=3)([avgpool, maxpool])
    spatial = Conv2D(1, (7,7), strides=1, padding='same',name=name+'_spatial_conv2d')(spatial)
    spatial_out = Activation('sigmoid', name=name+'_spatial_sigmoid')(spatial)
    CBAM_out = tf.multiply(channel_out, spatial_out)

    # Residual connection.
    # NOTE(review): a 1x1 projection is applied on every block, even when
    # stride == 1 and the channel counts already match; standard ResNet uses an
    # identity shortcut there. Kept as-is to preserve the weight layout.
    r = Conv2D(filter_num[2], (1,1), strides=stride, padding='same', name=name+'_residual')(inputs)
    x = layers.add([CBAM_out, r])
    x = Activation('relu', name=name+'_relu3')(x)
    return x
def build_block(x, filter_num, blocks, reduction_ratio=16, stride=1, name=None):
    """Stack `blocks` CBAM bottleneck units; only the first one may downsample.

    Args:
        x: input tensor.
        filter_num: filters for each conv_block (see conv_block).
        blocks: number of conv_block units in this stage.
        reduction_ratio: CBAM channel-reduction ratio, forwarded to each unit.
        stride: stride of the first unit; all remaining units use stride 1.
        name: name prefix for the stage.

    Returns:
        Output tensor of the last unit.
    """
    # First unit handles the (possible) spatial downsampling.
    x = conv_block(x, filter_num, reduction_ratio, stride, name=name)
    # Remaining units keep the resolution unchanged.
    for block_idx in range(1, blocks):
        x = conv_block(x, filter_num, reduction_ratio, stride=1,
                       name=name + '_block' + str(block_idx))
    return x
# Build ResNet-50 / 101 / 152 with CBAM blocks
def SE_ResNet(Netname, nb_classes):
    """Create a CBAM-augmented ResNet classifier for 224x224 RGB input.

    Args:
        Netname: one of 'ResNet50', 'ResNet101', 'ResNet152'.
        nb_classes: number of output classes for the softmax head.

    Returns:
        A Keras Model mapping (224, 224, 3) images to class probabilities.
    """
    ResNet_Config = {'ResNet50':[3,4,6,3],
                     'ResNet101':[3,4,23,3],
                     'ResNet152':[3,8,36,3]}
    layers_dims = ResNet_Config[Netname]

    # Per-stage configuration: bottleneck filters, CBAM reduction ratio,
    # first-block stride and name prefix. Stage 1 keeps stride 1 because the
    # stem pooling already downsampled the input.
    stage_filters = ([64, 64, 256],
                     [128, 128, 512],
                     [256, 256, 1024],
                     [512, 512, 2048])
    stage_reductions = (16, 16, 16, 16)
    stage_strides = (1, 2, 2, 2)
    stage_names = ('conv1', 'conv2', 'conv3', 'conv4')

    img_input = Input(shape=(224,224,3))

    # Stem: 7x7/2 conv -> BN -> ReLU -> 3x3/2 max-pool.
    x = Conv2D(64, (7,7), strides=(2,2), padding='same', name='stem_conv')(img_input)
    x = BatchNormalization(axis=3, name='stem_bn')(x)
    x = Activation('relu', name='stem_relu')(x)
    x = MaxPooling2D((3,3), strides=(2,2), padding='same', name='stem_pool')(x)

    # Four residual stages.
    for filters, blocks, reduction, stride, stage_name in zip(
            stage_filters, layers_dims, stage_reductions, stage_strides, stage_names):
        x = build_block(x, filters, blocks, reduction, stride=stride, name=stage_name)

    # Classification head.
    x = GlobalAveragePooling2D(name='top_layer_pool')(x)
    x = Dense(nb_classes, activation='softmax', name='fc')(x)
    return models.Model(img_input, x, name=Netname)
if __name__ == '__main__':
    # Quick smoke test: build the CBAM ResNet-50 and print its layer summary.
    cbam_resnet50 = SE_ResNet('ResNet50', 1000)
    cbam_resnet50.summary()
pytorch实现:
# Channel attention module
class ChannelAttention(nn.Module):
    """CBAM channel attention.

    Global average- and max-pooled descriptors are pushed through a shared
    two-layer MLP (implemented as 1x1 convs), summed, and squashed with a
    sigmoid to produce per-channel weights of shape (N, C, 1, 1).

    Args:
        in_planes: number of input channels C.
        ratio: channel-reduction ratio of the hidden MLP layer.
    """
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # Bug fix: use `ratio` here — the original hard-coded 16, silently
        # ignoring the argument and producing 0 hidden channels whenever
        # in_planes < 16.
        self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid channel weights (N, C, 1, 1) for input x (N, C, H, W)."""
        # The two branches share fc1/fc2; sigmoid is applied after the sum.
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return self.sigmoid(out)
# Spatial attention module
class SpatialAttention(nn.Module):
    """CBAM spatial attention.

    Builds a two-channel descriptor from the channel-wise mean and max of the
    input, convolves it down to one channel, and applies a sigmoid to obtain
    an (N, 1, H, W) attention map.

    Args:
        kernel_size: convolution kernel size, either 3 or 7.
    """
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        # 'same' padding for the two allowed odd kernel sizes.
        padding = 1 if kernel_size == 3 else 3
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid spatial weights (N, 1, H, W) for input x (N, C, H, W)."""
        pooled_avg = torch.mean(x, dim=1, keepdim=True)
        pooled_max, _ = torch.max(x, dim=1, keepdim=True)
        descriptor = torch.cat([pooled_avg, pooled_max], dim=1)
        return self.sigmoid(self.conv1(descriptor))
# Adding attention to ResNet: CBAM cannot be inserted inside the residual blocks
# without changing the network structure (which would make pre-trained weights
# unusable). Placing it only after the first conv and after the last conv stage
# keeps the original layout, so pre-trained parameters can still be loaded.
class ResNet(nn.Module):
    """ResNet backbone with CBAM attention added at two points only: after the
    stem convolution and after the last residual stage.

    Keeping CBAM out of the residual blocks leaves the standard ResNet weight
    layout unchanged, so ImageNet pre-trained parameters can still be loaded
    for everything except the two attention modules.

    NOTE(review): `block` is expected to be a torchvision-style BasicBlock or
    Bottleneck (with an `expansion` attribute), and `conv1x1` a 1x1-conv
    helper — both defined elsewhere; confirm against the surrounding project.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        # CBAM inserted after the first (stem) convolution layer.
        self.ca = ChannelAttention(self.inplanes)
        self.sa = SpatialAttention()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        # CBAM inserted after the last convolution stage. By this point the
        # _make_layer calls above have advanced self.inplanes to
        # 512 * block.expansion, so the attention width matches layer4's output.
        self.ca1 = ChannelAttention(self.inplanes)
        self.sa1 = SpatialAttention()
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one residual stage of `blocks` units.

        Only the first unit downsamples (or dilates, if `dilate`); a 1x1
        projection `downsample` is created when the shortcut needs to change
        shape. Side effect: advances self.inplanes to planes * block.expansion.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            # Trade stride for dilation to keep spatial resolution.
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Stem -> CBAM -> four residual stages -> CBAM -> pooled linear head."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # Attention weights are multiplied back onto the features
        # (channel first, then spatial, per the CBAM ordering).
        x = self.ca(x) * x
        x = self.sa(x) * x
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Second CBAM after the final residual stage.
        x = self.ca1(x) * x
        x = self.sa1(x) * x
        x = self.avgpool(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        return x
# NOTE(review): the lines below are an excerpt repeated from ResNet.__init__
# above to highlight where CBAM is inserted; they are not valid at module
# level (`self` is undefined here).
# CBAM after the first (stem) convolution layer
self.ca = ChannelAttention(self.inplanes)
self.sa = SpatialAttention()
# CBAM after the last convolution stage
self.ca1 = ChannelAttention(self.inplanes)
self.sa1 = SpatialAttention()