Coordinate Attention注意力机制注释讲解
原文链接:https://arxiv.org/pdf/2103.02907.pdf
源码链接:https://github.com/Andrew-Qibin/CoordAttention
该注意力机制用于轻量化模型,可以加入YOLOv5s尝试。
class CoordAtt(nn.Module):
    """Coordinate Attention (Hou et al., CVPR 2021, arXiv:2103.02907).

    Factorizes channel attention into two 1-D average-pooling operations
    along the height and width axes, so positional information is kept
    while the module stays light-weight (e.g. suitable for YOLOv5s).

    Args:
        inp: number of input channels.
        oup: number of output channels. Should equal ``inp`` so the final
            element-wise re-weighting broadcasts against the input.
        reduction: channel reduction ratio for the shared 1x1 bottleneck
            conv; hidden width is ``max(8, inp // reduction)``.
    """

    def __init__(self, inp, oup, reduction=32):
        super(CoordAtt, self).__init__()
        # Pool over the width axis -> (n, c, h, 1);
        # pool over the height axis -> (n, c, 1, w).
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))
        # Bottleneck width, floored at 8 channels (matches the ablation
        # study in the paper).
        mip = max(8, inp // reduction)
        self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(mip)
        # nn.Hardswish is the built-in equivalent of the upstream repo's
        # custom h_swish (x * relu6(x + 3) / 6); using it removes the
        # dependency on a helper not defined in this file.
        self.act = nn.Hardswish()
        # Per-direction 1x1 convs that restore the output channel count.
        self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
        self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Re-weight ``x`` with direction-aware attention maps.

        Args:
            x: input tensor of shape (n, c, h, w).

        Returns:
            Tensor of the same shape as ``x``.
        """
        identity = x
        n, c, h, w = x.size()
        # (n, c, h, 1): averaged over width.
        x_h = self.pool_h(x)
        # (n, c, 1, w) permuted to (n, c, w, 1) so it can be concatenated
        # with x_h along dim 2 and share the bottleneck conv below.
        x_w = self.pool_w(x).permute(0, 1, 3, 2)
        y = torch.cat([x_h, x_w], dim=2)  # (n, c, h + w, 1)
        y = self.conv1(y)
        y = self.bn1(y)
        y = self.act(y)
        # Split back into the two directional descriptors.
        x_h, x_w = torch.split(y, [h, w], dim=2)
        x_w = x_w.permute(0, 1, 3, 2)  # back to (n, mip, 1, w)
        # Sigmoid gates in (0, 1) for each spatial direction.
        a_h = self.conv_h(x_h).sigmoid()  # (n, oup, h, 1)
        a_w = self.conv_w(x_w).sigmoid()  # (n, oup, 1, w)
        # Broadcasted product applies row-wise and column-wise weights
        # to the original feature map.
        out = identity * a_w * a_h
        return out