Walking through the code to see how the double attention computation is implemented
self.c_m = c_m
self.c_n = c_n
self.in_channels = in_channels
self.reconstruct = reconstruct
self.convA = nn.Conv2d(in_channels, c_m, kernel_size=1)  # produces feature maps A
self.convB = nn.Conv2d(in_channels, c_n, kernel_size=1)  # produces attention maps B
self.convV = nn.Conv2d(in_channels, c_n, kernel_size=1)  # produces attention vectors V
# The input is projected by three 1x1 convolutions into A, B and V
# (analogous to Q, K, V in self-attention).
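To make the shapes concrete, here is a minimal, self-contained check of what these three projections produce; the concrete values (in_channels=256, c_m=128, c_n=32, a 32x32 feature map) are chosen only for illustration and are not taken from the original code.

import torch
import torch.nn as nn

in_channels, c_m, c_n = 256, 128, 32        # illustrative values, not from the source
convA = nn.Conv2d(in_channels, c_m, kernel_size=1)
convB = nn.Conv2d(in_channels, c_n, kernel_size=1)
convV = nn.Conv2d(in_channels, c_n, kernel_size=1)

x = torch.randn(2, in_channels, 32, 32)     # dummy input feature map
print(convA(x).shape)                       # torch.Size([2, 128, 32, 32])
print(convB(x).shape)                       # torch.Size([2, 32, 32, 32])
print(convV(x).shape)                       # torch.Size([2, 32, 32, 32])

Because the kernels are 1x1, the spatial resolution is untouched; only the channel dimension changes (c_m for A, c_n for B and V).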
batch_size, c, h, w = x.size()
assert c == self.in_channels, 'input channel not equal!'
A = self.convA(x) # (B, c_m, h, w) because kernel size is 1
tmpA = A.view(batch_size, self.c_m, h * w)
B = self.convB(x) # (B, c_n, h, w)
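The forward pass above breaks off before the actual attention computation. Below is a minimal runnable sketch of the whole module, assuming the standard A²-Net formulation: B is softmax-normalized over spatial positions and used to gather c_n global descriptors from A (feature gathering), while V is softmax-normalized over the c_n descriptors and used to distribute them back to every position (feature distribution). The conv_reconstruct layer and the names global_descriptors / tmpZ are my assumptions for illustration, not necessarily those of the original code.

import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleAttention(nn.Module):
    """Sketch of double attention under the assumptions stated above."""

    def __init__(self, in_channels, c_m, c_n, reconstruct=True):
        super().__init__()
        self.c_m = c_m
        self.c_n = c_n
        self.in_channels = in_channels
        self.reconstruct = reconstruct
        self.convA = nn.Conv2d(in_channels, c_m, kernel_size=1)
        self.convB = nn.Conv2d(in_channels, c_n, kernel_size=1)
        self.convV = nn.Conv2d(in_channels, c_n, kernel_size=1)
        if reconstruct:
            # assumed: project c_m back to in_channels so the output matches the input shape
            self.conv_reconstruct = nn.Conv2d(c_m, in_channels, kernel_size=1)

    def forward(self, x):
        batch_size, c, h, w = x.size()
        assert c == self.in_channels, 'input channel not equal!'
        A = self.convA(x)                                    # (B, c_m, h, w)
        B = self.convB(x)                                    # (B, c_n, h, w)
        V = self.convV(x)                                    # (B, c_n, h, w)
        tmpA = A.view(batch_size, self.c_m, h * w)           # (B, c_m, h*w)
        attention_maps = F.softmax(B.view(batch_size, self.c_n, h * w), dim=-1)    # over positions
        attention_vectors = F.softmax(V.view(batch_size, self.c_n, h * w), dim=1)  # over c_n descriptors
        # step 1: feature gathering -- each of the c_n attention maps pools A into one descriptor
        global_descriptors = torch.bmm(tmpA, attention_maps.permute(0, 2, 1))      # (B, c_m, c_n)
        # step 2: feature distribution -- every position mixes the c_n descriptors with its own weights
        tmpZ = torch.bmm(global_descriptors, attention_vectors)                    # (B, c_m, h*w)
        tmpZ = tmpZ.view(batch_size, self.c_m, h, w)
        if self.reconstruct:
            tmpZ = self.conv_reconstruct(tmpZ)               # (B, in_channels, h, w)
        return tmpZ

# quick shape check with the illustrative sizes from above
x = torch.randn(2, 256, 32, 32)
out = DoubleAttention(256, 128, 32)(x)
print(out.shape)  # torch.Size([2, 256, 32, 32])

The two bmm calls are the heart of the module: the first compresses the h*w positions into c_n global descriptors of dimension c_m, and the second redistributes those descriptors to every position, so the cost scales with c_n rather than with the number of pixel pairs as in full self-attention.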