Attention modules have recently become common in image segmentation networks and bring a clear accuracy boost, so I decided to ride the wave too. Below is a TensorFlow (1.x) implementation of the two attention modules from DANet: a position attention module (PAM) that attends over spatial regions, and a channel attention module (CAM) that attends over channels.
import tensorflow as tf

def PAM_module(inputs):
    # Position attention: every spatial location attends to every other location.
    # Assumes a fully static NHWC input shape (batch size known at graph construction).
    inputs_shape = inputs.get_shape().as_list()
    batchsize, height, width, C = inputs_shape[0], inputs_shape[1], inputs_shape[2], inputs_shape[3]
    # 1x1 convolutions: query/key are reduced to C//8 channels, value keeps all C channels
    query_filter = tf.Variable(tf.truncated_normal([1, 1, C, C // 8], dtype=tf.float32, stddev=0.1), name='pam_query_weights')
    key_filter = tf.Variable(tf.truncated_normal([1, 1, C, C // 8], dtype=tf.float32, stddev=0.1), name='pam_key_weights')
    value_filter = tf.Variable(tf.truncated_normal([1, 1, C, C], dtype=tf.float32, stddev=0.1), name='pam_value_weights')
    query_conv = tf.nn.conv2d(inputs, query_filter, strides=[1, 1, 1, 1], padding='VALID')
    key_conv = tf.nn.conv2d(inputs, key_filter, strides=[1, 1, 1, 1], padding='VALID')
    value_conv = tf.nn.conv2d(inputs, value_filter, strides=[1, 1, 1, 1], padding='VALID')
    # flatten the spatial dimensions: query is (B, HW, C//8), key is (B, C//8, HW)
    proj_query = tf.reshape(query_conv, [batchsize, height * width, -1])
    proj_key = tf.transpose(tf.reshape(key_conv, [batchsize, height * width, -1]), perm=[0, 2, 1])
    # energy is (B, HW, HW): pairwise similarity between all positions
    energy = tf.matmul(proj_query, proj_key)
    attention = tf.nn.softmax(energy)  # softmax over the last axis, i.e. over key positions
    proj_value = tf.reshape(value_conv, [batchsize, height * width, -1])
    # attention-weighted sum of the value features, restored to NHWC layout
    out = tf.matmul(attention, proj_value)
    out = tf.reshape(out, [batchsize, height, width, C])
    # learnable scale initialised to 0, as in DANet, so the attention branch fades in during training
    gamma = tf.Variable(tf.zeros([1]), name='pam_gamma')
    return gamma * out + inputs
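A quick shape sanity check, assuming TF 1.x graph mode (the 8x32x32x64 input is an arbitrary size I picked for illustration; note that PAM builds a 1024x1024 attention matrix per image at this resolution, so memory grows quadratically with the feature-map size):

x = tf.placeholder(tf.float32, [8, 32, 32, 64])  # fully static shape, as the reshapes above require
y = PAM_module(x)
print(y.get_shape().as_list())  # -> [8, 32, 32, 64], same shape as the input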
def CAM_module(inputs):
    # Channel attention: every channel attends to every other channel; no convolutions are used.
    inputs_shape = inputs.get_shape().as_list()
    batchsize, height, width, C = inputs_shape[0], inputs_shape[1], inputs_shape[2], inputs_shape[3]
    # flatten to (B, C, HW) for the query and (B, HW, C) for the key
    proj_query = tf.transpose(tf.reshape(inputs, [batchsize, height * width, -1]), perm=[0, 2, 1])
    proj_key = tf.reshape(inputs, [batchsize, height * width, -1])
    # energy is (B, C, C): pairwise similarity between all channels
    energy = tf.matmul(proj_query, proj_key)
    # as in DANet, subtract the energy from each row's maximum before the softmax:
    # a reduce-max over the last axis, mirroring torch.max(energy, -1, keepdim=True)[0]
    energy_new = tf.reduce_max(energy, axis=-1, keepdims=True) - energy
    attention = tf.nn.softmax(energy_new)
    proj_value = tf.transpose(tf.reshape(inputs, [batchsize, height * width, -1]), perm=[0, 2, 1])
    out = tf.transpose(tf.matmul(attention, proj_value), perm=[0, 2, 1])
    out = tf.reshape(out, [batchsize, height, width, C])
    # learnable scale initialised to 0, as in DANet
    gamma = tf.Variable(tf.zeros([1]), name='cam_gamma')
    return gamma * out + inputs
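In DANet the two branches run on the same backbone feature map and are fused by element-wise summation (the paper also puts a conv layer on each branch before summing, which this sketch omits). The wrapper below is my own illustrative glue code, not part of the original modules:

def dual_attention_head(feats):
    # run both attention branches on the same features and fuse them by summation
    pam_out = PAM_module(feats)
    cam_out = CAM_module(feats)
    return pam_out + cam_out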