Attention is a very important building block in deep learning, and most readers will already be familiar with it. Its job is to re-weight how much the model attends to each input object, so that a more precise result can be obtained. The basic model is shown in the figure below:
The α function measures the relevance between two known vectors; here we compute it with a Gaussian kernel.
f(·) denotes the attention pooling output: each value vi is multiplied by its attention weight and the results are summed.
For example, with query q = 57 and keys 51, 56, 58:
==> f(57) = α(57,51)*v1 + α(57,56)*v2 + α(57,58)*v3
where α(q, ki) = softmax(-1/2 * (q - ki)^2), with the softmax taken over all keys ki.
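As a minimal sketch of this pooling in plain NumPy (the keys and values below are made up purely for illustration), the same computation looks like this:

import numpy as np

def gaussian_attention(q, keys, values):
    # unnormalized scores from the Gaussian kernel: -1/2 * (q - ki)^2
    scores = -0.5 * (q - keys) ** 2
    # softmax over the keys turns the scores into attention weights that sum to 1
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()
    # f(q) is the attention-weighted sum of the values
    return (weights * values).sum()

keys = np.array([51.0, 56.0, 58.0])
values = np.array([1.0, 2.0, 3.0])  # stand-ins for v1, v2, v3
print(gaussian_attention(57.0, keys, values))  # the weight mass falls on keys 56 and 58, closest to 57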
The attention model is built as shown below; the code is adapted from an open-source implementation shared online:
import os

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Lambda, Dot, Activation, Concatenate, Layer

# debug_flag is not defined in the original snippet; it is assumed here to come from an
# environment variable. When it is 0 (the default), Attention behaves as a normal Keras
# Layer; a non-zero value lets the individual steps be run eagerly for debugging.
debug_flag = int(os.environ.get('ATTENTION_DEBUG', 0))


class Attention(object if debug_flag else Layer):

    def __init__(self, units=128, **kwargs):
        super(Attention, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        input_dim = int(input_shape[-1])
        with K.name_scope(self.name if not debug_flag else 'attention'):
            # projects every hidden state before it is scored
            self.attention_score_vec = Dense(input_dim, use_bias=False, name='attention_score_vec')
            # extracts the last hidden state h_t from the sequence
            self.h_t = Lambda(lambda x: x[:, -1, :], output_shape=(input_dim,), name='last_hidden_state')
            # dot product of h_t with every projected hidden state -> raw scores
            self.attention_score = Dot(axes=[1, 2], name='attention_score')
            # softmax over the time steps -> attention weights
            self.attention_weight = Activation('softmax', name='attention_weight')
            # attention-weighted sum of the hidden states -> context vector
            self.context_vector = Dot(axes=[1, 1], name='context_vector')
            # concatenates the context vector with h_t
            self.attention_output = Concatenate(name='attention_output')
            # final tanh projection down to the requested number of units
            self.attention_vector = Dense(self.units, use_bias=False, activation='tanh', name='attention_vector')
    # noinspection PyUnusedLocal
    def call(self, inputs, training=None, **kwargs):
        """
        Many-to-one attention mechanism for Keras.
        @param inputs: 3D tensor with shape (batch_size, time_steps, input_dim).
        @param training: not used in this layer.
        @return: 2D tensor with shape (batch_size, units)
        @author: felixhao28, philipperemy.
        """
        if debug_flag:
            self.build(inputs.shape)
        # Step 1: project every hidden state -> (batch_size, time_steps, input_dim)
        score_first_part = self.attention_score_vec(inputs)
        # Step 2: take the last hidden state h_t -> (batch_size, input_dim)
        h_t = self.h_t(inputs)
        # Step 3: score each time step against h_t -> (batch_size, time_steps)
        score = self.attention_score([h_t, score_first_part])
        # Step 4: softmax over the time steps -> attention weights
        attention_weights = self.attention_weight(score)
        # Step 5: attention-weighted sum of the hidden states -> context vector
        context_vector = self.context_vector([inputs, attention_weights])
        # Step 6: concatenate the context vector with h_t, then project with tanh
        pre_activation = self.attention_output([context_vector, h_t])
        attention_vector = self.attention_vector(pre_activation)
        return attention_vector
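A quick shape check (a minimal sketch; the batch size, number of time steps, and feature dimension below are made up) shows the layer defined above mapping (batch_size, time_steps, input_dim) to (batch_size, units):

import numpy as np
import tensorflow as tf

batch_size, time_steps, input_dim = 4, 10, 8  # assumed toy dimensions
dummy = tf.constant(np.random.rand(batch_size, time_steps, input_dim), dtype=tf.float32)
out = Attention(units=32)(dummy)  # uses the Attention layer defined above
print(out.shape)  # expected: (4, 32)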
Next comes how to use it:
import numpy as np
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import load_model, Model

from attention import Attention  # the Attention layer defined above, saved as attention.py


def fun():
    # time_steps and input_dim are not defined in the original snippet;
    # the values below are example dimensions so the function runs as-is.
    time_steps, input_dim = 10, 1
    model_input = Input(shape=(time_steps, input_dim))
    x = LSTM(64, return_sequences=True)(model_input)  # keep the full sequence for attention
    x = Attention(units=32)(x)                        # attention is applied here
    x = Dense(1)(x)                                   # single regression output
    model = Model(model_input, x)
    model.compile(loss='mae', optimizer='adam')
    model.summary()
    return model  # returned so the model can be trained by the caller
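As a usage sketch (purely synthetic data whose shapes match the example dimensions assumed in fun() above), the returned model can then be trained directly:

import numpy as np

model = fun()
x_train = np.random.rand(100, 10, 1)  # 100 random sequences of 10 steps with 1 feature
y_train = np.random.rand(100, 1)      # random regression targets, purely illustrative
model.fit(x_train, y_train, epochs=2, batch_size=16)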