def attention(queries, keys, keys_length):
    '''
    queries: shape [B, H], the candidate item embedding (i_emb)
    keys: shape [B, T, H], the user behavior embeddings (h_emb)
    keys_length: shape [B], the true length of each user's behavior sequence (self.sl)
    B: batch size; T: length of the user behavior sequence; H: embedding size
    '''
    queries_hidden_units = queries.get_shape().as_list()[-1]
    # the static embedding size H
    queries = tf.tile(queries, [1, tf.shape(keys)[1]])
    # [B, H] -> [B, T*H]
    queries = tf.reshape(queries, [-1, tf.shape(keys)[1], queries_hidden_units])
    # [B, T*H] -> [B, T, H]: the candidate embedding repeated once per time step
    din_all = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
    # input to the attention MLP, shape [B, T, 4*H]
    d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid,
                                    name='f1_att', reuse=tf.AUTO_REUSE)  # [B, T, 80]
    d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid,
                                    name='f2_att', reuse=tf.AUTO_REUSE)  # [B, T, 40]
    d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None,
                                    name='f3_att', reuse=tf.AUTO_REUSE)  # [B, T, 1]
    d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(keys)[1]])  # [B, 1, T]
    outputs = d_layer_3_all  # raw attention scores, [B, 1, T]
    # Mask
    key_masks = tf.sequence_mask(keys_length, tf.shape(keys)[1])  # [B, T]
    key_masks = tf.expand_dims(key_masks, 1)  # [B, 1, T]
    paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
    # positions beyond each sample's true sequence length are filled with a
    # very large negative score, so softmax pushes their weight to ~0
    outputs = tf.where(key_masks, outputs, paddings)  # [B, 1, T]
    # Scale
    outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5)
    # Activation
    outputs = tf.nn.softmax(outputs)  # [B, 1, T]
    # these are the attention weights, i.e. w in Eq. (3) of the DIN paper
    # Weighted sum
    outputs = tf.matmul(outputs, keys)
    # [B, 1, H]
    return outputs
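
A minimal smoke test of the function above, assuming TensorFlow 1.x (the tf.layers / tf.AUTO_REUSE API used in the snippet). The batch size, sequence length, and embedding size below are arbitrary illustration values, not from the original post:

import numpy as np
import tensorflow as tf

B, T, H = 2, 4, 8  # illustrative sizes (assumption)
queries = tf.placeholder(tf.float32, [None, H])     # candidate item, i_emb
keys = tf.placeholder(tf.float32, [None, T, H])     # behavior sequence, h_emb
keys_length = tf.placeholder(tf.int32, [None])      # true lengths, self.sl

att = attention(queries, keys, keys_length)         # [B, 1, H]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(att, feed_dict={
        queries: np.random.rand(B, H),
        keys: np.random.rand(B, T, H),
        keys_length: np.array([2, 4]),  # first sample only attends over 2 steps
    })
    print(out.shape)  # (2, 1, 8)

Because the mask sets the scores of padded positions to a huge negative value before the softmax, the first sample's output is a weighted sum over its first two behaviors only.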