def attention(queries, keys, keys_length):
    '''
    queries: shape [B, H], the candidate item embedding (i_emb)
    keys: shape [B, T, H], the user behavior embeddings (h_emb)
    keys_length: shape [B], the true length of each user's behavior sequence (self.sl)
    B: batch size; T: length of the user behavior sequence; H: embedding size
    '''
    queries_hidden_units = queries.get_shape().as_list()[-1]
    # the static embedding size H
    queries = tf.tile(queries, [1, tf.shape(keys)[1]])
    # [B, H] -> [B, T*H]
    queries = tf.reshape(queries, [-1, tf.shape(keys)[1], queries_hidden_units])
    # [B, T*H] -> [B, T, H]: the candidate embedding repeated once per time step
    din_all = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
    # input to the attention MLP, shape [B, T, 4*H]
    d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid,
                                    name='f1_att', reuse=tf.AUTO_REUSE)  # [B, T, 80]
    d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid,
                                    name='f2_att', reuse=tf.AUTO_REUSE)  # [B, T, 40]
    d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None,
                                    name='f3_att', reuse=tf.AUTO_REUSE)  # [B, T, 1]
    d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(keys)[1]])  # [B, 1, T]
    outputs = d_layer_3_all  # raw attention scores, [B, 1, T]
    # Mask
    key_masks = tf.sequence_mask(keys_length, tf.shape(keys)[1])  # [B, T]
    key_masks = tf.expand_dims(key_masks, 1)  # [B, 1, T]
    paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
    # positions beyond each sample's true sequence length are filled with a
    # very large negative score, so softmax pushes their weight to ~0
    outputs = tf.where(key_masks, outputs, paddings)  # [B, 1, T]
    # Scale
    outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5)
    # Activation
    outputs = tf.nn.softmax(outputs)  # [B, 1, T]
    # these are the attention weights, i.e. w in Eq. (3) of the DIN paper
    # Weighted sum
    outputs = tf.matmul(outputs, keys)
    # [B, 1, H]
    return outputs
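
A minimal smoke test of the function above, assuming TensorFlow 1.x (the tf.layers / tf.AUTO_REUSE API used in the snippet). The batch size, sequence length, and embedding size below are arbitrary illustration values, not from the original post:

import numpy as np
import tensorflow as tf

B, T, H = 2, 4, 8  # illustrative sizes (assumption)
queries = tf.placeholder(tf.float32, [None, H])     # candidate item, i_emb
keys = tf.placeholder(tf.float32, [None, T, H])     # behavior sequence, h_emb
keys_length = tf.placeholder(tf.int32, [None])      # true lengths, self.sl

att = attention(queries, keys, keys_length)         # [B, 1, H]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(att, feed_dict={
        queries: np.random.rand(B, H),
        keys: np.random.rand(B, T, H),
        keys_length: np.array([2, 4]),  # first sample only attends over 2 steps
    })
    print(out.shape)  # (2, 1, 8)

Because the mask sets the scores of padded positions to a huge negative value before the softmax, the first sample's output is a weighted sum over its first two behaviors only.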