Reproduction of the key keyword-side code from the keyword deep semantic matching paper.
Applications: 1. For the same embedded representation, attend to it from the a-keyword and b-keyword perspectives to generate new vectors, then concatenate them into an overall representation vector (the pooling-and-concat mechanics are demonstrated in f2() below; an attention-weighted sketch follows it).
2. From the keyword masks of a and b together with the real-token masks of a and b, build a new attention-mask matrix: positions where keyword-b interacts with seq-a (and vice versa) are marked 1. This mask is then added into self-attention, giving a new keyword-aware self-attention (built in f1() below; a sketch of the injection step follows it).
# coding=utf-8
import tensorflow as tf
import numpy as np
def f1():
    """Build the keyword-aware attention mask from real (padding) masks and keyword masks."""
    a1 = np.reshape(np.arange(32), newshape=[2, 16])
    # Real-token masks: seq-a occupies the first 8 positions, seq-b the last 8.
    array_real_mask_a = np.array([
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
    ])
    array_real_mask_b = np.array([
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
    ])
    # Keyword masks: 1 marks keyword positions inside each sequence.
    array_kw_mask_a = np.array([
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    ])
    array_kw_mask_b = np.array([
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
    ])

    x1 = tf.placeholder(dtype=tf.int32, shape=[2, 16])  # token ids; unused by the fetched ops, kept from the original
    real_mask_a = tf.placeholder(dtype=tf.int32, shape=[2, 16])
    real_mask_b = tf.placeholder(dtype=tf.int32, shape=[2, 16])
    kw_mask_a = tf.placeholder(dtype=tf.int32, shape=[2, 16])
    kw_mask_b = tf.placeholder(dtype=tf.int32, shape=[2, 16])

    # Reshape so the outer product broadcasts to [batch, from_seq, to_seq].
    real_mask_a1 = tf.reshape(real_mask_a, shape=[2, 16, 1])
    real_mask_b1 = tf.reshape(real_mask_b, shape=[2, 16, 1])
    kw_mask_a1 = tf.reshape(kw_mask_a, shape=[2, 1, 16])
    kw_mask_b1 = tf.reshape(kw_mask_b, shape=[2, 1, 16])
    broadcast_ones = tf.ones(shape=[2, 16, 1], dtype=tf.int32)

    # kw_mask_a_new[n, i, j] = 1 iff position i is a real token of seq-a and position j is a keyword of seq-b.
    kw_mask_a_new = broadcast_ones * real_mask_a1 * kw_mask_b1
    # kw_mask_b_new[n, i, j] = 1 iff position i is a real token of seq-b and position j is a keyword of seq-a.
    kw_mask_b_new = broadcast_ones * real_mask_b1 * kw_mask_a1
    # The two parts never overlap (real_mask_a and real_mask_b are disjoint), so addition merges them.
    kw_mask = tf.math.add(kw_mask_a_new, kw_mask_b_new)

    with tf.Session() as sess:
        result = sess.run([kw_mask_a_new, kw_mask_b_new, kw_mask],
                          feed_dict={x1: a1,
                                     real_mask_a: array_real_mask_a,
                                     real_mask_b: array_real_mask_b,
                                     kw_mask_a: array_kw_mask_a,
                                     kw_mask_b: array_kw_mask_b})
        print(result[0])
        print('********************************')
        print(result[1])
        print('********************************')
        print(result[2])
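

# A minimal sketch (not part of the original code) of how the merged kw_mask
# from f1() could be folded into self-attention. It assumes BERT's additive
# mask convention, where disallowed positions receive a large negative bias
# before the softmax; the helper name kw_attention_adder and the -10000.0
# constant are illustrative assumptions.
def kw_attention_adder(kw_mask):
    # kw_mask: the 0/1 [batch, from_seq, to_seq] matrix built in f1().
    adder = (1.0 - tf.cast(kw_mask, tf.float32)) * -10000.0
    # A keyword-aware attention head would add this to its raw query-key
    # scores before the softmax:
    #   attention_scores = attention_scores + adder
    return adder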
def f2():
    """Mean-pool the encoder output over the a-side and b-side real tokens, then concatenate."""
    a1 = np.reshape(np.arange(24), newshape=[2, 4, 3])
    # Real-token masks over the 4 positions of each sequence in the batch.
    array_real_mask_a = np.array([
        [1, 0, 0, 0],
        [1, 1, 0, 0]
    ])
    array_real_mask_b = np.array([
        [0, 0, 0, 1],
        [0, 0, 1, 1]
    ])

    seq_output = tf.placeholder(dtype=tf.int32, shape=[2, 4, 3])  # stands in for a [batch, seq_len, hidden] encoder output
    real_mask_a = tf.placeholder(dtype=tf.int32, shape=[2, 4])
    real_mask_b = tf.placeholder(dtype=tf.int32, shape=[2, 4])
    real_mask_a_exp = tf.reshape(real_mask_a, shape=[2, 4, 1])
    real_mask_b_exp = tf.reshape(real_mask_b, shape=[2, 4, 1])

    # Masked sum over the sequence axis divided by the number of real tokens: a masked mean.
    rep_a_1 = tf.reduce_sum(seq_output * real_mask_a_exp, 1)
    real_len_a = tf.reduce_sum(real_mask_a_exp, 1)
    rep_a = tf.divide(rep_a_1, real_len_a)
    rep_b_1 = tf.reduce_sum(seq_output * real_mask_b_exp, 1)
    real_len_b = tf.reduce_sum(real_mask_b_exp, 1)
    rep_b = tf.divide(rep_b_1, real_len_b)
    # Concatenate the two pooled vectors into the final pair representation.
    final_rep = tf.concat([rep_a, rep_b], -1)

    with tf.Session() as sess:
        result = sess.run(fetches=[rep_a_1, rep_b_1, final_rep],
                          feed_dict={
                              seq_output: a1,
                              real_mask_a: array_real_mask_a,
                              real_mask_b: array_real_mask_b
                          })
        print('****************************')
        print(a1)
        print('****************************')
        print(result[0])
        print('****************************')
        print(result[1])
        print('****************************')
        print(result[2])
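

# f2() above pools with a plain masked mean, while the application note at the
# top speaks of generating the vectors via attention. Below is a minimal sketch
# of that attention-weighted variant; attn_pool, its 'pool_query' variable, and
# the -10000.0 masking constant are illustrative assumptions, not code from the
# paper, and a float [batch, seq_len, hidden] seq_output is assumed.
def attn_pool(seq_output, real_mask, scope):
    # seq_output: [batch, seq_len, hidden] float encoder output.
    # real_mask:  [batch, seq_len] 0/1 mask selecting the tokens to pool over.
    with tf.variable_scope(scope):
        hidden_size = seq_output.shape[-1].value
        query = tf.get_variable('pool_query', shape=[hidden_size])           # learned pooling query
        scores = tf.reduce_sum(seq_output * query, axis=-1)                  # [batch, seq_len]
        scores += (1.0 - tf.cast(real_mask, tf.float32)) * -10000.0          # hide padding from the softmax
        weights = tf.nn.softmax(scores, axis=-1)                             # attention weights over positions
        return tf.reduce_sum(seq_output * tf.expand_dims(weights, -1), axis=1)  # [batch, hidden]
# Usage would mirror f2(): rep_a = attn_pool(seq_output, real_mask_a, 'pool_a'),
# rep_b = attn_pool(seq_output, real_mask_b, 'pool_b'), then concatenate.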
if __name__ == '__main__':
    f2()
Keywords-Guided Abstractive Sentence Summarization
is keyword-based abstractive summarization. The difference between the two approaches:
one (the matching paper) has the Transformer learn from keywords via a mask plus self-attention, reinforcing the embedding in the original emb +/- emb form;
the other (the summarization paper) combines the overall hidden state of the keywords with the hidden state of the original token under a sigma term, i.e. sigma(token_hidden_state + keywords_hidden_state), as the embedding reinforcement (sketched below).
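A minimal sketch of that sigma term, assuming token_hidden and keyword_hidden are both [batch, seq_len, hidden] float tensors; the gated interpolation on the return line is an illustrative reading, not the paper's confirmed equation:

def keyword_gate_fusion(token_hidden, keyword_hidden):
    # Literal reading of the note above: sigma(token_hidden + keyword_hidden).
    g = tf.sigmoid(token_hidden + keyword_hidden)
    # A common use of such a sigma term is as a gate blending the two states;
    # this interpolation is an assumption, not the paper's exact formulation.
    return g * token_hidden + (1.0 - g) * keyword_hidden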