文章目录
前置阅读
GLMP:任务型对话中全局到局部的记忆指针网络 论文阅读及代码解析
Dynamic Fusion Network for Multi-Domain End-to-end Task-Oriented Dialog
参数设定
{
'dataset': 'kvr', 'epoch': 1000, 'fixed': False, 'random_seed': 1, 'embeddings_dim': 128, 'hidden': 128, 'batch': 32, 'learn': 0.001, 'drop': '0.15', 'unk_mask': 1, 'gpu': 'True', 'layer': 3, 'layer_r': 2, 'limit': -10000, 'path': None, 'clip': 10, 'count': 8, 'teacher_forcing_ratio': 0.9, 'evalp': 1, 'addName': 'SMD', 'genSample': 0, 'earlyStop': 'ENTF1', 'record': 1, 'output': 'SMD.log'}
与GLMP的主要差异:
batch_size: 8 → 32
drop: 0.2 → 0.15
layer_r (GRU layer): 1 → 2
teacher_forcing_ratio: 0.5 → 0.9
count: 早停的耐心值(patience),对应如下训练循环:
# Early-stopping loop: evaluate every `evalp` epochs and stop after `count`
# consecutive evaluations without improvement (GLMP's scheduling is unchanged).
if (epoch + 1) % int(args['evalp']) == 0:
    # Dev-set evaluation; `res` is the metric selected by `early_stop`
    res = model.evaluate(dev, avg_best, early_stop=early_stop)
    # Scheduler is stepped with the dev metric (plateau-style step — signature
    # matches ReduceLROnPlateau; confirm against the model definition)
    model.scheduler.step(res)
    if res >= avg_best:
        avg_best = res
        cnt = 0  # reset the patience counter on improvement
    else:
        cnt += 1
    if cnt == args['count']:
        print("Ran out of patient, early stop...")
        break
即连续count个epoch评估指标没有提升就早停,并未改变GLMP的学习速率调度策略。
数据处理
GLMP
sketch_response = generate_template(global_entity, r, gold_ent, kb_arr, task_type)
DF-Net
sketch_response, gold_sketch = generate_template(global_entity, r, gold_ent, kb_arr, task_type)
即多返回了gold_sketch:
Dataset
仅列举增改代码
class Dataset(data.Dataset):
    """Custom data.Dataset compatible with data.DataLoader."""
    # Only the lines added/changed relative to GLMP are shown here;
    # `...` marks code elided by the article.
    def __init__(self, data_info, src_word2id, trg_word2id, lang):
        ...
        ...
    def __getitem__(self, index):
        ...
        ...
        # New in DF-Net: a char-level view of the dialog history
        conv_char_arr, conv_char_length = self.preprocess(conv_arr, self.src_word2id, trg=False, char=True)
        ...
        data_info['conv_char_arr'] = conv_char_arr
        data_info['conv_char_length'] = conv_char_length
        ...
        # Raw (un-indexed) sketch response string — presumably kept for the
        # gold-sketch supervision DF-Net adds; confirm against the loss code
        data_info['gold_sketch_response'] = self.data_info['sketch_response'][index]
        ...
...
def preprocess(self, sequence, word2id, trg=True, char=False):
    """Convert a token sequence to index tensors; with char=True, flatten
    every word into character indices (OOV chars map to UNK_token)."""
    if trg:
        ...
    elif char:
        # Per-word character counts; word[0] is the token string
        # (each `word` appears to be a memory triple — confirm upstream)
        length = torch.Tensor([len(word[0]) for word in sequence])
        char_arr = []
        for word in sequence:
            for char in word[0]:  # NOTE(review): loop var shadows the `char` parameter
                temp = self.lang.char2index[char] if char in self.lang.char2index else UNK_token
                char_arr.append(temp)
        # Flat 1-D char-id tensor plus per-word lengths used to re-split later
        return torch.Tensor(char_arr), length
...
def collate_fn(self, data):
...
def merge_char(chars, length, max_seq_len):
    """Pad per-word character ids into a fixed (batch*seq, word) grid.

    Args:
        chars: list of 1-D tensors, one per dialog — the flat char ids
            produced by preprocess(..., char=True).
        length: list of 1-D tensors — per-word character counts per dialog.
        max_seq_len: padded word count per dialog (conv_arr.size(1)).

    Returns:
        seqs_char: (batch*max_seq_len, max_word_len) char ids, padded with 1,
            rows sorted by descending word length.
        seqs_char_length: word lengths matching the sorted rows.
        char_seq_recover: permutation restoring the original word order.
    """
    # Longest word in the batch. (Replaces the original
    # `max([max(leng.long())] for leng in length)[0].item()`, which built
    # singleton lists and relied on tensor-comparison truthiness.)
    max_word_len = max(int(leng.long().max().item()) for leng in length)
    # Pad with 1s: 1 doubles as the char PAD and as the length of empty
    # slots, so a packed RNN never sees a zero length.
    seqs_char = torch.ones((len(length), max_seq_len, max_word_len))
    seqs_char_length = torch.ones((len(length), max_seq_len))
    for i, leng in enumerate(length):
        seqs_char_length[i, :len(leng)] = leng
        start = 0
        # Re-split the flat char stream of dialog i into its words.
        for ii, word_len in enumerate(leng.long()):
            seqs_char[i][ii][:word_len] = chars[i][start:start + word_len]
            start += word_len
    # Flatten dialogs so every word becomes one row.
    seqs_char = seqs_char.view(-1, max_word_len)
    seqs_char_length = seqs_char_length.view(seqs_char.size(0), )
    # Sort rows by length (descending) and keep the inverse permutation so
    # callers can restore the original word order after the char encoder.
    seqs_char_length, char_perm_idx = seqs_char_length.sort(0, descending=True)
    seqs_char = seqs_char[char_perm_idx]
    _, char_seq_recover = char_perm_idx.sort(0, descending=False)
    return seqs_char, seqs_char_length, char_seq_recover
...
...
max_seq_len = conv_arr.size(1)  # padded word count per dialog in this batch
# Build the padded, length-sorted char grid for the whole batch
conv_char_arr, conv_char_length, char_seq_recover = merge_char(item_info['conv_char_arr'],
                                                               item_info['conv_char_length'], max_seq_len)
# Per-sample domain label index, shape (batch, 1)
label_arr = _cuda(torch.Tensor([domains[label] for label in item_info['domain']]).long().unsqueeze(-1))
...
conv_char_arr = _cuda(conv_char_arr.contiguous())
conv_char_length = _cuda(conv_char_length.contiguous())
char_seq_recover = _cuda(char_seq_recover.contiguous())
...
# NOTE(review): the excerpt zeroes these keys; presumably the cuda tensors
# built above are returned through another dict in the elided code — confirm
# against the full collate_fn source.
item_info['conv_char_arr'] = 0
item_info['conv_char_length'] = 0
item_info['char_seq_recover'] = 0
item_info['label_arr'] = 0
主要是多了一个char级的输入处理。
Train
GLMP
all_decoder_outputs_vocab, all_decoder_outputs_ptr, _, _, global_pointer = self.encode_and_decode(data, max_target_length, use_teacher_forcing, False)
DF-Net
all_decoder_outputs_vocab, all_decoder_outputs_ptr, _, _, global_pointer, label_e, label_d, label_mix_e, label_mix_d = self.encode_and_decode(
data, max_target_length, use_teacher_forcing, False)
相同的输入,多输出了label_e, label_d, label_mix_e, label_mix_d
model.encode_and_decode
GLMP
dh_outputs, dh_hidden = self.encoder(conv_story, data['conv_arr_lengths'])
DF-Net
# Encode dialog history and KB to vectors
dh_outputs, dh_hidden, label_e, label_mix_e = self.encoder(conv_story, data['conv_arr_lengths'],
data['conv_char_arr'],
data['conv_char_length'],
data['char_seq_recover'],
data['domain'])
Encoder
GLMP
self.encoder = ContextRNN(lang.n_words, hidden_size, dropout)
DF-Net
self.encoder = ContextEncoder(lang.n_words, hidden_size, dropout, lang.n_chars, domains)
lang.n_chars 传入后并未被使用,属于冗余代码。
GLMP
class ContextRNN(nn.Module):
    """GLMP dialog-history encoder: embedding + bidirectional GRU."""
    def __init__(self, input_size, hidden_size, dropout, n_layers=1):
        super(ContextRNN, self).__init__()
        self.input_size = input_size    # vocabulary size (lang.n_words at the call site)
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.dropout_layer = nn.Dropout(dropout)
        # padding_idx keeps the PAD embedding fixed at zero
        self.embedding = nn.Embedding(input_size, hidden_size, padding_idx=PAD_token)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout, bidirectional=True)
        # Projects concatenated forward/backward states back to hidden_size
        self.W = nn.Linear(2*hidden_size, hidden_size)
DF-Net
class ContextEncoder(nn.Module):
def __init__(self, input_size, hidden_size, dropout, vocab_size, domains, n_layers=args['layer_r']):
super(ContextEncoder, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.n_layers = n_layers
self