The previous sections covered preparing the dataset and reading it batch by batch, along with some small mistakes made during implementation; this section walks through building the whole model.
seq2seq model
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
from tensorflow.python.layers import core as layers_core


def getLayeredCell(layer_size, num_units):
    # stack layer_size LSTM cells into a single multi-layer cell
    return rnn.MultiRNNCell([rnn.BasicLSTMCell(num_units) for i in range(layer_size)])


def encoder(vinput, in_seq_len, num_units, layer_size):
    # encode the input sequence; the final RNN state summarizes it as a vector
    encode_cell = getLayeredCell(layer_size, num_units)
    encoder_output, encoder_state = tf.nn.dynamic_rnn(
        cell=encode_cell,
        inputs=vinput,
        sequence_length=in_seq_len,
        dtype=vinput.dtype,
        time_major=False)
    return encoder_output, encoder_state


def decoder(in_seq_len, target_seq, target_seq_len,
            encoder_state, num_units, layers, output_size, embedding):
    # project decoder outputs to vocabulary-sized logits
    projection_layer = tf.layers.Dense(output_size)
    decoder_cell = getLayeredCell(layers, num_units)
    # training branch: feed the ground-truth target sequence step by step
    with tf.variable_scope("decoder"):
        helper = tf.contrib.seq2seq.TrainingHelper(
            target_seq, target_seq_len, time_major=False)
        decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell, helper, encoder_state, output_layer=projection_layer)
        # decoding is capped at 20 steps
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder, impute_finished=True, maximum_iterations=20)
    # inference branch: greedy decoding, sharing weights with the training branch
    with tf.variable_scope("decoder", reuse=True):
        batch_size = tf.shape(in_seq_len)[0]
        # start decoding from token id 2 and stop at token id 3
        # (the start/end markers in this vocabulary)
        start_tokens = tf.tile(tf.constant([2], dtype=tf.int32),
                               [batch_size], name='start_token')
        infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding, start_tokens, 3)
        infer_decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell, infer_helper, encoder_state, output_layer=projection_layer)
        infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            infer_decoder, impute_finished=True, maximum_iterations=20)
    return outputs.rnn_output, infer_outputs, outputs.sample_id
The code above defines the functions that build the seq2seq encoder and decoder; they are called below when the full model is constructed.
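Before wiring these functions into the full model, it can help to check the encoder's output shapes in isolation. The snippet below is a minimal sketch, assuming made-up test dimensions (a batch of 2, maximum length 5, 8-dimensional embeddings) rather than the real data:

import numpy as np
import tensorflow as tf
import seq2seq

# random stand-in for embedded input: [batch, time, embedding_dim]
fake_input = tf.constant(np.random.rand(2, 5, 8), dtype=tf.float32)
seq_len = tf.constant([5, 3], dtype=tf.int32)
# two-layer LSTM encoder with 16 units per layer
enc_out, enc_state = seq2seq.encoder(fake_input, seq_len, num_units=16, layer_size=2)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(enc_out).shape)  # (2, 5, 16): [batch, time, num_units]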
model
import tensorflow as tf
import seq2seq
import reader
import numpy as np
from os import path
class Model():
    def __init__(self, input_file, vocab_file,
                 num_units, layers,
                 batch_size, learning_rate, output_dir,
                 save_step=100, eval_step=1000,
                 param_histogram=False, restore_model=False,
                 init_train=True, init_infer=False):
        self.num_units = num_units
        self.layers = layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.save_step = save_step
        self.eval_step = eval_step
        self.param_histogram = param_histogram
        self.restore_model = restore_model
        self.init_train = init_train
        self.init_infer = init_infer
        # pre-trained embedding matrix (.npy) prepared in the earlier sections
        self.matrix = np.load(vocab_file)
        self.model_file = path.join(output_dir, 'model.ckpt')
        self.input_file = input_file
        self.vocab_file = vocab_file
        self.train_data = reader.reader(input_file, vocab_file, batch_size)
        if self.init_train is True:
            self._init_train()
    def _init_train(self):
        self.train_graph = tf.Graph()
        with self.train_graph.as_default():
            # the .npy matrix is float64, so downstream tensors are float64 too
            self.embedding = tf.Variable(self.matrix)
            self.train_in_seq = tf.placeholder(
                tf.int32, shape=[self.batch_size, None], name='train_in_seq')
            self.train_in_seq_len = tf.placeholder(
                tf.int32, shape=[self.batch_size], name='train_in_seq_len')
            self.train_target_seq = tf.placeholder(
                tf.int32, shape=[self.batch_size, None], name='train_target_seq')
            self.train_target_seq_len = tf.placeholder(
                tf.int32, shape=[self.batch_size], name='train_target_seq_len')
            # embedding layer
            with tf.name_scope("embedding"):
                self.embed_input = tf.nn.embedding_lookup(
                    self.embedding, self.train_in_seq, name='embed_input')
                self.embed_target = tf.nn.embedding_lookup(
                    self.embedding, self.train_target_seq, name='embed_target')
            with tf.name_scope("encoder"):
                self.encoder_output, self.encoder_state = seq2seq.encoder(
                    self.embed_input, self.train_in_seq_len,
                    self.num_units, self.layers)
            with tf.name_scope("decoder"):
                # 16025 is the vocabulary size of this dataset
                self.decoder_output, self.infer_output, self.inf_id = seq2seq.decoder(
                    self.train_in_seq_len, self.embed_target,
                    self.train_target_seq_len, self.encoder_state,
                    self.num_units, self.layers, 16025, self.embedding)
            with tf.name_scope("loss"):
                # mask out padded positions so they do not contribute to the loss
                mask = tf.sequence_mask(
                    self.train_target_seq_len,
                    tf.shape(self.train_target_seq)[1])
                self.loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.decoder_output,
                    targets=self.train_target_seq,
                    weights=tf.cast(mask, tf.float64))
                tf.summary.scalar('loss', self.loss)
            params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, params)
            # clip gradients to a global norm of 0.5 to stabilize training
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 0.5)
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate
            ).apply_gradients(zip(clipped_gradients, params))
            if self.param_histogram:
                for v in tf.trainable_variables():
                    # ':' is not allowed in summary names, so replace it
                    tf.summary.histogram('train_' + v.name.replace(':', '_'), v)
            self.train_summary = tf.summary.merge_all()
            self.train_init = tf.global_variables_initializer()
            self.train_saver = tf.train.Saver()
        self.train_session = tf.Session(graph=self.train_graph)
    def train(self, epochs, start=0):
        self.train_session.run(self.train_init)
        self.log_writer = tf.summary.FileWriter('output/', self.train_session.graph)
        # epochs is used here as the total number of training steps
        for step in range(start, epochs):
            data = next(self.train_data.read())
            in_seq = data['in_seq']
            in_seq_len = data['in_seq_len']
            target_seq = data['target_seq']
            target_seq_len = data['target_seq_len']
            output, loss, train, summary = self.train_session.run(
                [self.decoder_output, self.loss, self.train_op, self.train_summary],
                feed_dict={
                    self.train_in_seq: in_seq,
                    self.train_in_seq_len: in_seq_len,
                    self.train_target_seq: target_seq,
                    self.train_target_seq_len: target_seq_len})
            if step % 5 == 0:
                self.log_writer.add_summary(summary, step)
                print(loss)
            if step % 20 == 0:
                self.train_saver.save(self.train_session, self.model_file)
                print("Saving model. Step: %d, loss: %f" % (step, loss))
    def prediction(self):
        # read a single sample and run greedy decoding on it
        r = reader.reader(self.input_file, self.vocab_file, 1)
        data = next(r.read())
        in_seq = data['in_seq']
        in_seq_len = data['in_seq_len']
        target_seq = data['target_seq']
        target_seq_len = data['target_seq_len']
        module_file = tf.train.latest_checkpoint('output/')
        self.train_saver.restore(self.train_session, module_file)
        [output, output_id] = self.train_session.run(
            [self.infer_output, self.inf_id],
            feed_dict={
                self.train_in_seq: in_seq,
                self.train_in_seq_len: in_seq_len,
                self.train_target_seq: target_seq,
                self.train_target_seq_len: target_seq_len})
        print(self.train_data.inttoword(in_seq),
              self.train_data.inttoword(target_seq),
              self.train_data.inttoword(output.sample_id),
              self.train_data.inttoword(output_id))
        return in_seq, output
if __name__ == "__main__":
    m = Model('part-r-00000', "vocmat.npy",
              512, 4,
              1, 0.01, 'output/')
The code consists of four main parts: __init__ sets the model's hyperparameters, _init_train builds each layer of the model graph, train runs the training loop, and prediction checks what the model has learned. Overall it is a simplified version of several online tutorials, though not as cleanly packaged as theirs.
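As a minimal usage sketch (the step count of 1000 is an arbitrary example value; the file paths are the same ones used above), training the model and then decoding a sample looks like this:

m = Model('part-r-00000', "vocmat.npy",
          512, 4,
          1, 0.01, 'output/')
m.train(1000)    # run 1000 training steps, saving a checkpoint every 20 steps
m.prediction()   # restore the latest checkpoint and greedily decode one sample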