nlp项目实践古诗创作tensorflow ---(4)模型构造

之前的几节已经介绍了数据集的准备以及数据每一个batch的读取,也介绍了实现过程中的一些小错误,这一节来介绍整个模型的构建。

seq2seq模型


        import tensorflow as tf
        from tensorflow.contrib import rnn
        import numpy as np
        from tensorflow.python.layers import core as layers_core
        
        
        def getLayeredCell(layer_size, num_units):
            """Build a stack of LSTM cells usable as a single RNN cell.

            Args:
                layer_size: number of stacked LSTM layers.
                num_units: hidden units in each LSTM layer.

            Returns:
                A MultiRNNCell wrapping `layer_size` BasicLSTMCells.
            """
            # Fixed: in the original paste the return statement sat at the same
            # indentation as the def line, which is a SyntaxError.
            return rnn.MultiRNNCell(
                [rnn.BasicLSTMCell(num_units) for _ in range(layer_size)])
        
    def encoder(vinput, in_seq_len, num_units, layer_size):
        """Encode an embedded input sequence with a stacked LSTM.

        Args:
            vinput: batch-major embedded inputs ([batch, time, dim] — TODO
                confirm against the caller's embedding_lookup output).
            in_seq_len: per-example sequence lengths.
            num_units: hidden units per LSTM layer.
            layer_size: number of stacked LSTM layers.

        Returns:
            (outputs, final_state) as produced by tf.nn.dynamic_rnn.
        """
        stacked_cell = getLayeredCell(layer_size, num_units)
        outputs, final_state = tf.nn.dynamic_rnn(
            cell=stacked_cell,
            inputs=vinput,
            sequence_length=in_seq_len,
            dtype=vinput.dtype,
            time_major=False)
        return outputs, final_state
        
    def decoder(in_seq_len, target_seq, target_seq_len,
            encoder_state, num_units, layers, output_size,embedding):
        """Build both a teacher-forced training decoder and a greedy
        inference decoder that share the same weights.

        Args:
            in_seq_len: input lengths; only its leading dimension is used,
                to derive the batch size for the inference start tokens.
            target_seq: embedded target sequence fed during training.
            target_seq_len: per-example target lengths for TrainingHelper.
            encoder_state: final encoder state, used as the initial state
                of both decoders.
            num_units: LSTM hidden size per layer.
            layers: number of stacked LSTM layers.
            output_size: vocabulary size projected to by the Dense layer.
            embedding: embedding matrix used by GreedyEmbeddingHelper to
                embed each predicted token at inference time.

        Returns:
            (training logits, inference decoder outputs, training sample ids).
        """
        projection_layer=tf.layers.Dense(output_size)
        decoder_cell = getLayeredCell(layers, num_units)    
        # Training branch: teacher forcing — the ground-truth target_seq is
        # fed as decoder input at every step.
        with tf.variable_scope("decoder"):
            helper = tf.contrib.seq2seq.TrainingHelper(target_seq, target_seq_len, time_major=False)
            decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state, output_layer=projection_layer)
            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder,impute_finished=True,maximum_iterations=20)
       
        # Inference branch: same variable scope with reuse=True so the greedy
        # decoder shares the training decoder's weights.
        with tf.variable_scope("decoder",reuse = True):
            batch_size = tf.shape(in_seq_len)[0]
            # Start token id 2 and end token id 3 are hard-coded — presumably
            # the <s>/</s> ids of the vocabulary; TODO confirm against vocab_file.
            start_tokens = tf.tile(tf.constant([2],dtype=tf.int32),[batch_size],name='start_token')
            infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding,start_tokens ,3)
            infer_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, infer_helper, encoder_state, output_layer=projection_layer)
            infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(infer_decoder,impute_finished=True,maximum_iterations=20)
            
        return outputs.rnn_output,infer_outputs,outputs.sample_id

上面的代码就是seq2seq构建encoder,decoder的函数,在之后的模型构建中会被调用。

model

import tensorflow as tf
import seq2seq
import reader
import numpy as np
from os import path



class Model():
    """Seq2seq poem-generation model: builds the TF 1.x training graph,
    runs the training loop, and performs greedy-decoding inference.
    """

    def __init__(self, input_file, vocab_file,
            num_units, layers, 
            batch_size, learning_rate, output_dir,
            save_step = 100, eval_step = 1000,
            param_histogram=False, restore_model=False,
            init_train=True, init_infer=False):
        """Store hyper-parameters and (optionally) build the training graph.

        Args:
            input_file: training corpus path handed to reader.reader.
            vocab_file: vocabulary file path handed to reader.reader.
            num_units: LSTM hidden size per layer.
            layers: number of stacked LSTM layers in encoder and decoder.
            batch_size: fixed batch size baked into the placeholders.
            learning_rate: Adam learning rate.
            output_dir: directory where checkpoints are written.
            save_step / eval_step: stored but not consulted in this class.
            param_histogram: if True, add per-variable histogram summaries.
            restore_model / init_infer: stored flags, unused in this class.
            init_train: if True, build the training graph immediately.
        """
        self.num_units = num_units
        self.layers = layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.save_step = save_step
        self.eval_step = eval_step
        self.param_histogram = param_histogram
        self.restore_model = restore_model
        self.init_train = init_train
        self.init_infer = init_infer
        # Pre-trained embedding matrix. NOTE(review): the path is hard-coded
        # and independent of vocab_file — confirm both describe the same vocab.
        self.matrix = np.load('vocmat.npy')
        self.model_file = path.join(output_dir, 'model.ckpl')
        self.input_file = input_file
        self.vocab_file = vocab_file
        self.train_data = reader.reader(input_file, vocab_file, batch_size)
        if self.init_train is True:
            self._init_train()

    def _init_train(self):
        """Build the training graph: embedding -> encoder -> decoder -> loss."""
        self.train_graph = tf.Graph()
        with self.train_graph.as_default():
            self.embedding = tf.Variable(self.matrix)
            # Batch-major int32 token-id sequences plus per-example lengths.
            self.train_in_seq = tf.placeholder(
                tf.int32, shape=[self.batch_size, None], name='train_in_seq')
            self.train_in_seq_len = tf.placeholder(
                tf.int32, shape=[self.batch_size], name='train_in_seq_len')
            self.train_target_seq = tf.placeholder(
                tf.int32, shape=[self.batch_size, None], name='train_target_seq')
            self.train_target_seq_len = tf.placeholder(
                tf.int32, shape=[self.batch_size], name='train_target_seq_len')

            # Embedding layer: look up dense vectors for inputs and targets.
            with tf.name_scope("embedding"):
                self.embed_input = tf.nn.embedding_lookup(
                    self.embedding, self.train_in_seq, name='embed_input')
                # Fixed: the original reused the tensor name 'embed_input' here.
                self.embed_target = tf.nn.embedding_lookup(
                    self.embedding, self.train_target_seq, name='embed_target')
            with tf.name_scope("encoder"):
                self.encoder_output, self.encoder_state = seq2seq.encoder(
                    self.embed_input, self.train_in_seq_len,
                    self.num_units, self.layers)
            with tf.name_scope("decoder"):
                # 16025 is the hard-coded output vocabulary size — presumably
                # the row count of vocmat.npy; TODO confirm.
                self.decoder_output, self.infer_output, self.inf_id = seq2seq.decoder(
                    self.train_in_seq_len, self.embed_target,
                    self.train_target_seq_len, self.encoder_state,
                    self.num_units, self.layers, 16025, self.embedding)

            with tf.name_scope("loss"):
                # Mask padding positions beyond each target length. Fixed:
                # tf.sequence_mask takes integer lengths; the original wrapped
                # both arguments in tf.to_float unnecessarily.
                mask = tf.sequence_mask(
                    self.train_target_seq_len,
                    tf.shape(self.train_target_seq)[1])
                # Weights are cast to float64 — presumably because vocmat.npy
                # is float64, making the logits float64 too; verify.
                self.loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.decoder_output,
                    targets=self.train_target_seq,
                    weights=tf.cast(mask, tf.float64))
                tf.summary.scalar('LOSS', self.loss)

            # Clip gradients by global norm, then apply Adam updates.
            params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, params)
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 0.5)
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate
            ).apply_gradients(zip(clipped_gradients, params))

            if self.param_histogram:
                for v in tf.trainable_variables():
                    tf.summary.histogram('train_' + v.name, v)
            tf.summary.scalar('loss', self.loss)
            self.train_summary = tf.summary.merge_all()
            self.train_init = tf.global_variables_initializer()
            self.train_saver = tf.train.Saver()
            self.train_session = tf.Session(graph=self.train_graph)

    def train(self, epochs, start=0):
        """Run the training loop, logging every 5 steps and checkpointing
        every 20 steps.

        NOTE(review): `epochs` and `start` are accepted but ignored — the
        loop runs a fixed (effectively unbounded) number of steps, exactly
        as in the original.
        """
        self.train_session.run(self.train_init)
        self.log_writter = tf.summary.FileWriter(
            'output/', self.train_session.graph)
        for step in range(100000000):
            data = next(self.train_data.read())
            output, loss, train, summary = self.train_session.run(
                    [self.decoder_output, self.loss, self.train_op, self.train_summary],
                    feed_dict={
                        self.train_in_seq: data['in_seq'],
                        self.train_in_seq_len: data['in_seq_len'],
                        self.train_target_seq: data['target_seq'],
                        self.train_target_seq_len: data['target_seq_len']})
            if step % 5 == 0:
                self.log_writter.add_summary(summary, step)
                print(loss)
            if step % 20 == 0:
                self.train_saver.save(self.train_session, self.model_file)
                # Fixed: the original passed (loss, step), applying %d to the
                # float loss and %f to the integer step.
                print("Saving model. Step: %d, loss: %f" % (step, loss))

    def pridiction(self):
        """Restore the latest checkpoint and run greedy inference on one
        batch read from the training file.

        (Method name kept as-is for caller compatibility; 'prediction' is
        the intended spelling.)
        """
        r = reader.reader(self.input_file, self.vocab_file, 1)
        data = next(r.read())
        in_seq = data['in_seq']
        in_seq_len = data['in_seq_len']
        target_seq = data['target_seq']
        target_seq_len = data['target_seq_len']

        # NOTE(review): checkpoint dir 'output/output/' differs from the
        # save path built in __init__ — confirm this is intentional.
        module_file = tf.train.latest_checkpoint('output/output/')
        self.train_saver.restore(self.train_session, module_file)
        [output, output_ids] = self.train_session.run(
                [self.infer_output, self.inf_id],
                feed_dict={
                    self.train_in_seq: in_seq,
                    self.train_in_seq_len: in_seq_len,
                    self.train_target_seq: target_seq,
                    self.train_target_seq_len: target_seq_len})

        print(self.train_data.inttoword(in_seq),
              self.train_data.inttoword(target_seq),
              self.train_data.inttoword(output.sample_id),
              self.train_data.inttoword(output_ids))
        return in_seq, output
            

if __name__ == "__main__":
    # Same hyper-parameters as before, spelled out as keyword arguments.
    # init_train defaults to True, so constructing the Model also builds
    # the training graph.
    m = Model(
        input_file='part-r-00000',
        vocab_file="vocmat.npy",
        num_units=512,
        layers=4,
        batch_size=1,
        learning_rate=0.01,
        output_dir='output/')
        

该代码主要包含四部分,init部分是模型超参数的设定,init_train是模型每一层的设定,train是训练整个模型的,pridiction则是用来测试模型学习效果。总体来说是网上的一些教程的简化版,但是不如别人的封装完整。

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值