Building a Language Model on PTB Data with TensorFlow (3)

The embedding matrix for all words in the vocabulary has shape [VOCAB_SIZE, EMB_SIZE] (in the code below the embedding size is set equal to HIDDEN_SIZE).

The input of each batch, input_data, has shape [batch_size, num_steps].

After the word vectors are looked up with tf.nn.embedding_lookup, the input input_embedding has shape [batch_size, num_steps, EMB_SIZE].

At each time step of training, the input has shape [batch_size, EMB_SIZE] and the output has shape [batch_size, hidden_size].

There are num_steps time steps in total.

After reshaping, the output of the recurrent model has shape [batch_size*num_steps, hidden_size].

At the softmax layer, the output has shape [batch_size*num_steps, VOCAB_SIZE].
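
To make the shape bookkeeping above concrete, here is a minimal standalone sketch that traces the same tensors with toy sizes; the variable names and dimensions are chosen only for illustration and are not part of the model code below:

import tensorflow as tf

BATCH, STEPS, VOCAB, HIDDEN = 4, 5, 10000, 300      # toy sizes for illustration

word_ids = tf.placeholder(tf.int32, [BATCH, STEPS])             # [batch_size, num_steps]
embedding = tf.get_variable("demo_embedding", [VOCAB, HIDDEN])  # [VOCAB_SIZE, EMB_SIZE]
inputs = tf.nn.embedding_lookup(embedding, word_ids)            # [batch_size, num_steps, EMB_SIZE]

cell = tf.nn.rnn_cell.BasicLSTMCell(HIDDEN)
state = cell.zero_state(BATCH, tf.float32)
outputs = []
with tf.variable_scope("demo_rnn"):
    for t in range(STEPS):                                      # num_steps time steps
        if t > 0: tf.get_variable_scope().reuse_variables()
        out, state = cell(inputs[:, t, :], state)               # out: [batch_size, hidden_size]
        outputs.append(out)

output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN])        # [batch_size*num_steps, hidden_size]
logits = tf.matmul(output, tf.transpose(embedding))             # [batch_size*num_steps, VOCAB_SIZE]

print(inputs.shape)    # (4, 5, 300)
print(output.shape)    # (20, 300)
print(logits.shape)    # (20, 10000)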

The full code is as follows:

import numpy as np
import tensorflow as tf

TRAIN_DATA="ptb.train"        #training data (word ids)
EVAL_DATA="ptb.valid"         #validation data
TEST_DATA="ptb.test"          #test data
LSTM_KEEP_PROB=0.9            #probability that an LSTM output is kept (not dropped out)
EMBEDDING_KEEP_PROB=0.9       #probability that a word embedding is kept (not dropped out)
HIDDEN_SIZE=300               #hidden layer size
NUM_LAYERS=2                  #number of LSTM layers in the deep recurrent network
VOCAB_SIZE=10000              #vocabulary size
SHARE_EMB_AND_SOFTMAX=True    #share parameters between the softmax layer and the embedding layer
MAX_GRAD_NORM=5               #upper bound on the gradient norm, used to control gradient explosion
TRAIN_BATCH_SIZE=20           #batch size during training
TRAIN_NUM_STEP=35             #number of unrolled time steps during training
EVAL_BATCH_SIZE=1             #batch size during evaluation
EVAL_NUM_STEP=1               #number of unrolled time steps during evaluation
NUM_EPOCH=5                   #number of training epochs
class PTBModel():
    def __init__(self,is_training,batch_size,num_steps):

        self.batch_size=batch_size
        self.num_steps=num_steps

        #define the inputs and the expected outputs at each time step
        self.input_data=tf.placeholder(tf.int32,[batch_size,num_steps])
        self.targets=tf.placeholder(tf.int32,[batch_size,num_steps])

        dropout_keep_prob=LSTM_KEEP_PROB if is_training else 1.0
        lstm_cells=[tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.BasicLSTMCell
                    (HIDDEN_SIZE), output_keep_prob=dropout_keep_prob)
                    for _ in range(NUM_LAYERS)]
        cell=tf.nn.rnn_cell.MultiRNNCell(lstm_cells) #two-layer recurrent neural network

        #initialize the initial state to all-zero vectors
        self.initial_state=cell.zero_state(batch_size,tf.float32)
        #embedding matrix for all words in the vocabulary
        embedding=tf.get_variable("embedding",[VOCAB_SIZE,HIDDEN_SIZE])
        #convert the input word ids into word vectors
        inputs=tf.nn.embedding_lookup(embedding,self.input_data)
        #inputs shape: [batch_size,num_steps,HIDDEN_SIZE]

        if is_training:
            #apply dropout to the word embeddings only during training
            inputs=tf.nn.dropout(inputs,EMBEDDING_KEEP_PROB)

        #collect the LSTM outputs of the num_steps time steps into outputs
        outputs=[]
        state=self.initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                #declare the variables used inside the LSTM at the first time step; all later time steps reuse the variables defined there
                if time_step>0: tf.get_variable_scope().reuse_variables()
                cell_output,state=cell(inputs[:,time_step,:],state)
                outputs.append(cell_output)
        #outputs is a list of length num_steps whose elements are tensors of shape [batch,hidden_size]
        output=tf.reshape(tf.concat(outputs,1),[-1,HIDDEN_SIZE])
        #output shape: [batch*num_steps,hidden_size]

        if SHARE_EMB_AND_SOFTMAX:
            weight=tf.transpose(embedding) #reuse the transposed embedding matrix as the softmax weights
        else:
            weight=tf.get_variable("weight",[HIDDEN_SIZE,VOCAB_SIZE])
        bias=tf.get_variable("bias",[VOCAB_SIZE])
        logits=tf.matmul(output,weight)+bias  #[batch*num_steps,VOCAB_SIZE]

        loss=tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.targets,[-1]),logits=logits)
        self.cost=tf.reduce_sum(loss)/batch_size
        #to get the log(perplexity) of this batch, the cost would have to be divided by (batch_size*num_steps)
        #run_epoch below divides the accumulated cost by iters, which gives the average log(perplexity) after iters steps
        #self.cost=tf.reduce_mean(loss)*num_steps
        self.final_state=state

        if not is_training:return

        trainable_variables=tf.trainable_variables()
        #tf.trainable_variables returns the list of variables that need to be trained

        grads,_=tf.clip_by_global_norm(
            tf.gradients(self.cost,trainable_variables),MAX_GRAD_NORM)
        #tf.gradients computes the derivatives of the loss with respect to all trainable variables

        optimizer=tf.train.GradientDescentOptimizer(learning_rate=1.0)

        # apply_gradients is one of the two steps performed by tf.train.Optimizer.minimize:
        # minimize first computes the gradients with tf.train.Optimizer.compute_gradients
        # and then applies them with tf.train.Optimizer.apply_gradients.
        # Since the (clipped) gradients have already been computed above, only the update step is needed;
        # the expected input format is zip(gradients, variables).
        self.train_op=optimizer.apply_gradients(zip(grads,trainable_variables))

#returns the perplexity over the whole data set
def run_epoch(session,model,batches,train_op,output_log,step):
    total_costs=0.0
    iters=0
    state=session.run(model.initial_state)
    for x,y in batches:
        cost,state,_=session.run([model.cost,model.final_state,train_op],
                                 {model.input_data:x,model.targets:y,
                                  model.initial_state:state})
        total_costs+=cost
        iters+=model.num_steps

        if output_log:
            if step % 100 == 0:
                #np.exp converts log(perplexity) into perplexity
                print("After %d steps, perplexity is %.3f"%
                      (step,np.exp(total_costs/iters)))
        step+=1
    return step,np.exp(total_costs/iters)
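
# For intuition (not part of the original code): total_costs/iters is the average
# per-word cross-entropy, i.e. log(perplexity), and np.exp converts it to perplexity.
# For example, an average cross-entropy of 4.6 corresponds to exp(4.6) ≈ 99.5,
# roughly as uncertain as picking uniformly among ~100 words at each step.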

def read_data(file_path):
    with open(file_path,"r") as fin:
        id_string=' '.join([line.strip() for line in fin.readlines()])
    id_list=[int(w) for w in id_string.split()]
    return id_list

def make_batches(id_list,batch_size,num_step):
    num_batches=(len(id_list)-1)//(batch_size*num_step)
    data=np.array(id_list[:num_batches*batch_size*num_step])
    data=np.reshape(data,[batch_size,num_batches*num_step])
    data_batches=np.split(data,num_batches,axis=1)

    label = np.array(id_list[1:num_batches * batch_size * num_step+1])
    label = np.reshape(label, [batch_size, num_batches * num_step])
    label_batches = np.split(label, num_batches, axis=1)

    return list(zip(data_batches,label_batches))

def main():
    initializer=tf.random_uniform_initializer(-0.05,0.05)
    with tf.variable_scope("language_model",reuse=None,initializer=initializer):
        train_model=PTBModel(True,TRAIN_BATCH_SIZE,TRAIN_NUM_STEP)

    with tf.variable_scope("language_model",reuse=True,initializer=initializer):
        eval_model=PTBModel(False,EVAL_BATCH_SIZE,EVAL_NUM_STEP)

    with tf.Session() as session:
        tf.global_variables_initializer().run()
        train_batches=make_batches(read_data(TRAIN_DATA),TRAIN_BATCH_SIZE,TRAIN_NUM_STEP)
        eval_batches = make_batches(read_data(EVAL_DATA), EVAL_BATCH_SIZE, EVAL_NUM_STEP)
        print(len(eval_batches))
        test_batches = make_batches(read_data(TEST_DATA), EVAL_BATCH_SIZE, EVAL_NUM_STEP)
        print(len(test_batches))
        step=0
        for i in range(NUM_EPOCH):
            print("In iteration: %d"%(i+1))
            step, train_pplx = run_epoch(session, train_model, train_batches,
                                         train_model.train_op, True, step)
            print("Epoch: %d Train Perplexity: %.3f "%(i+1,train_pplx))
            _, eval_pplx = run_epoch(session, eval_model, eval_batches,
                                     tf.no_op(), False, 0) #tf.no_op does nothing
            print("Epoch: %d Eval Perplexity: %.3f "%(i+1,eval_pplx))
        _, test_pplx = run_epoch(session, eval_model, test_batches, tf.no_op(), False, 0)
        print("Test Perplexity: %.3f " % (i + 1, test_pplx))
if __name__=="__main__":
    main()
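
As a quick sanity check on the batching logic, the snippet below runs make_batches on a made-up toy id sequence (assuming the make_batches function from the listing above is in scope; the numbers are purely illustrative). Each label batch is simply the corresponding data batch shifted by one position:

ids = list(range(20))                                  # toy id sequence 0..19
batches = make_batches(ids, batch_size=2, num_step=3)
print(len(batches))   # (20-1)//(2*3) = 3 batches
x0, y0 = batches[0]
print(x0)             # [[ 0  1  2]
                      #  [ 9 10 11]]
print(y0)             # [[ 1  2  3]
                      #  [10 11 12]]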
