TensorFlow: recurrent neural network char-level 0

import collections
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
from tensorflow.models.rnn import seq2seq

# Character-level corpus preparation (credit: @karpathy).
# The context manager closes the file handle instead of leaking it.
with open('ThreeMusketeers.txt') as corpus_file:
    data = corpus_file.read()

# Sort the vocabulary so every run assigns the same index to each character.
# The iteration order of a bare set is not guaranteed, and a different order
# would no longer line up with the embedding/softmax rows of a checkpoint
# written by an earlier run.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# (character, count) pairs, most frequent first.
counter = collections.Counter(data).most_common()
# Slicing (rather than indexing 0..4) cannot raise IndexError when the text
# holds fewer than five distinct characters.
for entry in counter[:5]:
    print(entry)

# The whole text encoded as a list of vocabulary indices.
corpus = [char_to_ix[c] for c in data]

batch_size = 50
seq_length = 200
chunk_size = batch_size * seq_length

# Every input character needs a successor as its target, so only
# len(corpus) - 1 positions are usable. Counting batches from that number
# keeps the shifted target slice full-length even when len(corpus) is an
# exact multiple of chunk_size; counting from len(corpus) leaves ydata one
# element short in that case and the reshape below fails.
# `//` keeps the result an integer on both Python 2 and Python 3.
num_batches = (len(corpus) - 1) // chunk_size
print('%s %s' % ('num_batches: ', num_batches))
if num_batches < 1:
    raise ValueError(
        'corpus has %d characters; at least %d are needed for one batch'
        % (len(corpus), chunk_size + 1))

usable = num_batches * chunk_size
corpus_reduced_0 = corpus[:usable]
# Targets are the inputs shifted one character to the left.
corpus_reduced_1 = corpus[1:usable + 1]
xdata = np.array(corpus_reduced_0)
ydata = np.array(corpus_reduced_1)

# Each of the batch_size rows is one contiguous stretch of the text.
# Splitting along axis 1 yields num_batches arrays of shape
# (batch_size, seq_length), with batch k continuing every row exactly where
# batch k - 1 stopped.
xbatches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
ybatches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
print('%s %s' % ('numbers of xbatches:', len(xbatches)))
print('%s %s' % (type(xbatches[0]), xbatches[0].shape))

# Model hyper-parameters.
hidden_size, num_layers = 128, 2
max_grad_norm = 5.0

# Multi-layer recurrent cell: the same BasicLSTMCell object repeated
# num_layers times and wrapped in a MultiRNNCell.
an_lstm = rnn_cell.BasicLSTMCell(hidden_size)
multi_lstm = rnn_cell.MultiRNNCell([an_lstm] * num_layers)

# Integer character ids: one row per sequence, one column per time step.
batch_shape = [batch_size, seq_length]
x = tf.placeholder(tf.int32, batch_shape)
y = tf.placeholder(tf.int32, batch_shape)
init_state = multi_lstm.zero_state(batch_size, tf.float32)

with tf.variable_scope('rnn'):
    # Projection from LSTM outputs to per-character scores.
    softmax_w = tf.get_variable('softmax_w', [hidden_size, vocab_size])
    softmax_b = tf.get_variable('softmax_b', [vocab_size])
    # The embedding table and its lookup are pinned to the CPU.
    with tf.device('/cpu:0'):
        embedding = tf.get_variable('embedding', [vocab_size, hidden_size])
        embedded = tf.nn.embedding_lookup(embedding, x)
        # Cut along the time axis into seq_length slices and drop the
        # length-1 axis from each slice, giving a list of
        # [batch_size, hidden_size] tensors (one per time step).
        inputs = [tf.squeeze(step, [1])
                  for step in tf.split(1, seq_length, embedded)]

def loop(prev):
    """Turn the previous decoder output into the next step's input embedding.

    Projects `prev` through the softmax layer, picks the highest-scoring
    symbol along axis 1 (with gradients blocked) and returns that symbol's
    entry from `embedding`. Reads the module-level `softmax_w`, `softmax_b`
    and `embedding`.
    """
    logits = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
    greedy_symbol = tf.arg_max(logits, 1)
    greedy_symbol = tf.stop_gradient(greedy_symbol)
    return tf.nn.embedding_lookup(embedding, greedy_symbol)

# Run the stacked LSTM over the per-step inputs, starting from init_state.
# loop_function is None, so the decoder consumes the entries of `inputs`.
outputs, last_state = seq2seq.rnn_decoder(
    inputs, init_state, multi_lstm, loop_function=None, scope='rnn')
# `outputs` is a list of seq_length 2-D tensors, each
# [batch_size, hidden_size].

# Step 1: lay the per-step outputs of each sequence side by side.
# Result: [batch_size, seq_length * hidden_size]
out_conca = tf.concat(1, outputs)
# Step 2: fold back so that every row is a single time step's hidden vector,
# which is the layout the fully connected softmax layer expects.
# Result: [batch_size * seq_length, hidden_size]
output = tf.reshape(out_conca, [-1, hidden_size])
# Unnormalised character scores: [batch_size * seq_length, vocab_size]
score = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
# Character probabilities, same shape as `score`.
probs = tf.nn.softmax(score)

# Flatten the targets the same way the outputs were flattened, and weight
# every position equally.
flat_targets = tf.reshape(y, [-1])
unit_weights = tf.ones([batch_size * seq_length])
loss = seq2seq.sequence_loss_by_example(
    [score], [flat_targets], [unit_weights], vocab_size)
# Sum of the per-example losses divided by the number of characters fed.
cost = tf.reduce_sum(loss) / batch_size / seq_length

######################################################################
# Optimisation: Adam applied to gradients clipped by their global norm.
# max_grad_norm is taken from the hyper-parameter section above rather than
# being redefined here with a second, differently typed literal.
lr = 0.001
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)
opt = tf.train.AdamOptimizer(lr)
# list(...) keeps the (gradient, variable) pairs re-iterable on Python 3,
# where a bare zip() is a one-shot iterator.
optimizer = opt.apply_gradients(list(zip(grads, tvars)))
######################################################################

init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

# Training schedule. batch_size is deliberately NOT reassigned here: the
# placeholders and the pre-split batches were built with the value set
# earlier, so a different number at this point would only mislead.
epoch = 20
snapshot = 5   # print the loss every `snapshot` epochs
save_step = 1  # write a checkpoint every `save_step` epochs
saver = tf.train.Saver()

# Checkpoints go inside net_snapshot/; create the directory up front rather
# than relying on the checkpoint writer to create missing parents.
save_path = 'net_snapshot/rnn_char_net_tfmodel'
snapshot_dir = os.path.dirname(save_path)
if not os.path.isdir(snapshot_dir):
    os.makedirs(snapshot_dir)

loss_cache = []
for ep in range(epoch):
    avg_loss = 0
    # Start every pass over the corpus from the all-zero LSTM state.
    state = sess.run(init_state)
    for nb in range(num_batches):
        # Visit the batches in order. Batch nb continues each row of text
        # exactly where batch nb - 1 ended, so the state carried over below
        # belongs to the preceding characters. Drawing a random batch here
        # would feed the network a state left over from unrelated text.
        batch_x, batch_y = xbatches[nb], ybatches[nb]

        train_loss, state, _ = sess.run(
            [cost, last_state, optimizer],
            feed_dict={x: batch_x, y: batch_y, init_state: state})
        avg_loss += train_loss / num_batches

    loss_cache.append(avg_loss)
    if ep % snapshot == 0:
        print('Epoch: %d/%d, loss: %.4f' % (ep, epoch, avg_loss))
    if ep % save_step == 0:
        saver.save(sess, save_path=save_path, global_step=ep)

# Plot the mean training loss of each epoch.
plt.figure(1)
plt.plot(range(len(loss_cache)), loss_cache, 'b-', label='loss')
plt.legend(loc='upper right')
plt.show()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值