"""
单词字符级别预测RNN
"""
import os

import numpy as np
import tensorflow as tf
with open('../datas/anna.txt', 'r') as f:
    text = f.read()

vocab = sorted(set(text))  # the unique characters in the text, 83 in total
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
# Use the lookup table vocab_to_int to convert each character of the text to an integer id.
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

print('All characters: {}'.format(vocab))
print(text[:100])
print(len(text))
print(encoded[:100])
print(len(vocab))
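
# A quick sanity check of the two lookup tables (an illustrative sketch, not
# part of the original script): encoding with vocab_to_int and decoding with
# int_to_vocab should round-trip back to the original text.
def demo_encoding_roundtrip():
    decoded = ''.join(int_to_vocab[i] for i in encoded[:50])
    assert decoded == text[:50], 'encode/decode must round-trip'
    print(decoded)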

# Build the training-batch generator: each call yields one batch, so a generator is used.
def get_batches(arr, batch_size, n_steps):
    """
    :param arr: the raw text, already tokenized into integer ids
    :param batch_size: batch size
    :param n_steps: number of time steps per sequence
    :return: generator of (x, y) batches
    """
    # 1. Number of characters per batch.
    chars_per_batch = batch_size * n_steps
    n_batches = len(arr) // chars_per_batch
    arr = arr[:n_batches * chars_per_batch]
    # 2. Reshape to [batch_size, -1].
    arr = np.reshape(arr, newshape=[batch_size, -1])
    # 3. Slice out the batches.
    for i in range(0, arr.shape[1], n_steps):
        # Input x.
        x = arr[:, i:i + n_steps]
        # Target y is x shifted left by one position.
        y_temp = arr[:, i + 1:i + n_steps + 1]
        y = np.zeros(shape=x.shape, dtype=x.dtype)
        y[:, :y_temp.shape[1]] = y_temp  # y_temp may be one column short at the end
        yield x, y

def test_batches_func():
    """
    Sanity-check get_batches above.
    """
    batches = get_batches(encoded, 8, 10)
    x, y = next(batches)
    print(x)
    print('\n', '**' * 40)
    print(y)
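
# For concreteness (a hedged worked example using the call above):
# get_batches(encoded, 8, 10) trims and reshapes the ids to [8, -1] and then
# yields x, y pairs, each of shape (8, 10), where y is x shifted left by one:
#   x[:, t] == arr[:, i + t]
#   y[:, t] == arr[:, i + t + 1]  (the final column of the last batch is
#                                  zero-padded, where y_temp runs short)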

def build_inputs(batch_size, n_steps):
    """
    Create the model inputs (placeholders).
    :param batch_size:
    :param n_steps:
    :return:
    """
    inputs = tf.placeholder(tf.int32, shape=[batch_size, n_steps], name='x')
    targets = tf.placeholder(tf.int32, shape=[batch_size, n_steps], name='y')
    keep_prob = tf.placeholder(tf.float32, shape=None, name='keep_prob')
    return inputs, targets, keep_prob

def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    """
    Create the RNN cells of the hidden layers.
    :param lstm_size: number of hidden units per layer
    :param num_layers: number of stacked layers
    :param batch_size: batch size
    :param keep_prob: dropout keep probability
    :return:
    """
    def build_cell(lstm_size, keep_prob):
        # 1. Instantiate an LSTM cell.
        cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=lstm_size)
        # 2. Wrap it with dropout.
        drop = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
        return drop

    # Stack multiple RNN layers. Each layer must be a fresh cell instance
    # (hence the list comprehension rather than reusing one cell object).
    multi_cell = tf.nn.rnn_cell.MultiRNNCell(
        [build_cell(lstm_size, keep_prob) for _ in range(num_layers)]
    )
    # Zero-initialize the state.
    initial_state = multi_cell.zero_state(batch_size, dtype=tf.float32)
    return multi_cell, initial_state
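
# Note on the state layout (a descriptive sketch of what zero_state returns
# in TF 1.x): initial_state is a tuple with num_layers entries, one
# LSTMStateTuple(c, h) per layer, where c and h each have shape
# [batch_size, lstm_size]. For example, with num_layers=2, batch_size=64,
# lstm_size=128:
#   initial_state[0].c.shape == (64, 128)
#   initial_state[1].h.shape == (64, 128)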

def build_output(rnn_output, lstm_size, output_size):
    """
    Apply a fully connected layer to the RNN output to obtain the logits.
    :param rnn_output: hidden-layer output, a 3-D tensor [N, n_steps, lstm_size]
    :param lstm_size: number of hidden units
    :param output_size: logits dimension (output_size == num_classes == vocab_size)
    :return:
    """
    # Reshape the RNN output: [N, n_steps, lstm_size] ---> [N*n_steps, lstm_size]
    x = tf.reshape(rnn_output, shape=[-1, lstm_size])
    # Variables of the fully connected layer.
    with tf.variable_scope('logits'):
        softmax_w = tf.get_variable(
            'w', shape=[lstm_size, output_size], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        softmax_b = tf.get_variable(
            'b', shape=[output_size], dtype=tf.float32,
            initializer=tf.zeros_initializer()
        )
    logits = tf.nn.xw_plus_b(x, softmax_w, softmax_b)
    # Softmax over the logits gives the predictions.
    predictions = tf.nn.softmax(logits)
    return predictions, logits
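
# Shape flow through build_output, for reference (illustrative numbers from
# the hyperparameters set below): with N=64, n_steps=100, lstm_size=128 and
# an 83-character vocabulary,
#   rnn_output: [64, 100, 128] -> x: [6400, 128]
#   logits, predictions: [6400, 83], one row per (sequence, time step) pair.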

def build_loss(logits, labels, num_classes):
    """
    Create the model loss.
    :param logits: 2-D tensor of shape [N*n_steps, num_classes]
    :param labels:
    :param num_classes: number of classes, i.e. vocab_size
    :return:
    """
    # 1. One-hot encode the labels -> [N, n_steps, num_classes].
    y_one_hot = tf.one_hot(indices=labels, depth=num_classes)
    # 2. Reshape to match the logits.
    y_reshaped = tf.reshape(y_one_hot, shape=logits.get_shape())
    # 3. Cross-entropy loss.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=y_reshaped
    ))
    return loss
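
# An equivalent formulation (a sketch, not the author's code): since the
# targets are integer ids, tf.nn.sparse_softmax_cross_entropy_with_logits
# computes the same loss without materializing the one-hot tensor.
def build_loss_sparse(logits, labels):
    labels_flat = tf.reshape(labels, shape=[-1])  # [N, n_steps] -> [N*n_steps]
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels_flat))
    return loss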

def build_optimizer(loss, learning_rate, grads_clip):
    """
    Build the optimizer with gradient clipping.
    :param loss:
    :param learning_rate:
    :param grads_clip: gradient-clipping threshold
    :return:
    """
    # Collect all trainable variables.
    vars_list = tf.trainable_variables()
    grads_clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, vars_list), grads_clip)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_opt = optimizer.apply_gradients(zip(grads_clipped, vars_list))
    return train_opt
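
# How the clipping behaves (for intuition; this mirrors the semantics of
# tf.clip_by_global_norm): with global norm g = sqrt(sum_i ||grad_i||^2),
# every gradient is scaled by grads_clip / max(g, grads_clip), i.e. left
# untouched when g <= grads_clip and uniformly shrunk otherwise. A minimal
# NumPy sketch of that rule:
def clip_by_global_norm_np(grads, clip_norm):
    g = np.sqrt(sum(np.sum(np.square(t)) for t in grads))
    scale = clip_norm / max(g, clip_norm)
    return [t * scale for t in grads], g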

class CharRNN:
    """
    Build the model graph.
    """
    def __init__(self, num_classes, batch_size=64, n_steps=50, lstm_size=128, num_layers=2,
                 lr=1e-3, grads_clip=5, sampling=False):
        # When sampling (generation), feed one character at a time.
        if sampling:
            batch_size, n_steps = 1, 1
        tf.reset_default_graph()  # discard any previously built default graph
        # 1. Placeholders.
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, n_steps)
        # 2. Build the cells.
        multi_cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)
        # 3. Run the model with dynamic_rnn.
        # One-hot encode the inputs.
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        # Dynamic RNN; rnn_outputs has shape [N, n_steps, lstm_size].
        rnn_outputs, self.final_state = tf.nn.dynamic_rnn(
            multi_cell, x_one_hot, initial_state=self.initial_state)
        # Logits and predictions.
        self.prediction, self.logits = build_output(rnn_outputs, lstm_size, num_classes)
        # Loss.
        self.loss = build_loss(self.logits, self.targets, num_classes)
        # Optimizer.
        self.train_opt = build_optimizer(self.loss, lr, grads_clip)
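
# Usage note: sampling=True rebuilds the same graph with batch_size=1 and
# n_steps=1, so a trained checkpoint can be restored into it and fed one
# character at a time, e.g. (illustrative):
#   model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)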

# Hyperparameters
batch_size = 64
n_steps = 100        # time steps, i.e. sequence length
lstm_size = 128      # number of hidden units
num_layers = 2       # number of stacked layers
learning_rate = 1e-3
keep_probab = 0.5
epochs = 20
print_every_n = 10
save_every_n = 300

model = CharRNN(
    len(vocab), batch_size=batch_size, n_steps=n_steps, lstm_size=lstm_size,
    num_layers=num_layers, lr=learning_rate, grads_clip=5)

def train():
    saver = tf.train.Saver(max_to_keep=1)
    # Checkpoint directory.
    checkpoint_dir = './models/anna_rnn'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 1
        for e in range(1, epochs + 1):  # epochs + 1, otherwise the last epoch is skipped
            # Evaluate the zero initial state.
            new_state = sess.run(model.initial_state)
            for x, y in get_batches(encoded, batch_size, n_steps):
                feed = {model.inputs: x, model.targets: y, model.keep_prob: keep_probab,
                        model.initial_state: new_state}
                # One training step.
                _, new_state = sess.run([model.train_opt, model.final_state], feed)
                if step % print_every_n == 0:
                    loss_ = sess.run(model.loss, feed)
                    print('Epochs:{} - Step:{} - Train loss:{:.5f}'.format(e, step, loss_))
                # Persist the model.
                if step % save_every_n == 0:
                    files = 'model.ckpt'
                    save_files = os.path.join(checkpoint_dir, files)
                    saver.save(sess, save_path=save_files)
                step += 1

def pick_top_n(preds, vocab_size, top_n=5):
    # Sample the next character: keep only the top_n most likely characters,
    # renormalize, then draw from that truncated distribution.
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c
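
# Worked example (illustrative numbers): with preds = [0.10, 0.40, 0.05,
# 0.30, 0.15] and top_n=2, np.argsort(p)[:-2] selects every index except the
# two largest entries (indices 1 and 3), so p becomes [0, 0.4, 0, 0.3, 0],
# which renormalizes to [0, 4/7, 0, 3/7, 0]; the next character id is then
# drawn from that distribution.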

def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = CharRNN(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        # Warm up the state on the prime text.
        for c in prime:
            x = np.zeros((1, 1))
            x[0, 0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)
        c = pick_top_n(preds, vocab_size)
        samples.append(int_to_vocab[c])
        # Generate n_samples more characters, feeding each prediction back in.
        for i in range(n_samples):
            x[0, 0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)
            c = pick_top_n(preds, vocab_size)
            samples.append(int_to_vocab[c])
    return ''.join(samples)

def test():
    # train() saves into './models/anna_rnn', so restore from there. (With
    # max_to_keep=1 only the newest checkpoint survives; to compare models
    # from different training stages, raise max_to_keep and save under
    # step-specific names.)
    checkpoint_dir = './models/anna_rnn'
    print(tf.train.get_checkpoint_state(checkpoint_dir))
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)  # path of the newest checkpoint
    print(checkpoint)
    samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="Far")
    print('Sample from the latest checkpoint:')
    print(samp)

if __name__ == '__main__':
    train()