charrnn


"""
单词字符级别预测RNN
"""
import time
from collections import namedtuple
import numpy as np
import tensorflow as tf
import os



# Load the training corpus as one big string.
with open('../datas/anna.txt', 'r') as f:
    text = f.read()
vocab = sorted(set(text))    # the unique characters of the text (reportedly 83 for anna.txt)

# Forward mapping char -> integer id, and the inverse id -> char.
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
# Encode the whole text as an int32 array via the vocab_to_int table.
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

print('所有的字符是:{}'.format(vocab))
print(text[:100])
print(len(text))
print(encoded[:100])
print(len(vocab))

# todo 制作训练 batches函数。每次调用该函数可以返回一个batch,多用generator
def get_batches(arr, batch_size, n_steps):
    """Yield (x, y) training batches from an integer-encoded text.

    :param arr: 1-D array of integer-encoded characters
    :param batch_size: number of parallel sequences per batch
    :param n_steps: time steps (sequence length) per batch
    :return: generator of (x, y) pairs, each shaped [batch_size, n_steps];
             y is x shifted one position to the left
    """
    # 1. Characters consumed by one full batch.
    chars_per_batch = batch_size * n_steps
    n_batches = len(arr) // chars_per_batch
    if n_batches == 0:
        return  # not enough data for even one batch

    # 2. Drop the tail so the data reshapes evenly, then lay the text out
    #    as batch_size parallel streams: [batch_size, n_batches * n_steps].
    arr = arr[:n_batches * chars_per_batch]
    arr = np.reshape(arr, newshape=[batch_size, -1])

    # 3. Slice the streams along time.
    for i in range(0, arr.shape[1], n_steps):
        x = arr[:, i:i + n_steps]
        # Targets: inputs shifted one step left.
        y = np.zeros(shape=x.shape, dtype=x.dtype)
        y[:, :-1] = x[:, 1:]
        if i + n_steps < arr.shape[1]:
            # Final target comes from the next slice.
            y[:, -1] = arr[:, i + n_steps]
        else:
            # Last slice: wrap around to the first column. The original
            # zero-padded here, which made the final target always char
            # id 0 regardless of the actual text.
            y[:, -1] = arr[:, 0]
        yield x, y


def test_batches_func():
    """Smoke-test get_batches: print the first (x, y) pair it yields."""
    x, y = next(get_batches(encoded, 8, 10))
    print(x)
    print('\n', '**' *40)
    print(y)

def build_inputs(batch_size, n_steps):
    """Create the model's input placeholders.

    :param batch_size: batch dimension of the input/target tensors
    :param n_steps: time-step dimension
    :return: (inputs, targets, keep_prob) placeholder tensors
    """
    seq_shape = [batch_size, n_steps]
    inputs = tf.placeholder(tf.int32, shape=seq_shape, name='x')
    targets = tf.placeholder(tf.int32, shape=seq_shape, name='y')
    # Scalar dropout keep-probability, fed per run (1.0 at inference).
    keep_prob = tf.placeholder(tf.float32, shape=None, name='keep_prob')
    return inputs, targets, keep_prob


def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    """Build a stacked LSTM cell with output dropout on every layer.

    :param lstm_size: number of units per LSTM layer
    :param num_layers: how many LSTM layers to stack
    :param batch_size: batch size, used for the zero initial state
    :param keep_prob: dropout keep-probability tensor
    :return: (multi_cell, initial_state)
    """
    def _dropout_lstm():
        # One LSTM layer wrapped so its outputs are dropped out.
        base = tf.nn.rnn_cell.BasicLSTMCell(num_units=lstm_size)
        return tf.nn.rnn_cell.DropoutWrapper(base, output_keep_prob=keep_prob)

    layers = [_dropout_lstm() for _ in range(num_layers)]
    multi_cell = tf.nn.rnn_cell.MultiRNNCell(layers)
    # All-zero hidden/cell states for the start of each sequence.
    initial_state = multi_cell.zero_state(batch_size, dtype=tf.float32)
    return multi_cell, initial_state

def build_output(rnn_output, lstm_size, output_size):
    """Project the RNN outputs to vocabulary logits with one dense layer.

    :param rnn_output: 3-D tensor [N, n_steps, lstm_size]
    :param lstm_size: hidden size of the RNN
    :param output_size: logits dimension (== num_classes == vocab_size)
    :return: (predictions, logits), both 2-D [N * n_steps, output_size]
    """
    # Fold time into the batch axis: [N, n_steps, H] -> [N * n_steps, H].
    flat = tf.reshape(rnn_output, shape=[-1, lstm_size])

    # Dense-layer variables live in their own scope.
    with tf.variable_scope('logits'):
        w = tf.get_variable(
            'w', shape=[lstm_size, output_size], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        b = tf.get_variable(
            'b', shape=[output_size], dtype=tf.float32,
            initializer=tf.zeros_initializer())

    logits = tf.nn.xw_plus_b(flat, w, b)
    # Softmax turns the logits into per-character probabilities.
    predictions = tf.nn.softmax(logits)
    return predictions, logits

def build_loss(logits, labels, num_classes):
    """Mean softmax cross-entropy loss over all time steps.

    :param logits: 2-D tensor [N * n_steps, num_classes]
    :param labels: integer targets, shape [N, n_steps]
    :param num_classes: number of classes, i.e. vocab_size
    :return: scalar mean loss
    """
    # One-hot encode the targets -> [N, n_steps, num_classes], then
    # flatten them to line up with the 2-D logits.
    y_one_hot = tf.one_hot(indices=labels, depth=num_classes)
    y_flat = tf.reshape(y_one_hot, shape=logits.get_shape())

    per_example = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=y_flat)
    return tf.reduce_mean(per_example)

def build_optimizer(loss, learning_rate, grads_clip):
    """Adam optimizer with global-norm gradient clipping.

    :param loss: scalar loss tensor
    :param learning_rate: Adam learning rate
    :param grads_clip: threshold for the global gradient norm
    :return: the training op
    """
    trainables = tf.trainable_variables()
    raw_grads = tf.gradients(loss, trainables)
    # Rescale all gradients jointly so their global norm <= grads_clip,
    # which guards against exploding gradients in the RNN.
    clipped, _ = tf.clip_by_global_norm(raw_grads, grads_clip)
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    return adam.apply_gradients(zip(clipped, trainables))


class CharRNN:
    """Character-level RNN: wires inputs, stacked LSTM, output projection,
    loss and training op into one TensorFlow graph.

    :param num_classes: vocabulary size
    :param batch_size: training batch size (forced to 1 when sampling)
    :param n_steps: sequence length (forced to 1 when sampling)
    :param lstm_size: hidden units per LSTM layer
    :param num_layers: number of stacked LSTM layers
    :param lr: learning rate
    :param grads_clip: gradient-clipping threshold
    :param sampling: True when the graph is built for text generation
    """
    def __init__(self, num_classes, batch_size=64, n_steps=50, lstm_size=128, num_layers=2,
                 lr=1e-3, grads_clip=5, sampling=False):
        # When sampling we feed one character at a time.
        # (was `if sampling == True` with a no-op else branch)
        if sampling:
            batch_size, n_steps = 1, 1

        tf.reset_default_graph()  # discard any previously built default graph
        # 1. Placeholders.
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, n_steps)
        # 2. Stacked LSTM cell and its zero initial state.
        multi_cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        # 3. One-hot encode the inputs and unroll the RNN over time.
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        rnn_outputs, self.final_state = tf.nn.dynamic_rnn(
            multi_cell, x_one_hot, initial_state=self.initial_state)
        # rnn_outputs shape: [N, n_steps, lstm_size]

        # 4. Project to vocabulary probabilities / logits.
        self.prediction, self.logits = build_output(rnn_outputs, lstm_size, num_classes)

        # 5. Loss and the clipped-gradient Adam training op.
        self.loss = build_loss(self.logits, self.targets, num_classes)
        self.train_opt = build_optimizer(self.loss, lr, grads_clip)

# Hyperparameters.
batch_size = 64
n_steps = 100  # time steps, i.e. the sequence length
lstm_size = 128  # hidden units per LSTM layer
num_layers = 2  # number of stacked LSTM layers
learning_rate = 1e-3
keep_probab = 0.5
epochs = 20
print_every_n = 10
save_every_n = 300

# Build the training graph at import time; train()/sample() use it below.
model = CharRNN(
    len(vocab), batch_size=batch_size, n_steps=n_steps, lstm_size=lstm_size,
    num_layers=num_layers, lr=learning_rate, grads_clip=5)


def train():
    """Train the module-level CharRNN `model` on the encoded corpus,
    printing the loss every `print_every_n` steps and checkpointing
    every `save_every_n` steps."""
    saver = tf.train.Saver(max_to_keep=1)
    # Make sure the checkpoint directory exists.
    checkpoint_dir = './models/anna_rnn'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        step = 1
        # NOTE: was `range(1, epochs)`, which silently ran one epoch
        # fewer than configured.
        for e in range(1, epochs + 1):
            # Fresh zero state at the start of every epoch.
            new_state = sess.run(model.initial_state)
            for x, y in get_batches(encoded, batch_size, n_steps):
                feed = {model.inputs: x, model.targets: y, model.keep_prob: keep_probab,
                        model.initial_state: new_state}
                # Fetch the loss in the same run as the train op instead of
                # re-running the whole forward pass just to print it.
                batch_loss, _, new_state = sess.run(
                    [model.loss, model.train_opt, model.final_state], feed)

                if step % print_every_n == 0:
                    print('Epochs:{} - Step:{} - Train loss:{:.5f}'.format(e, step, batch_loss))
                # Periodic checkpointing (max_to_keep=1 keeps only the newest).
                if step % save_every_n == 0:
                    save_files = os.path.join(checkpoint_dir, 'model.ckpt')
                    saver.save(sess, save_path=save_files)
                step += 1


def pick_top_n(preds, vocab_size, top_n=5):
    """Sample the next character id from the `top_n` most probable ones.

    :param preds: prediction probabilities, shape (1, vocab_size) or (vocab_size,)
    :param vocab_size: number of character classes
    :param top_n: how many of the highest-probability entries to keep
    :return: a sampled character id (int)
    """
    # Copy: np.squeeze can return a *view* of preds, so zeroing entries
    # in place would silently corrupt the caller's array.
    p = np.squeeze(preds).copy()
    # Zero out everything except the top_n most probable entries.
    p[np.argsort(p)[:-top_n]] = 0
    # Renormalize and sample from the truncated distribution.
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c


def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    """Generate text from a trained checkpoint.

    :param checkpoint: path of the checkpoint to restore
    :param n_samples: number of characters to generate after the prime
    :param lstm_size: hidden size (must match the trained model)
    :param vocab_size: vocabulary size (len(vocab) is used internally)
    :param prime: seed text fed through the network first
    :return: prime plus the generated characters, as one string
    """
    samples = list(prime)
    model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        x = np.zeros((1, 1))

        # Warm the hidden state up on the prime text, one char at a time.
        for ch in prime:
            x[0, 0] = vocab_to_int[ch]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)

        # First generated character after the prime.
        c = pick_top_n(preds, len(vocab))
        samples.append(int_to_vocab[c])

        # Keep generating, feeding each sampled char back as input.
        for _ in range(n_samples):
            x[0, 0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)
            c = pick_top_n(preds, len(vocab))
            samples.append(int_to_vocab[c])

    return ''.join(samples)


def test():
    """Generate and print samples from several saved checkpoints."""
    def _show(ckpt, n_chars, title):
        # Sample from one checkpoint, then print a header and the text.
        samp = sample(ckpt, n_chars, lstm_size, len(vocab), prime="Far")
        print(title)
        print(samp)

    print(tf.train.get_checkpoint_state('checkpoints'))

    tf.train.latest_checkpoint('checkpoints')      # resolve the newest checkpoint
    print(tf.train.latest_checkpoint('checkpoints'))
    _show(tf.train.latest_checkpoint('checkpoints'), 2000, '训练800个batch效果是:')

    beifeng = tf.train.get_checkpoint_state('checkpoints')
    _show(beifeng.all_model_checkpoint_paths[2], 1000, '训练200个batch效果是:')

    _show('checkpoints\i2000_l64.ckpt', 1000, '训练2000个batch效果是:')

    _show('checkpoints\i3960_l64.ckpt', 1000, '训练3900个batch效果是:')

# Script entry point: run training when executed directly.
if __name__ == '__main__':
    train()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
基于RNN(循环神经网络)的项目代码可以用于各种自然语言处理任务,如语音识别、机器翻译、文本生成等。以下是一个简单的RNN项目代码框架: 首先,我们需要导入所需的库和模块,如tensorflow、numpy等。 ``` import tensorflow as tf import numpy as np ``` 接下来,我们可以定义一些超参数,如学习率、训练轮数、批次大小等。 ``` learning_rate = 0.001 training_epochs = 100 batch_size = 128 ``` 然后,我们可以准备数据集,将数据进行预处理,并将其转换为适合RNN输入的格式。例如,对于文本生成任务,我们可以将每个字符编码为一个整数,并将其转换为数字矩阵。 ``` # 准备数据集并进行预处理 data = 'hello world' char_to_int = {ch: i for i, ch in enumerate(sorted(set(data)))} int_to_char = {i: ch for i, ch in enumerate(sorted(set(data)))} n_chars = len(data) n_vocab = len(set(data)) # 将数据转换为RNN输入格式 X = np.zeros((n_chars, n_vocab)) Y = np.zeros((n_chars, n_vocab)) for i, char in enumerate(data): X[i, char_to_int[char]] = 1 if i < n_chars - 1: Y[i, char_to_int[data[i+1]]] = 1 ``` 然后,我们可以定义RNN模型的结构,如RNN单元的数量、隐藏层大小等。 ``` # 定义RNN模型 n_units = 128 n_layers = 3 # 输入层 inputs = tf.keras.Input(shape=(n_vocab,)) x = tf.keras.layers.Embedding(n_vocab, n_units)(inputs) # RNN单元 for _ in range(n_layers): x = tf.keras.layers.LSTM(n_units, return_sequences=True)(x) # 输出层 outputs = tf.keras.layers.Dense(n_vocab, activation='softmax')(x) # 构建模型 model = tf.keras.Model(inputs=inputs, outputs=outputs) ``` 最后,我们可以定义损失函数、优化器和训练过程。 ``` # 定义损失函数、优化器和训练过程 loss_fn = tf.keras.losses.CategoricalCrossentropy() optimizer = tf.keras.optimizers.Adam(lr=learning_rate) model.compile(loss=loss_fn, optimizer=optimizer) # 开始训练 model.fit(X, Y, batch_size=batch_size, epochs=training_epochs) ``` 以上是一个简单的基于RNN神经网络的项目代码框架,可以根据不同的任务和需求进行调整和修改。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值