import os
import numpy as np
import tensorflow as tf
def load_data(file_path):
    """
    Load the raw text data.
    :param file_path: path to the input text file
    :return: list of lines
    """
    with open(file_path, 'r', encoding='utf-8') as reader:
        data = reader.readlines()
    return data
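
# Note: readlines() keeps the trailing '\n' on each line, which is why the
# callers below strip() every line before splitting it into words.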
def create_lookup_table(text):
    """
    Build the lookup tables {word: index} and {index: word}.
    :param text: list of words
    :return: (word2int, int2word)
    """
    words = sorted(list(set(text)))
    # Build the dictionaries
    word2int = {word: idx for idx, word in enumerate(words)}
    int2word = dict(enumerate(words))
    return word2int, int2word
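
# A tiny worked example (illustrative words, not from the training corpus):
# for text = ['the', 'cat', 'saw', 'the'], the sorted unique vocabulary is
# ['cat', 'saw', 'the'], so create_lookup_table returns
#   word2int == {'cat': 0, 'saw': 1, 'the': 2}
#   int2word == {0: 'cat', 1: 'saw', 2: 'the'}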
def create_X_and_Y(data, word2int, number_time_steps=3):
    """
    Build the training inputs X and targets Y from the raw data,
    using a sliding window of `number_time_steps` words per sample.
    :param data: list of raw text lines
    :param word2int: word -> index lookup table
    :param number_time_steps: window size (number of input words per sample)
    :return: (X, Y) as numpy ndarrays
    """
    X, Y = [], []
    for content in data:
        # Split the current line into its word sequence; strip() removes surrounding whitespace.
        words = content.strip().split(' ')
        # Total number of words in this line
        words_number = len(words)
        offset = 0
        while offset < words_number - number_time_steps:
            temp_x = words[offset: offset + number_time_steps]
            temp_y = words[offset + number_time_steps]
            X.append([word2int[tx] for tx in temp_x])
            Y.append(word2int[temp_y])
            offset += 1
    # Convert the lists to numpy ndarrays
    X = np.asarray(X).reshape([-1, number_time_steps])
    Y = np.asarray(Y).reshape(-1)
    return X, Y
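
# Sliding-window example (illustrative): with number_time_steps=3, the line
# 'a b c d e' yields two samples:
#   X = [['a', 'b', 'c'], ['b', 'c', 'd']]  -> mapped to ids via word2int
#   Y = ['d', 'e']                          -> mapped to ids via word2int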
def create_model(vocab_size, num_units=32, number_time_steps=3):
    """
    Build the bidirectional LSTM language model.
    :param vocab_size: vocabulary size
    :param num_units: number of hidden units (neurons) per LSTM cell
    :param number_time_steps: number of time steps
    :return: (input placeholder, label placeholder, logits, predictions)
    """
    with tf.variable_scope('Network', initializer=tf.truncated_normal_initializer(stddev=0.1)):
        with tf.variable_scope('input'):
            # Shape of the input data, e.g.:
            # x:
            #   [[2, 3, 4],
            #    [7, 8, 9]]
            # y:
            #   [5, 10]
            _x = tf.placeholder(tf.int32, shape=[None, number_time_steps], name='x')
            _y = tf.placeholder(tf.int32, shape=[None], name='y')
            x_float = tf.cast(_x, tf.float32)
            # Split the input along the time axis into a list of per-step tensors.
            # TODO: the raw integer ids are fed in directly here; a real project
            # should use one-hot encoding or an embedding lookup instead.
            input_x = tf.split(x_float, num_or_size_splits=number_time_steps, axis=1)
            # input_x: [[N, 1], [N, 1], ...]
        with tf.variable_scope('rnn'):
            # a. Define the forward and backward cells
            cell_fw = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_units)
            cell_bw = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_units)
            # b. Run the static bidirectional RNN to get the hidden-layer outputs
            rnn_outputs, _, _ = tf.nn.static_bidirectional_rnn(
                cell_fw=cell_fw, cell_bw=cell_bw, inputs=input_x, dtype=tf.float32
            )
            # rnn_outputs: [[N, 2*num_units], [N, 2*num_units], ...]
        with tf.variable_scope('logits'):
            # a. Take the output of the last time step
            rnn_output = rnn_outputs[-1]
            # b. Build the output-layer variables
            softmax_w = tf.get_variable(
                'w', shape=[2 * num_units, vocab_size], dtype=tf.float32
            )
            softmax_b = tf.get_variable(
                'b', shape=[vocab_size], dtype=tf.float32, initializer=tf.zeros_initializer()
            )
            logits = tf.nn.xw_plus_b(rnn_output, softmax_w, softmax_b)
        with tf.variable_scope('Predict'):
            predictions = tf.argmax(logits, axis=1)
    return _x, _y, logits, predictions
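
# A minimal sketch of the one-hot alternative mentioned in the TODO above
# (assumption: same [None, number_time_steps] int32 input). It is not wired
# into create_model; it only illustrates the intended replacement for feeding
# raw ids into the RNN.
def split_one_hot_inputs(_x, vocab_size, number_time_steps):
    # [N, T] int ids -> [N, T, vocab_size] one-hot
    one_hot = tf.one_hot(_x, depth=vocab_size, dtype=tf.float32)
    # -> list of T tensors, each of shape [N, vocab_size]
    return [tf.squeeze(t, axis=1) for t in tf.split(one_hot, number_time_steps, axis=1)]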
def create_loss(logits, labels):
    """
    Create the loss.
    :param logits: unnormalized output scores, shape [N, vocab_size]
    :param labels: integer class labels
    :return: scalar loss tensor
    """
    with tf.name_scope('loss'):
        # a. Flatten the labels to a 1-D tensor
        labels = tf.reshape(labels, shape=[-1])
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels
        ))
    return loss
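
# For reference: sparse_softmax_cross_entropy_with_logits takes integer class
# labels directly, so no one-hot encoding of y is needed. E.g. with
# vocab_size=4 and label 2, it is equivalent to dense softmax cross-entropy
# against the one-hot target [0, 0, 1, 0].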
def create_optimizer(loss, lr=1e-3):
    """
    Build the optimizer.
    :param loss: scalar loss tensor to minimize
    :param lr: learning rate
    :return: training op
    """
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        train_opt = optimizer.minimize(loss)
    return train_opt
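
# Note: minimize() is shorthand for compute_gradients() followed by
# apply_gradients(); splitting those two calls apart is the usual hook point
# for gradient clipping, which RNNs often benefit from.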
def train(checkpoint_dir, max_steps=10000, batch_size=64, num_units=32, number_time_steps=10):
    graph = tf.Graph()
    with graph.as_default():
        # Load the data
        data = load_data(file_path='../datas/belling_the_cat.txt')
        text = []
        for line in data:
            line = line.strip()
            for word in line.split(' '):
                text.append(word)
        word2int, int2word = create_lookup_table(text)
        x, y = create_X_and_Y(data, word2int, number_time_steps=number_time_steps)
        # print(word2int, '\n', int2word)
        # 1. Build the network
        _x, _y, logits, predictions = create_model(
            len(word2int), num_units=num_units, number_time_steps=number_time_steps)
        # 2. Model loss
        loss = create_loss(logits, _y)
        # 3. Optimizer
        train_opt = create_optimizer(loss)
        saver = tf.train.Saver()
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        # Set up batch iteration
        total_samples = x.shape[0]
        n_batches = total_samples // batch_size
        batch_index = 0
        # Random permutation of the sample indices, i.e. a shuffle
        random_index = np.random.permutation(total_samples)
        for step in range(1, max_steps + 1):
            # Fetch the current training batch
            start_idx = batch_index * batch_size
            end_idx = start_idx + batch_size
            idx = random_index[start_idx: end_idx]
            train_x = x[idx]
            train_y = y[idx]
            # Build the feed dict
            feed = {_x: train_x, _y: train_y}
            sess.run(train_opt, feed)
            if step % 200 == 0:
                train_loss = sess.run(loss, feed)
                print('step:{} - Train loss:{}'.format(step, train_loss))
                # Run a sample prediction
                index = np.random.randint(low=0, high=total_samples)
                sample_in = np.reshape(x[index], newshape=[-1, number_time_steps])
                sample_out = sess.run(predictions, feed_dict={_x: sample_in})
                print('input:{} - predicted:{} vs actual:{}'.format(
                    x[index], int2word[sample_out[0]], int2word[y[index]]))
            if step % 1000 == 0:
                # Persist the model
                files = 'model.ckpt'
                save_files = os.path.join(checkpoint_dir, files)
                saver.save(sess, save_path=save_files, global_step=step)
                print('model saved!!')
            # Advance to the next batch; reshuffle after a full epoch
            batch_index += 1
            if batch_index == n_batches:
                batch_index = 0
                random_index = np.random.permutation(total_samples)
if __name__ == '__main__':
checkpoint_dir = './models'
if not os.path.exists(checkpoint_dir):
os.makedirs(checkpoint_dir)
train(
checkpoint_dir, max_steps=10000, batch_size=64, num_units=32, number_time_steps=10
)
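
# A minimal restore sketch (assumption: run in a fresh process after training;
# the graph must be rebuilt with the same shapes before restoring the weights):
#
#   graph = tf.Graph()
#   with graph.as_default():
#       _x, _y, logits, predictions = create_model(
#           vocab_size, num_units=32, number_time_steps=10)
#       saver = tf.train.Saver()
#   with tf.Session(graph=graph) as sess:
#       saver.restore(sess, tf.train.latest_checkpoint('./models'))
#       # sess.run(predictions, feed_dict={_x: ...})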