tensorflow2.0笔记--情感分类实战--RNN-LSTM-GRU-详细备注

最新推荐文章于 2024-02-16 23:07:23 发布

weixin_44140703

最新推荐文章于 2024-02-16 23:07:23 发布

阅读量1k

点赞数 4

分类专栏：学习总结　文章标签：tensorflow、深度学习、自然语言处理、神经网络

本文链接：https://blog.csdn.net/weixin_44140703/article/details/103636569

版权

学习总结专栏收录该内容

20 篇文章 0 订阅

订阅专栏

RNN Cell

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


tf.random.set_seed(22)
np.random.seed(22)
# Require TensorFlow 2.x: the assert raises AssertionError when the version string does not start with '2.'.
assert tf.__version__.startswith('2.')

# # List all physical GPU / CPU devices
# physical_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
# physical_devices_cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
# # print(physical_devices, physical_devices_cpu)
# assert len(physical_devices) > 0
# # Enable memory growth with tf.config.experimental.set_memory_growth so GPU memory is requested on demand (little at start-up, more as the program runs)
# tf.config.experimental.set_memory_growth(physical_devices[0], True)
# Alternative: make TensorFlow use a fixed 2 GB of memory on GPU:0.
# tf.config.experimental.set_virtual_device_configuration(
#     physical_devices[0],
#     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
# )

batchsz = 128

# Keep the 10000 most frequent words
total_words = 10000
max_review_len = 80
embedding_len = 100
# Only the 10000 most common words get their own code; every word outside that set is marked as one shared word
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

# After padding, long reviews are truncated and short reviews are filled with 0
# x_train: [b, 80] shape after padding
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)


# Build the datasets
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder=True: a final batch with fewer than batchsz samples is discarded
db_train = db_train.shuffle(10000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train_shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test_shape:', x_test.shape)

# Peek at one test batch and print the shape of its inputs
sample = next(iter(db_test))
print(sample[0].shape)


# Network definition: MyRNN subclasses keras.Model and steps two SimpleRNNCells by hand.
class MyRNN(keras.Model):
    """Two stacked SimpleRNNCells, unrolled manually over the time axis.

    Pipeline: word ids [b, 80] -> Embedding [b, 80, 100] -> two stacked
    SimpleRNNCells stepped word by word [b, units] -> Dense(1) -> sigmoid.
    """

    def __init__(self, units):
        """Create the layers.

        :param units: hidden size of each recurrent cell (also the state width)
        """
        super(MyRNN, self).__init__()

        # Kept so call() can build zero initial states for the batch it
        # actually receives, instead of pre-allocating them for the
        # module-level `batchsz` only.
        self.units = units

        # Embedding turns word ids into dense vectors: [b, 80] => [b, 80, 100].
        # Arguments: vocabulary size, embedding width, sentence length.
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)

        # Recurrent cells that are stepped along the time axis in call();
        # they map the 100-dim word vector to a units-dim hidden state.
        self.RNNCell0 = layers.SimpleRNNCell(units, dropout=0.2)
        self.RNNCell1 = layers.SimpleRNNCell(units, dropout=0.2)

        # fc: [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Forward pass.

        :param inputs: word ids, [b, 80]
        :param training: True for training, False for testing; dropout is
            only active during training. Forwarded to both cells.
            NOTE(review): the original notes claimed that calling net(x)
            without this argument runs in training mode - confirm against
            the Keras documentation for the TF version in use.
        :return: sigmoid probabilities, [b, 1]
        """
        # [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)

        # All-zero initial state per cell, sized from the runtime batch.
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]

        # Unstack along axis=1 so each `word` is one time step: [b, 100].
        for word in tf.unstack(x, axis=1):
            # h_t = x_t*w_xh + h_{t-1}*w_hh; each cell returns (output, new_state)
            # and the new state is fed back in on the next step.
            out0, state0 = self.RNNCell0(word, state0, training=training)
            out1, state1 = self.RNNCell1(out0, state1, training=training)

        # The last output [b, units] aggregates the semantics of the sentence.
        x = self.outlayer(out1)
        return tf.sigmoid(x)

def main():
    """Build, compile and train the SimpleRNNCell model on the IMDB datasets."""
    hidden_units, num_epochs = 64, 4
    net = MyRNN(hidden_units)

    # compile(): optimizer, loss and the metrics to report.
    # experimental_run_tf_function=False: per the original author, the TF 2.0
    # release raises an error without it.
    optimizer = keras.optimizers.Adam(0.001)
    loss_fn = tf.losses.BinaryCrossentropy()
    net.compile(optimizer=optimizer,
                loss=loss_fn,
                metrics=['accuracy'],
                experimental_run_tf_function=False)

    # fit(): training set, number of epochs, the test set used for
    # validation, and validation_freq = run validation every N epochs.
    net.fit(db_train,
            epochs=num_epochs,
            validation_data=db_test,
            validation_freq=2)

    # A final net.evaluate(db_test) is intentionally left disabled here.


if __name__ == '__main__':
    main()

RNN layer

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"
import time

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


tf.random.set_seed(22)
np.random.seed(22)
# Require TensorFlow 2.x: the assert raises AssertionError when the version string does not start with '2.'.
assert tf.__version__.startswith('2.')

# # List all physical GPU / CPU devices
# physical_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
# physical_devices_cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
# # print(physical_devices, physical_devices_cpu)
# assert len(physical_devices) > 0
# # Enable memory growth with tf.config.experimental.set_memory_growth so GPU memory is requested on demand (little at start-up, more as the program runs)
# tf.config.experimental.set_memory_growth(physical_devices[0], True)
# Alternative: make TensorFlow use a fixed 2 GB of memory on GPU:0.
# tf.config.experimental.set_virtual_device_configuration(
#     physical_devices[0],
#     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
# )

batchsz = 128

# Keep the 10000 most frequent words
total_words = 10000
max_review_len = 80
embedding_len = 100
# Only the 10000 most common words get their own code; every word outside that set is marked as one shared word
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

# After padding, long reviews are truncated and short reviews are filled with 0
# x_train: [b, 80] shape after padding
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)


# Build the datasets
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder=True: a final batch with fewer than batchsz samples is discarded
db_train = db_train.shuffle(10000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train_shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test_shape:', x_test.shape)


# Network definition: MyRNN subclasses keras.Model and uses the high-level SimpleRNN layer.
class MyRNN(keras.Model):
    """Two stacked SimpleRNN layers for binary sentiment classification.

    Pipeline: word ids [b, 80] -> Embedding [b, 80, 100] -> two SimpleRNN
    layers [b, units] -> Dense(1) -> sigmoid.
    """

    def __init__(self, units):
        """Create the layers.

        :param units: hidden size of each SimpleRNN layer
        """
        super(MyRNN, self).__init__()

        # Embedding turns word ids into dense vectors: [b, 80] => [b, 80, 100].
        # Arguments: vocabulary size, embedding width, sentence length.
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)

        # [b, 80, 100] => [b, units]: the layers iterate over the time axis
        # themselves. return_sequences=True outputs the hidden state of every
        # step (so the second layer receives a sequence); the default False
        # outputs only the last one.
        self.rnn = tf.keras.Sequential([
            tf.keras.layers.SimpleRNN(units, dropout=0.5, return_sequences=True),
            tf.keras.layers.SimpleRNN(units, dropout=0.5)
        ])

        # fc: [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Forward pass.

        :param inputs: word ids, [b, 80]
        :param training: True for training, False for testing; dropout is
            only active during training. Forwarded explicitly to the
            recurrent stack.
            NOTE(review): the original notes claimed that calling net(x)
            without this argument runs in training mode - confirm against
            the Keras documentation for the TF version in use.
        :return: sigmoid probabilities, [b, 1]
        """
        # [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # [b, 80, 100] => [b, units]; pass the mode on so dropout inside the
        # recurrent layers follows the caller's training/test choice.
        x = self.rnn(x, training=training)
        # The final hidden state aggregates the semantics of the sentence.
        x = self.outlayer(x)
        return tf.sigmoid(x)

def main():
    """Train and evaluate the SimpleRNN-layer model, then print the elapsed time."""
    hidden_units, num_epochs = 64, 4
    started = time.time()
    net = MyRNN(hidden_units)

    # compile(): optimizer, loss and the metrics to report.
    optimizer = keras.optimizers.Adam(0.001)
    loss_fn = tf.losses.BinaryCrossentropy()
    net.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # fit(): training set, number of epochs, the test set used for
    # validation, and validation_freq = run validation every N epochs.
    net.fit(db_train,
            epochs=num_epochs,
            validation_data=db_test,
            validation_freq=2)

    # One final evaluation on the test set.
    net.evaluate(db_test)
    finished = time.time()
    print('total_time_cost：', finished - started)


if __name__ == '__main__':
    main()

LSTM-Cell

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"
import time

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


tf.random.set_seed(22)
np.random.seed(22)
# Require TensorFlow 2.x: the assert raises AssertionError when the version string does not start with '2.'.
assert tf.__version__.startswith('2.')

# # List all physical GPU / CPU devices
# physical_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
# physical_devices_cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
# # print(physical_devices, physical_devices_cpu)
# assert len(physical_devices) > 0
# # Enable memory growth with tf.config.experimental.set_memory_growth so GPU memory is requested on demand (little at start-up, more as the program runs)
# tf.config.experimental.set_memory_growth(physical_devices[0], True)
# Alternative: make TensorFlow use a fixed 2 GB of memory on GPU:0.
# tf.config.experimental.set_virtual_device_configuration(
#     physical_devices[0],
#     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
# )

batchsz = 128

# Keep the 10000 most frequent words
total_words = 10000
max_review_len = 80
embedding_len = 100
# Only the 10000 most common words get their own code; every word outside that set is marked as one shared word
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

# After padding, long reviews are truncated and short reviews are filled with 0
# x_train: [b, 80] shape after padding
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)


# Build the datasets
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder=True: a final batch with fewer than batchsz samples is discarded
db_train = db_train.shuffle(10000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train_shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test_shape:', x_test.shape)

# Peek at one test batch and print the shape of its inputs
sample = next(iter(db_test))
print(sample[0].shape)


# Network definition: MyRNN subclasses keras.Model and steps two LSTMCells by hand.
class MyRNN(keras.Model):
    """Two stacked LSTMCells, unrolled manually over the time axis.

    Pipeline: word ids [b, 80] -> Embedding [b, 80, 100] -> two stacked
    LSTMCells stepped word by word [b, units] -> Dense(1) -> sigmoid.
    """

    def __init__(self, units):
        """Create the layers.

        :param units: hidden size of each LSTM cell (also the state width)
        """
        super(MyRNN, self).__init__()

        # Kept so call() can build zero initial states for the batch it
        # actually receives, instead of pre-allocating them for the
        # module-level `batchsz` only.
        self.units = units

        # Embedding turns word ids into dense vectors: [b, 80] => [b, 80, 100].
        # Arguments: vocabulary size, embedding width, sentence length.
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)

        # Recurrent cells that are stepped along the time axis in call();
        # they map the 100-dim word vector to a units-dim hidden state.
        self.RNNCell0 = layers.LSTMCell(units, dropout=0.5)
        self.RNNCell1 = layers.LSTMCell(units, dropout=0.5)

        # fc: [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def _zero_state(self, batch, dtype):
        """Return an all-zero LSTM state: two [batch, units] tensors.

        An LSTM state consists of two tensors, h and c (Keras orders them
        as [h, c]); both start at zero here.
        """
        return [tf.zeros([batch, self.units], dtype=dtype),
                tf.zeros([batch, self.units], dtype=dtype)]

    def call(self, inputs, training=None):
        """Forward pass.

        :param inputs: word ids, [b, 80]
        :param training: True for training, False for testing; dropout is
            only active during training. Forwarded to both cells.
            NOTE(review): the original notes claimed that calling net(x)
            without this argument runs in training mode - confirm against
            the Keras documentation for the TF version in use.
        :return: sigmoid probabilities, [b, 1]
        """
        # [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)

        # Initial states sized from the runtime batch.
        batch = tf.shape(x)[0]
        state0 = self._zero_state(batch, x.dtype)
        state1 = self._zero_state(batch, x.dtype)

        # Unstack along axis=1 so each `word` is one time step: [b, 100].
        for word in tf.unstack(x, axis=1):
            # Each cell returns (output, new_state); the new state is fed
            # back in on the next step.
            out0, state0 = self.RNNCell0(word, state0, training=training)
            out1, state1 = self.RNNCell1(out0, state1, training=training)

        # The last output [b, units] aggregates the semantics of the sentence.
        x = self.outlayer(out1)
        return tf.sigmoid(x)

def main():
    """Train and evaluate the LSTMCell model, then print the elapsed time."""
    hidden_units, num_epochs = 64, 4

    started = time.time()
    net = MyRNN(hidden_units)

    # compile(): optimizer, loss and the metrics to report.
    # experimental_run_tf_function=False: per the original author, the TF 2.0
    # release raises an error without it.
    optimizer = keras.optimizers.Adam(0.001)
    loss_fn = tf.losses.BinaryCrossentropy()
    net.compile(optimizer=optimizer,
                loss=loss_fn,
                metrics=['accuracy'],
                experimental_run_tf_function=False)

    # fit(): training set, number of epochs, the test set used for
    # validation, and validation_freq = run validation every N epochs.
    net.fit(db_train,
            epochs=num_epochs,
            validation_data=db_test,
            validation_freq=2)

    net.evaluate(db_test)
    finished = time.time()
    print('time:', finished - started)


if __name__ == '__main__':
    main()

LSTM-layer

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"
import time

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


tf.random.set_seed(22)
np.random.seed(22)
# Require TensorFlow 2.x: the assert raises AssertionError when the version string does not start with '2.'.
assert tf.__version__.startswith('2.')

# # List all physical GPU / CPU devices
# physical_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
# physical_devices_cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
# # print(physical_devices, physical_devices_cpu)
# assert len(physical_devices) > 0
# # Enable memory growth with tf.config.experimental.set_memory_growth so GPU memory is requested on demand (little at start-up, more as the program runs)
# tf.config.experimental.set_memory_growth(physical_devices[0], True)
# Alternative: make TensorFlow use a fixed 2 GB of memory on GPU:0.
# tf.config.experimental.set_virtual_device_configuration(
#     physical_devices[0],
#     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
# )

batchsz = 128

# Keep the 10000 most frequent words
total_words = 10000
max_review_len = 80
embedding_len = 100
# Only the 10000 most common words get their own code; every word outside that set is marked as one shared word
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

# After padding, long reviews are truncated and short reviews are filled with 0
# x_train: [b, 80] shape after padding
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)


# Build the datasets
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder=True: a final batch with fewer than batchsz samples is discarded
db_train = db_train.shuffle(10000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train_shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test_shape:', x_test.shape)


# Network definition: MyRNN subclasses keras.Model and uses the high-level LSTM layer.
class MyRNN(keras.Model):
    """Two stacked LSTM layers for binary sentiment classification.

    Pipeline: word ids [b, 80] -> Embedding [b, 80, 100] -> two LSTM
    layers [b, units] -> Dense(1) -> sigmoid.
    """

    def __init__(self, units):
        """Create the layers.

        :param units: hidden size of each LSTM layer
        """
        super(MyRNN, self).__init__()

        # Embedding turns word ids into dense vectors: [b, 80] => [b, 80, 100].
        # Arguments: vocabulary size, embedding width, sentence length.
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)

        # [b, 80, 100] => [b, units]: the layers iterate over the time axis
        # themselves. Compared with the SimpleRNN variant only the layer
        # class changes.
        # unroll (default False): when True the network is unrolled instead
        # of using a symbolic loop; this can speed up an RNN but tends to use
        # more memory and only suits short sequences.
        # return_sequences=True outputs the hidden state of every step (so
        # the second layer receives a sequence); False outputs only the last.
        self.rnn = tf.keras.Sequential([
            tf.keras.layers.LSTM(units, dropout=0.5, return_sequences=True, unroll=True),
            tf.keras.layers.LSTM(units, dropout=0.5, unroll=True)
        ])

        # fc: [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Forward pass.

        :param inputs: word ids, [b, 80]
        :param training: True for training, False for testing; dropout is
            only active during training. Forwarded explicitly to the
            recurrent stack.
            NOTE(review): the original notes claimed that calling net(x)
            without this argument runs in training mode - confirm against
            the Keras documentation for the TF version in use.
        :return: sigmoid probabilities, [b, 1]
        """
        # [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # [b, 80, 100] => [b, units]; pass the mode on so dropout inside the
        # recurrent layers follows the caller's training/test choice.
        x = self.rnn(x, training=training)
        # The final hidden state aggregates the semantics of the sentence.
        x = self.outlayer(x)
        return tf.sigmoid(x)

def main():
    """Train and evaluate the LSTM-layer model, then print the elapsed time."""
    hidden_units, num_epochs = 64, 4
    started = time.time()
    net = MyRNN(hidden_units)

    # compile(): optimizer, loss and the metrics to report.
    optimizer = keras.optimizers.Adam(0.001)
    loss_fn = tf.losses.BinaryCrossentropy()
    net.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # fit(): training set, number of epochs, the test set used for
    # validation, and validation_freq = run validation every N epochs.
    net.fit(db_train,
            epochs=num_epochs,
            validation_data=db_test,
            validation_freq=2)

    # One final evaluation on the test set.
    net.evaluate(db_test)
    finished = time.time()
    print('total_time_cost：', finished - started)


if __name__ == '__main__':
    main()

GRU_cell

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"
import time

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


tf.random.set_seed(22)
np.random.seed(22)
# Require TensorFlow 2.x: the assert raises AssertionError when the version string does not start with '2.'.
assert tf.__version__.startswith('2.')

# # List all physical GPU / CPU devices
# physical_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
# physical_devices_cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
# # print(physical_devices, physical_devices_cpu)
# assert len(physical_devices) > 0
# # Enable memory growth with tf.config.experimental.set_memory_growth so GPU memory is requested on demand (little at start-up, more as the program runs)
# tf.config.experimental.set_memory_growth(physical_devices[0], True)
# Alternative: make TensorFlow use a fixed 2 GB of memory on GPU:0.
# tf.config.experimental.set_virtual_device_configuration(
#     physical_devices[0],
#     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
# )

batchsz = 128

# Keep the 10000 most frequent words
total_words = 10000
max_review_len = 80
embedding_len = 100
# Only the 10000 most common words get their own code; every word outside that set is marked as one shared word
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

# After padding, long reviews are truncated and short reviews are filled with 0
# x_train: [b, 80] shape after padding
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)


# Build the datasets
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder=True: a final batch with fewer than batchsz samples is discarded
db_train = db_train.shuffle(10000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train_shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test_shape:', x_test.shape)

# Peek at one test batch and print the shape of its inputs
sample = next(iter(db_test))
print(sample[0].shape)


# Network definition: MyRNN subclasses keras.Model and steps two GRUCells by hand.
class MyRNN(keras.Model):
    """Two stacked GRUCells, unrolled manually over the time axis.

    Pipeline: word ids [b, 80] -> Embedding [b, 80, 100] -> two stacked
    GRUCells stepped word by word [b, units] -> Dense(1) -> sigmoid.
    """

    def __init__(self, units):
        """Create the layers.

        :param units: hidden size of each GRU cell (also the state width)
        """
        super(MyRNN, self).__init__()

        # Kept so call() can build zero initial states for the batch it
        # actually receives, instead of pre-allocating them for the
        # module-level `batchsz` only.
        self.units = units

        # Embedding turns word ids into dense vectors: [b, 80] => [b, 80, 100].
        # Arguments: vocabulary size, embedding width, sentence length.
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)

        # Recurrent cells that are stepped along the time axis in call();
        # they map the 100-dim word vector to a units-dim hidden state.
        self.RNNCell0 = layers.GRUCell(units, dropout=0.5)
        self.RNNCell1 = layers.GRUCell(units, dropout=0.5)

        # fc: [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Forward pass.

        :param inputs: word ids, [b, 80]
        :param training: True for training, False for testing; dropout is
            only active during training. Forwarded to both cells.
            NOTE(review): the original notes claimed that calling net(x)
            without this argument runs in training mode - confirm against
            the Keras documentation for the TF version in use.
        :return: sigmoid probabilities, [b, 1]
        """
        # [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)

        # Unlike an LSTM (two state tensors), a GRU has a single state
        # tensor, so one all-zero tensor per cell is enough. It is sized
        # from the runtime batch.
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]

        # Unstack along axis=1 so each `word` is one time step: [b, 100].
        for word in tf.unstack(x, axis=1):
            # Each cell returns (output, new_state); the new state is fed
            # back in on the next step.
            out0, state0 = self.RNNCell0(word, state0, training=training)
            out1, state1 = self.RNNCell1(out0, state1, training=training)

        # The last output [b, units] aggregates the semantics of the sentence.
        x = self.outlayer(out1)
        return tf.sigmoid(x)

def main():
    """Train and evaluate the GRUCell model, then print the elapsed time."""
    hidden_units, num_epochs = 64, 4

    started = time.time()
    net = MyRNN(hidden_units)

    # compile(): optimizer, loss and the metrics to report.
    # experimental_run_tf_function=False: per the original author, the TF 2.0
    # release raises an error without it.
    optimizer = keras.optimizers.Adam(0.001)
    loss_fn = tf.losses.BinaryCrossentropy()
    net.compile(optimizer=optimizer,
                loss=loss_fn,
                metrics=['accuracy'],
                experimental_run_tf_function=False)

    # fit(): training set, number of epochs, the test set used for
    # validation, and validation_freq = run validation every N epochs.
    net.fit(db_train,
            epochs=num_epochs,
            validation_data=db_test,
            validation_freq=2)

    net.evaluate(db_test)
    finished = time.time()
    print('time:', finished - started)


if __name__ == '__main__':
    main()

GRU_layer

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"
import time

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


tf.random.set_seed(22)
np.random.seed(22)
# Require TensorFlow 2.x: the assert raises AssertionError when the version string does not start with '2.'.
assert tf.__version__.startswith('2.')

# # List all physical GPU / CPU devices
# physical_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
# physical_devices_cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
# # print(physical_devices, physical_devices_cpu)
# assert len(physical_devices) > 0
# # Enable memory growth with tf.config.experimental.set_memory_growth so GPU memory is requested on demand (little at start-up, more as the program runs)
# tf.config.experimental.set_memory_growth(physical_devices[0], True)
# Alternative: make TensorFlow use a fixed 2 GB of memory on GPU:0.
# tf.config.experimental.set_virtual_device_configuration(
#     physical_devices[0],
#     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
# )

batchsz = 128

# Keep the 10000 most frequent words
total_words = 10000
max_review_len = 80
embedding_len = 100
# Only the 10000 most common words get their own code; every word outside that set is marked as one shared word
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

# After padding, long reviews are truncated and short reviews are filled with 0
# x_train: [b, 80] shape after padding
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)


# Build the datasets
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder=True: a final batch with fewer than batchsz samples is discarded
db_train = db_train.shuffle(10000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train_shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test_shape:', x_test.shape)


# Network definition: MyRNN subclasses keras.Model and uses the high-level GRU layer.
class MyRNN(keras.Model):
    """Two stacked GRU layers for binary sentiment classification.

    Pipeline: word ids [b, 80] -> Embedding [b, 80, 100] -> two GRU
    layers [b, units] -> Dense(1) -> sigmoid.
    """

    def __init__(self, units):
        """Create the layers.

        :param units: hidden size of each GRU layer
        """
        super(MyRNN, self).__init__()

        # Embedding turns word ids into dense vectors: [b, 80] => [b, 80, 100].
        # Arguments: vocabulary size, embedding width, sentence length.
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)

        # [b, 80, 100] => [b, units]: the layers iterate over the time axis
        # themselves. Compared with the SimpleRNN variant only the layer
        # class changes.
        # unroll (default False): when True the network is unrolled instead
        # of using a symbolic loop; this can speed up an RNN but tends to use
        # more memory and only suits short sequences. In practice unroll=True
        # does not always help and can even slow things down, so tune it per
        # case.
        # return_sequences=True outputs the hidden state of every step (so
        # the second layer receives a sequence); False outputs only the last.
        self.rnn = tf.keras.Sequential([
            tf.keras.layers.GRU(units, dropout=0.5, return_sequences=True, unroll=True),
            tf.keras.layers.GRU(units, dropout=0.5, unroll=True)
        ])

        # fc: [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Forward pass.

        :param inputs: word ids, [b, 80]
        :param training: True for training, False for testing; dropout is
            only active during training. Forwarded explicitly to the
            recurrent stack.
            NOTE(review): the original notes claimed that calling net(x)
            without this argument runs in training mode - confirm against
            the Keras documentation for the TF version in use.
        :return: sigmoid probabilities, [b, 1]
        """
        # [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # [b, 80, 100] => [b, units]; pass the mode on so dropout inside the
        # recurrent layers follows the caller's training/test choice.
        x = self.rnn(x, training=training)
        # The final hidden state aggregates the semantics of the sentence.
        x = self.outlayer(x)
        return tf.sigmoid(x)

def main():
    """Train and evaluate the GRU-layer model, then print the elapsed time."""
    hidden_units, num_epochs = 64, 4
    started = time.time()
    net = MyRNN(hidden_units)

    # compile(): optimizer, loss and the metrics to report.
    optimizer = keras.optimizers.Adam(0.001)
    loss_fn = tf.losses.BinaryCrossentropy()
    net.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # fit(): training set, number of epochs, the test set used for
    # validation, and validation_freq = run validation every N epochs.
    net.fit(db_train,
            epochs=num_epochs,
            validation_data=db_test,
            validation_freq=2)

    # One final evaluation on the test set.
    net.evaluate(db_test)
    finished = time.time()
    print('total_time_cost：', finished - started)


if __name__ == '__main__':
    main()

weixin_44140703

关注

4
点赞
踩
16

收藏

觉得还不错? 一键收藏
0
评论
tensorflow2.0笔记--情感分类实战--RNN-LSTM-GRU-详细备注

RNN Cell：import os；os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"；import tensorflow as tf；import numpy as np；from tensorflow import keras；from tensorflow.keras import layers；tf.random.set_seed(22)；np.ran...
复制链接

扫一扫