1. Approach
Binary sentiment classification on the IMDB movie-review dataset: an Embedding layer maps every word to a 100-dim vector, a single SimpleRNNCell is unrolled manually over the 80 time steps of each review, and a Dense layer with a sigmoid outputs the probability that the review is positive.
2. Code implementation: a single SimpleRNNCell
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, Sequential, losses
tf.random.set_seed(22)
np.random.seed(22)
assert tf.__version__.startswith('2.')
# Load the dataset
# IMDB is a dataset of movie reviews labeled positive/negative
# Rare words are all mapped to a single "unknown" token; total_words is the size of the vocabulary that is kept
total_words = 10000
max_review_len = 80  # every sentence is cut/padded to this length
batchsz = 64
embedding_len = 100
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
# pad every sentence to the same length so one linear layer can process them
# x_train: [b, 80]  x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)
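
# Optional sanity check: pad_sequences pads and truncates at the front by
# default (padding='pre', truncating='pre'), so the end of each review is kept:
#   keras.preprocessing.sequence.pad_sequences([[1, 2, 3]], maxlen=5)
#   # -> array([[0, 0, 1, 2, 3]], dtype=int32)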
# Build the tf.data datasets
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder=True drops the last batch when it has fewer than batchsz samples
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
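
# Optional check of drop_remainder: with 25000 training samples and batchsz = 64,
# the final partial batch of 25000 % 64 = 40 samples is discarded, leaving
# 25000 // 64 = 390 full batches per epoch:
#   print(tf.data.experimental.cardinality(db_train).numpy())  # -> 390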
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
# Build the network
class MyRNN(keras.Model):
    def __init__(self, units):
        super(MyRNN, self).__init__()
        # initial hidden state h0: [b, 64]
        self.state0 = [tf.zeros([batchsz, units])]
        # transform text to its embedding representation
        # [b, 80] => [b, 80, 100]: each word is represented by a 100-dim vector
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)
        # unroll the words of a sentence along the time axis
        # [b, 80, 100] => [b, 64], h_dim: 64
        # RNN: cell1 -> cell2 -> cell3 -> ...
        # (the built-in SimpleRNN layer does this unrolling internally; a
        #  layer-based equivalent is sketched after the sample output)
        # training and test behave differently in this layer, because of dropout
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
        # output layer: [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)  # a single output node

    # forward pass
    def call(self, inputs, training=None):
        """
        train mode: net(x) or net(x, training=True)
        test mode:  net(x, training=False)
        :param inputs: [b, 80]
        :param training: controls whether dropout is active
        :return: probability that the review is positive
        """
        # [b, 80]
        x = inputs
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # rnn cell compute: [b, 80, 100] => [b, 64]
        state0 = self.state0
        # tf.unstack(x, axis=1) unpacks x along axis 1: a [b, 80, 100] tensor
        # becomes a list of 80 word tensors, each of shape [b, 100]
        for word in tf.unstack(x, axis=1):
            # h_t = tanh(x_t @ w_xh + h_{t-1} @ w_hh + b): input `word`, previous state `state0`
            # for SimpleRNNCell, out and state1[0] are the same tensor; both are
            # returned only to match the general RNN-cell interface, where they differ (e.g. LSTM)
            # dropout is applied when training is True and skipped when it is False
            out, state1 = self.rnn_cell0(word, state0, training=training)
            # feed the new state back in for the next time step
            state0 = state1
        # out has accumulated the semantic information of the whole sentence
        # out: [b, 64] => [b, 1]
        x = self.outlayer(out)
        prob = tf.sigmoid(x)
        return prob
def main():
    units = 64
    epochs = 4
    model = MyRNN(units)
    # train the model
    model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
                  loss=losses.BinaryCrossentropy(),
                  metrics=['accuracy'],
                  experimental_run_tf_function=False)  # works around a tf.function-related error in early TF 2.x
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # test
    model.evaluate(db_test)

if __name__ == '__main__':
    main()
Sample output:

x_train shape: (25000, 80) tf.Tensor(1, shape=(), dtype=int64) tf.Tensor(0, shape=(), dtype=int64)
x_test shape: (25000, 80)
Epoch 4/4
1/97 [..............................] - ETA: 2s - loss: 0.1859 - accuracy: 0.9141
3/97 [..............................] - ETA: 2s - loss: 0.2098 - accuracy: 0.9136
5/97 [>.............................] - ETA: 2s - loss: 0.2274 - accuracy: 0.9094
7/97 [=>............................] - ETA: 2s - loss: 0.2337 - accuracy: 0.9061
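
For comparison, here is a minimal sketch of the same model written with the higher-level layers.SimpleRNN, which performs the per-time-step iteration and state bookkeeping internally. It reuses total_words, embedding_len, max_review_len, db_train and db_test from the script above; it is an illustrative equivalent, not part of the original code.

def build_layer_model(units):
    # Embedding: [b, 80] => [b, 80, 100]
    # SimpleRNN: iterates over the 80 time steps internally and returns the
    #            last hidden state [b, units] (return_sequences defaults to False)
    # Dense + sigmoid: [b, units] => [b, 1] probability
    return Sequential([
        layers.Embedding(total_words, embedding_len, input_length=max_review_len),
        layers.SimpleRNN(units, dropout=0.5),
        layers.Dense(1, activation='sigmoid')
    ])

model = build_layer_model(64)
# on early TF 2.x you may need the same experimental_run_tf_function=False
# workaround as in main() above
model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
              loss=losses.BinaryCrossentropy(),
              metrics=['accuracy'])
model.fit(db_train, epochs=4, validation_data=db_test)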