%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import sys
import time
import sklearn
import tensorflow as tf
from tensorflow import keras
# Report the interpreter and library versions for reproducibility.
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    print(lib.__name__, lib.__version__)
import tensorflow_datasets as tfds
# Subword-level model: load IMDB reviews pre-encoded with an ~8k-entry
# subword vocabulary. as_supervised=True yields (text, label) pairs;
# with_info=True also returns the dataset metadata (splits, features).
dataset, info = tfds.load('imdb_reviews/subwords8k',with_info=True,as_supervised=True)
train_dataset , test_dataset = dataset['train'],dataset['test']
print(info)
# The SubwordTextEncoder shipped with the dataset's feature metadata.
tokenizer = info.features['text'].encoder
print('vocabulary size: {}'.format(tokenizer.vocab_size))
# Demonstrate the round trip: string -> subword ids -> string.
sample_string = "Tensorflow is cool."
tokenized_string = tokenizer.encode(sample_string)
print('tokenized string is {}'.format(tokenized_string))
original_string = tokenizer.decode(tokenized_string)
print('original string is {}'.format(original_string))
# Show which subword piece each individual id decodes to.
for token in tokenized_string:
    print('{} --> "{}"'.format(token,tokenizer.decode([token])))
# Shuffle and batch. Reviews have variable length, so padded_batch pads every
# example in a batch up to the longest sequence in that batch.
buffer_size = 10000
batch_size = 64

# Shuffle only the training split; evaluation order does not matter.
train_dataset = train_dataset.shuffle(buffer_size)
# The legacy compat helper supplies the (unknown-length) per-element shapes
# that padded_batch pads to. (TF >= 2.2 can infer these automatically.)
train_dataset = train_dataset.padded_batch(
    batch_size, tf.compat.v1.data.get_output_shapes(train_dataset))
test_dataset = test_dataset.padded_batch(
    batch_size, tf.compat.v1.data.get_output_shapes(test_dataset))
print(tf.compat.v1.data.get_output_shapes(train_dataset))
print(tf.compat.v1.data.get_output_shapes(test_dataset))
# Bidirectional recurrent classifier over the subword ids.
embedding_dim = 16
batch_size = 128
vocab_size = tokenizer.vocab_size

# Two stacked bidirectional LSTMs. Only the last recurrent layer collapses
# the sequence (return_sequences=False) because a Dense head follows it.
bidir_rnn_model = keras.models.Sequential([
    keras.layers.Embedding(vocab_size, embedding_dim),
    keras.layers.Bidirectional(keras.layers.LSTM(units=32, return_sequences=True)),
    keras.layers.Bidirectional(keras.layers.LSTM(units=32, return_sequences=False)),
    keras.layers.Dense(32, activation='relu'),
    # Single sigmoid unit: binary positive/negative sentiment.
    keras.layers.Dense(1, activation='sigmoid'),
])
bidir_rnn_model.summary()
bidir_rnn_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

epochs = 5
history = bidir_rnn_model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=test_dataset,
)
def plot_leraning_curves(history, label, epochs, min_value, max_value):
    """Plot a training metric together with its validation counterpart.

    NOTE(review): the misspelled name ("leraning") is kept as-is because the
    call sites below use it.

    history: a keras History object (reads history.history).
    label: metric key, e.g. 'accuracy' or 'loss'.
    epochs, min_value, max_value: axis limits for the plot.
    """
    val_label = 'val_' + label
    curves = {
        label: history.history[label],
        val_label: history.history[val_label],
    }
    pd.DataFrame(curves).plot(figsize=(8, 5))
    plt.grid(True)
    plt.axis([0, epochs, min_value, max_value])
    plt.show()
# Visualize both training curves, then report final loss/accuracy on the
# test set.
for metric in ('accuracy', 'loss'):
    plot_leraning_curves(history, metric, epochs, 0, 1)
bidir_rnn_model.evaluate(test_dataset)