# 1、训练测试模型
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
# IMDB movie-review dataset bundled with Keras.
imdb = keras.datasets.imdb

# num_words=10000 keeps only the 10,000 most frequent words; any rarer word
# is replaced by the oov_char value (2 by default).
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000
)

# Optional inspection of the loaded data (the first two reviews differ in
# word count):
# print("Training entries: {}, labels: {}".format(len(train_data), len(test_labels)))
# print(train_data[0])
# print(len(train_data[0]), len(train_data[1]))

# ---- Converting integers back to words ----
# Start from the dictionary mapping each word to its integer index, shifting
# every index up by 3 so that 0-3 are free for the special tokens.
word_index = {
    word: rank + 3 for word, rank in imdb.get_word_index().items()
}

# Register the reserved tokens in the freed-up slots (added after the regular
# words, in this exact order).
word_index.update(
    {
        "<PAD>": 0,
        "<START>": 1,
        "<UNK>": 2,  # unknown
        "<UNUSED>": 3,
    }
)
reverse_word_index = dict(