# Data loading and preprocessing.
# NOTE(review): the original file used tf/keras/layers without importing them;
# the imports below make the script runnable.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Mini-batch size shared by the training and evaluation pipelines.
batches = 128

# IMDB sentiment dataset; vocabulary capped to the 10,000 most frequent words,
# rarer words are replaced by the OOV index by the loader.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=10000)

# Pad/truncate every review to exactly 200 tokens so batches have a static shape.
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=200)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=200)

# Training pipeline: shuffle with a 1000-element buffer, then batch.
# drop_remainder=True discards the final partial batch to keep shapes fixed.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batches, drop_remainder=True)

# Evaluation pipeline: no shuffling needed.
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batches, drop_remainder=True)
# Model definition
class Lstm_Net(keras.Model):
    """Two-layer LSTM binary sentiment classifier.

    Maps integer token ids of shape [batch, max_len] through an embedding and
    two stacked LSTMs to a per-example sigmoid probability of shape [batch, 1].
    """

    def __init__(self, units, vocab_size=10000, embed_dim=100, max_len=200):
        """Build the model layers.

        Args:
            units: Hidden state size of each LSTM layer.
            vocab_size: Embedding input dimension; default matches the
                ``num_words=10000`` used when loading the dataset.
            embed_dim: Size of each token's embedding vector.
            max_len: Expected (padded) sequence length; default matches the
                ``maxlen=200`` used in preprocessing.
        """
        super().__init__()
        # [batch, max_len] -> [batch, max_len, embed_dim]:
        # each token id becomes a dense embed_dim-dimensional vector.
        self.embedding = layers.Embedding(vocab_size, embed_dim, input_length=max_len)
        # [batch, max_len, embed_dim] -> [batch, units]: the first LSTM returns
        # the full sequence so the second can consume it; the second returns
        # only its final hidden state. dropout=0.2 is active during training only.
        self.lstm = keras.Sequential([
            layers.LSTM(units, dropout=0.2, return_sequences=True),
            layers.LSTM(units, dropout=0.2),
        ])
        # [batch, units] -> [batch, 1] raw logit; sigmoid is applied in call().
        self.outlayer = layers.Dense(1)

    def call(self, inputs):
        """Return the probability of positive sentiment, shape [batch, 1]."""
        x = self.embedding(inputs)
        x = self.lstm(x)
        x = self.outlayer(x)
        prob = tf.sigmoid(x)
        return prob
# Training and evaluation
# Build the model with 64 LSTM units and train with Adam + binary cross-entropy.
# The model outputs probabilities (sigmoid in call()), so the default
# from_logits=False of BinaryCrossentropy is correct here.
model = Lstm_Net(64)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
              loss=tf.losses.BinaryCrossentropy(),
              metrics=['accuracy'])
# Track test-set metrics after every epoch so a growing train/validation gap
# (overfitting) is visible during training, not only after the final evaluate().
model.fit(db_train, epochs=10, validation_data=db_test)
model.evaluate(db_test)
# Results: training-set accuracy ~98%
# Test set: loss = 0.63768857717514, acc = 0.85108172893524
# The large train/test gap indicates overfitting.