# Adapted from https://github.com/keras-team/keras/blob/master/examples/imdb_lstm.py
# Task: sentiment classification of IMDB movie reviews — an LSTM over word
# sequences classifies each review as positive or negative.
# IMDB dataset overview (in Chinese): https://blog.csdn.net/ltochange/article/details/78355753
from __future__ import print_function
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.datasets import imdb
max_features = 20000  # vocabulary size: keep only the top-20000 most frequent words
# cut texts after this number of words (among top max_features most common words)
maxlen = 80  # maximum review length in timesteps; longer reviews are truncated
batch_size = 32
print('Loading data...')
# Each review arrives as a list of word indices (index = rank of the word in
# the frequency dictionary); words outside the top max_features are dropped.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)  # force every sequence to exactly maxlen: longer ones are truncated, shorter ones zero-padded
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128))  # map each word index to a learned dense 128-dim vector (word2vec-like, trained jointly with the network)
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))  # single LSTM layer; dropout on both input and recurrent connections for regularization
model.add(Dense(1, activation='sigmoid'))  # one sigmoid unit -> probability the review is positive
# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])  # binary cross-entropy loss, Adam optimizer, accuracy as the reported metric
print('Train...')
# NOTE(review): the test set is reused as validation data (as in the upstream
# Keras example). Fine for a demo, but it leaks test data into monitoring;
# 15 epochs is also known to overfit this model — verify before reuse.
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=15,
validation_data=(x_test, y_test))  # train the network, reporting held-out metrics each epoch
score, acc = model.evaluate(x_test, y_test,
batch_size=batch_size)  # evaluate returns [loss, accuracy] for the compiled metrics
print('Test score:', score)
print('Test accuracy:', acc)