Training and optimization on the IMDB dataset with TensorFlow

Below is the example from the official TensorFlow tutorial: https://www.tensorflow.org/tutorials/keras/overfit_and_underfit?hl=zh-cn


# coding: utf-8

# In[1]:

import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)


# In[3]:

NUM_WORDS = 10000
(train_data, train_labels), (test_data, test_labels) = keras.datasets.imdb.load_data(num_words=NUM_WORDS)
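# Illustrative side check (my own addition, not part of the original notebook):
# decode one encoded review back to words. load_data() offsets the stored word
# indices by 3, reserving 0 for padding, 1 for the start token and 2 for unknown.
word_index = keras.datasets.imdb.get_word_index()
reverse_word_index = {value + 3: key for key, value in word_index.items()}
print(' '.join(reverse_word_index.get(i, '?') for i in train_data[0][:20]))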


# In[4]:

def multi_hot_sequences(sequences, dimension):
    # Create an all-zero matrix of shape (len(sequences), dimension)
    results = np.zeros((len(sequences), dimension))
    for i, word_indices in enumerate(sequences):
        results[i, word_indices] = 1.0  # set specific indices of results[i] to 1s
    return results

train_data = multi_hot_sequences(train_data, dimension=NUM_WORDS)
test_data = multi_hot_sequences(test_data, dimension=NUM_WORDS)
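# Quick sanity check of the multi-hot encoding (illustrative, not in the
# original notebook): every review is now a fixed-length 0/1 vector with a 1
# at each word index that occurs in the review.
print(train_data.shape)      # (25000, 10000)
print(train_data[0].sum())   # number of distinct word indices in the first review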


# In[6]:

# get_ipython().magic('matplotlib inline')  # notebook-only magic; not needed when run as a plain script
plt.plot(train_data[0])


# In[7]:

#base model
baseline_model = keras.Sequential([
    # `input_shape` is only required here so that `.summary` works.
    keras.layers.Dense(16, activation=tf.nn.relu, input_shape=(NUM_WORDS,)),
    keras.layers.Dense(16, activation=tf.nn.relu),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

baseline_model.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy', 'binary_crossentropy'])

baseline_model.summary()
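# Cross-check of the parameter count reported by summary() above (illustrative):
# first Dense: 10000*16 + 16, second Dense: 16*16 + 16, output Dense: 16 + 1.
print(10000*16 + 16 + 16*16 + 16 + 16 + 1)  # 160305 trainable parameters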


# In[8]:

baseline_history = baseline_model.fit(train_data,
                                      train_labels,
                                      epochs=20,
                                      batch_size=512,
                                      validation_data=(test_data, test_labels),
                                      verbose=2)


# In[9]:

smaller_model = keras.Sequential([
    keras.layers.Dense(4, activation=tf.nn.relu, input_shape=(NUM_WORDS,)),
    keras.layers.Dense(4, activation=tf.nn.relu),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

smaller_model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy', 'binary_crossentropy'])

smaller_model.summary()


# In[10]:

smaller_history = smaller_model.fit(train_data,
                                    train_labels,
                                    epochs=20,
                                    batch_size=512,
                                    validation_data=(test_data, test_labels),
                                    verbose=2)


# In[11]:

bigger_model = keras.models.Sequential([
    keras.layers.Dense(512, activation=tf.nn.relu, input_shape=(NUM_WORDS,)),
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

bigger_model.compile(optimizer='adam',
                     loss='binary_crossentropy',
                     metrics=['accuracy','binary_crossentropy'])

bigger_model.summary()


# In[12]:

bigger_history = bigger_model.fit(train_data, train_labels,
                                  epochs=20,
                                  batch_size=512,
                                  validation_data=(test_data, test_labels),
                                  verbose=2)


# In[13]:

def plot_history(histories, key='binary_crossentropy'):
  plt.figure(figsize=(16,10))

  for name, history in histories:
    val = plt.plot(history.epoch, history.history['val_'+key],
                   '--', label=name.title()+' Val')
    plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
             label=name.title()+' Train')

  plt.xlabel('Epochs')
  plt.ylabel(key.replace('_',' ').title())
  plt.legend()

  plt.xlim([0,max(history.epoch)])

plot_history([('baseline', baseline_history),
              ('smaller', smaller_history),
              ('bigger', bigger_history)])
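# Illustrative follow-up (not in the original notebook): print the last-epoch
# validation loss of each run so the curves above can also be compared numerically.
for name, history in [('baseline', baseline_history),
                      ('smaller', smaller_history),
                      ('bigger', bigger_history)]:
    print(name, history.history['val_binary_crossentropy'][-1])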


# In[14]:

l2_model = keras.models.Sequential([
    keras.layers.Dense(16, kernel_regularizer=keras.regularizers.l2(0.001),
                       activation=tf.nn.relu, input_shape=(NUM_WORDS,)),
    keras.layers.Dense(16, kernel_regularizer=keras.regularizers.l2(0.001),
                       activation=tf.nn.relu),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

l2_model.compile(optimizer='adam',
                 loss='binary_crossentropy',
                 metrics=['accuracy', 'binary_crossentropy'])

l2_model_history = l2_model.fit(train_data, train_labels,
                                epochs=20,
                                batch_size=512,
                                validation_data=(test_data, test_labels),
                                verbose=2)


# In[15]:

plot_history([('baseline', baseline_history),
              ('l2', l2_model_history)])
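# Illustrative note (my own addition, not from the tutorial): l2(0.001) adds
# 0.001 * w**2 to the training loss for every weight w in the regularized layers;
# the per-layer penalty terms can be inspected via the model's regularization losses.
print(l2_model.losses)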


# In[16]:

dpt_model = keras.models.Sequential([
    keras.layers.Dense(16, activation=tf.nn.relu, input_shape=(NUM_WORDS,)),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(16, activation=tf.nn.relu),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

dpt_model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy','binary_crossentropy'])

dpt_model_history = dpt_model.fit(train_data, train_labels,
                                  epochs=20,
                                  batch_size=512,
                                  validation_data=(test_data, test_labels),
                                  verbose=2)


# In[17]:

plot_history([('baseline', baseline_history),
              ('dropout', dpt_model_history)])
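# Illustrative variant (an assumption on my part, not part of the original
# notebook): weight regularization and dropout can also be combined in one model.
combined_model = keras.models.Sequential([
    keras.layers.Dense(16, kernel_regularizer=keras.regularizers.l2(0.001),
                       activation=tf.nn.relu, input_shape=(NUM_WORDS,)),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(16, kernel_regularizer=keras.regularizers.l2(0.001),
                       activation=tf.nn.relu),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
combined_model.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy', 'binary_crossentropy'])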

To summarize, the most common ways to prevent overfitting in neural networks are:

Get more training data.
Reduce the capacity of the network.
Add weight regularization.
Add dropout layers.
Two other important methods, not covered in this guide, are data augmentation and batch normalization; a rough sketch of the batch-normalization idea follows below.
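
As an illustration of the last point, here is a minimal, untested sketch (my own addition, not from the tutorial; the name `bn_model` is arbitrary) of the baseline model with a BatchNormalization layer after each hidden Dense layer:

```python
bn_model = keras.Sequential([
    keras.layers.Dense(16, activation=tf.nn.relu, input_shape=(NUM_WORDS,)),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(16, activation=tf.nn.relu),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
bn_model.compile(optimizer='adam',
                 loss='binary_crossentropy',
                 metrics=['accuracy', 'binary_crossentropy'])
```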

 

Below is code that uses TensorFlow to build a GRU model for sentiment analysis on the IMDB dataset.

First, import the necessary libraries and load the dataset:

```python
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, GRU, Embedding
from tensorflow.keras.models import Sequential

# Load the IMDB dataset
num_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)

# Pad or truncate every review to a fixed length
max_len = 80
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)
```

Then, define the model architecture and compile it:

```python
# Define the model architecture
embedding_size = 32
model = Sequential([
    Embedding(input_dim=num_words, output_dim=embedding_size, input_length=max_len),
    GRU(units=32),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
```

Next, train the model and report the result:

```python
# Train the model
batch_size = 128
epochs = 10
model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          batch_size=batch_size,
          epochs=epochs)

# Evaluate on the test set
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))
```

The complete script:

```python
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, GRU, Embedding
from tensorflow.keras.models import Sequential

# Load the IMDB dataset
num_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)

# Pad or truncate every review to a fixed length
max_len = 80
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

# Define the model architecture
embedding_size = 32
model = Sequential([
    Embedding(input_dim=num_words, output_dim=embedding_size, input_length=max_len),
    GRU(units=32),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
batch_size = 128
epochs = 10
model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          batch_size=batch_size,
          epochs=epochs)

# Evaluate on the test set
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))
```

This code uses a Keras Sequential model made up of an Embedding layer, a GRU layer, and a Dense layer. The Embedding layer maps each word to a vector, the GRU layer processes the sequence of vectors and produces a fixed-length vector, and the Dense layer maps that vector to a single scalar representing the sentiment of the review. The model is compiled with the Adam optimizer and binary cross-entropy loss, trained with fit(), and its accuracy is then evaluated on the test set.
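
As a quick usage check (my own addition, not part of the walkthrough above), the trained GRU model can be used to score a few padded test reviews directly:

```python
# Predict the sentiment probability for the first three padded test reviews
probs = model.predict(X_test[:3])
for p, label in zip(probs, y_test[:3]):
    print("predicted %.3f, actual label %d" % (p[0], label))
```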