文章目录
Activation Function
reuters_mlp_relu_vs_selu.py
'''Compares self-normalizing MLPs with regular MLPs.
Compares the performance of a simple MLP using two
different activation functions: RELU and SELU
on the Reuters newswire topic classification task.
# Reference
- Klambauer, G., Unterthiner, T., Mayr, A., & Hochreiter, S. (2017).
Self-Normalizing Neural Networks. arXiv preprint arXiv:1706.02515.
https://arxiv.org/abs/1706.02515
'''
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers.noise import AlphaDropout
from keras.preprocessing.text import Tokenizer
# Run-wide settings shared by both networks in this comparison.
# Vocabulary cap handed to reuters.load_data and Tokenizer; it is also the
# width of the network input.
max_words = 1000
# Mini-batch size used for both fit() and evaluate().
batch_size = 16
# Training epochs per network; also the length of the x-axis of the loss plot.
epochs = 40
# NOTE(review): presumably meant to toggle the loss plot -- confirm that the
# plotting code actually reads this flag.
plot = True
def create_network(n_dense=6,
                   dense_units=16,
                   activation='selu',
                   dropout=AlphaDropout,
                   dropout_rate=0.1,
                   kernel_initializer='lecun_normal',
                   optimizer='adam',
                   num_classes=1,
                   max_words=max_words):
    """Generic function to create a fully-connected neural network.

    The model is a stack of `n_dense` blocks, each made of
    Dense -> Activation -> dropout, followed by a Dense output layer with a
    softmax activation. It is compiled with categorical cross-entropy loss
    and an accuracy metric.

    # Arguments
        n_dense: int > 0. Number of dense layers.
        dense_units: int > 0. Number of dense units per layer.
        activation: str. Activation applied after every hidden dense layer.
        dropout: keras.layers.Layer. A dropout layer class to apply; it is
            instantiated as `dropout(dropout_rate)`.
        dropout_rate: 0 <= float <= 1. The rate of dropout.
        kernel_initializer: str. The initializer for the hidden-layer weights.
        optimizer: str/keras.optimizers.Optimizer. The optimizer to use.
        num_classes: int > 0. The number of classes to predict. Callers
            should pass the real class count: with the default of 1 the
            softmax output is a single unit that is always 1.
        max_words: int > 0. Size of each input vector (one feature per
            vocabulary word).

    # Returns
        A Keras model instance (compiled).
    """
    model = Sequential()

    # The first block is built separately because only it declares the
    # input shape.
    model.add(Dense(dense_units, input_shape=(max_words,),
                    kernel_initializer=kernel_initializer))
    model.add(Activation(activation))
    model.add(dropout(dropout_rate))

    # Remaining hidden blocks; the loop index itself is not needed.
    for _ in range(n_dense - 1):
        model.add(Dense(dense_units, kernel_initializer=kernel_initializer))
        model.add(Activation(activation))
        model.add(dropout(dropout_rate))

    # Output layer: one unit per class. It uses the default initializer,
    # not `kernel_initializer`.
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
# Keyword arguments for create_network(): ReLU activations with regular
# Dropout and Glorot-uniform weights.
network1 = dict(
    n_dense=6,
    dense_units=16,
    activation='relu',
    dropout=Dropout,
    dropout_rate=0.5,
    kernel_initializer='glorot_uniform',
    optimizer='sgd'
)

# Keyword arguments for create_network(): SELU activations with AlphaDropout
# and LeCun-normal weights.
network2 = dict(
    n_dense=6,
    dense_units=16,
    activation='selu',
    dropout=AlphaDropout,
    dropout_rate=0.1,
    kernel_initializer='lecun_normal',
    optimizer='sgd'
)
print('Loading data...')
_train_split, _test_split = reuters.load_data(num_words=max_words,
                                              test_split=0.2)
x_train, y_train = _train_split
x_test, y_test = _test_split
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Class count is taken as the largest training label plus one
# (assumes labels are consecutive 0-based integer ids -- TODO confirm).
num_classes = 1 + np.max(y_train)
print(num_classes, 'classes')

print('Vectorizing sequence data...')
tokenizer = Tokenizer(num_words=max_words)
# Turn each word-index sequence into a fixed-width binary matrix row.
x_train, x_test = (tokenizer.sequences_to_matrix(_seqs, mode='binary')
                   for _seqs in (x_train, x_test))
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Convert class vector to binary class matrix '
      '(for use with categorical_crossentropy)')
y_train, y_test = (keras.utils.to_categorical(_labels, num_classes)
                   for _labels in (y_train, y_test))
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)
def _fit_and_score(config):
    """Build, train and evaluate one network.

    `config` is a dict of keyword arguments for create_network(). The model
    is trained on the module-level training arrays with 10% of them held out
    for validation, then evaluated on the test arrays.

    Returns a `(model, history, score)` tuple, where `history` is the value
    returned by `fit()` and `score` the value returned by `evaluate()`.
    """
    model = create_network(num_classes=num_classes, **config)
    history = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_split=0.1)
    score = model.evaluate(x_test,
                           y_test,
                           batch_size=batch_size,
                           verbose=1)
    return model, history, score


print('\nBuilding network 1...')
model1, history_model1, score_model1 = _fit_and_score(network1)

print('\nBuilding network 2...')
model2, history_model2, score_model2 = _fit_and_score(network2)
# One (heading, hyperparameters, evaluate() result) row per network, printed
# in the same order and format for both.
_reports = (
    ('\nNetwork 1 results', network1, score_model1),
    ('Network 2 results', network2, score_model2),
)
for _heading, _config, _score in _reports:
    print(_heading)
    print('Hyperparameters:', _config)
    print('Test score:', _score[0])
    print('Test accuracy:', _score[1])
# Draw and save the loss comparison only when the module-level `plot` switch
# is on; the flag is defined with the other settings but was never consulted.
if plot:
    _epoch_axis = range(epochs)
    # (series, line style, legend label): solid lines are validation loss,
    # dashed lines are training loss; green is network 1, red is network 2.
    _curves = (
        (history_model1.history['val_loss'], 'g-', 'Network 1 Val Loss'),
        (history_model2.history['val_loss'], 'r-', 'Network 2 Val Loss'),
        (history_model1.history['loss'], 'g--', 'Network 1 Loss'),
        (history_model2.history['loss'], 'r--', 'Network 2 Loss'),
    )
    for _series, _line_style, _label in _curves:
        plt.plot(_epoch_axis, _series, _line_style, label=_label)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('comparison_of_networks.png')
Network 1 results
Hyperparameters: {'n_dense': 6, 'dense_units': 16, 'activation': 'relu', 'dropout': <class 'keras.layers.core.Dropout'>, 'dropout_rate': 0.5, 'kernel_initializer': 'glorot_uniform', 'optimizer': 'sgd'}
Test score: 1.9538986459971537
Test accuracy: 0.5213713268297439
Network 2 results
Hyperparameters: {'n_dense': 6, 'dense_units': 16, 'activation': 'selu', 'dropout': <class 'keras.layers.noise.AlphaDropout'>, 'dropout_rate': 0.1, 'kernel_initializer': 'lecun_normal', 'optimizer': 'sgd'}
Test score: 1.5389474697231929
Test accuracy: 0.6714158504007124
MLP
mnist_mlp.py
'''Trains a simple deep NN on the MNIST dataset.
Gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning).
2 seconds per epoch on a K520 GPU.
'''
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a single feature vector. The sample count comes
# from the loaded array and the vector length is inferred (-1), instead of
# hard-coding 60000 / 10000 / 784, so the reshape cannot drift out of sync
# with the data actually loaded.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Convert to float32 and scale by 1/255.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# Two 512-unit ReLU layers, each followed by 20% dropout, then a softmax
# output with one unit per class.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# The test split is passed as validation data during training.
_fit_options = dict(batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
history = model.fit(x_train, y_train, **_fit_options)

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Test loss: 0.11690922913084084
Test accuracy: 0.9836
reuters_mlp.py
'''Trains and evaluates a simple MLP
on the Reuters newswire topic classification task.
'''
from __future__ import print_function
import numpy as np
import keras
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.preprocessing.text import Tokenizer
max_words = 1000
batch_size = 32
epochs = 5

print('Loading data...')
_train_split, _test_split = reuters.load_data(num_words=max_words,
                                              test_split=0.2)
x_train, y_train = _train_split
x_test, y_test = _test_split
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Class count is taken as the largest training label plus one
# (assumes labels are consecutive 0-based integer ids -- TODO confirm).
num_classes = 1 + np.max(y_train)
print(num_classes, 'classes')

print('Vectorizing sequence data...')
tokenizer = Tokenizer(num_words=max_words)
# Turn each word-index sequence into a fixed-width binary matrix row.
x_train, x_test = (tokenizer.sequences_to_matrix(_seqs, mode='binary')
                   for _seqs in (x_train, x_test))
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Convert class vector to binary class matrix '
      '(for use with categorical_crossentropy)')
y_train, y_test = (keras.utils.to_categorical(_labels, num_classes)
                   for _labels in (y_train, y_test))
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)
print('Building model...')
# One 512-unit ReLU hidden layer with 50% dropout, then a softmax output
# with one unit per class.
model = Sequential([
    Dense(512, input_shape=(max_words,)),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# 10% of the training data is held out for validation.
_fit_options = dict(batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1)
history = model.fit(x_train, y_train, **_fit_options)

score = model.evaluate(x_test, y_test,
                       batch_size=batch_size, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])
Test score: 0.890080013130779
Test accuracy: 0.7934105075690115
CNN
mnist_cnn.py
'''Trains a simple convnet on the MNIST dataset.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add the single channel axis on the side the backend's image data format
# expects, then reshape both splits to that per-sample shape.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert to float32 and scale by 1/255.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train, y_test = (keras.utils.to_categorical(_labels, num_classes)
                   for _labels in (y_train, y_test))
# Two 3x3 convolutions (32 then 64 filters), 2x2 max-pooling and dropout,
# followed by a 128-unit dense layer, dropout and a softmax output.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3),
           activation='relu',
           input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# The test split is passed as validation data during training.
_fit_options = dict(batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
model.fit(x_train, y_train, **_fit_options)

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
K.image_data_format()
Out[2]: 'channels_last'
相当于在tensorflow的4个维度上,分别是[batch, height, width, channel]
与普通RGB格式相似。
x_train.shape
Out[3]: (60000, 28, 28)
需要增加一个轴
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
需要增加一个轴
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
归一化
y_train
Out[4]: array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)
需要将label进行dummy化(转one hot)
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
y_train
Out[5]:
array([[0., 0., 0., ..., 0., 0., 0.],
[1., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0