1. Building a deep neural network with 20 fully connected layers
import os
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras

model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape = [28, 28]))
# begin
for _ in range(20):
    model.add(keras.layers.Dense(100, activation = 'relu'))
# end
model.add(keras.layers.Dense(10, activation = 'softmax'))
model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = 'sgd',
              metrics = ['accuracy'])
logdir = './dnn-callbacks'
if not os.path.exists(logdir):
    os.mkdir(logdir)
output_model_file = os.path.join(logdir, 'fashion_mnist_model.h5')
callbacks = [
    keras.callbacks.TensorBoard(logdir),
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only = True),
    keras.callbacks.EarlyStopping(patience = 5, min_delta = 1e-3)
]
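The fit call below uses x_train_scaler, y_train, x_valid_scaler and y_valid, which are not defined in this excerpt. A minimal sketch of how they could be prepared (splitting Fashion-MNIST and standardizing with sklearn's StandardScaler is an assumption of this sketch):

import numpy as np
from tensorflow import keras
from sklearn.preprocessing import StandardScaler

# load Fashion-MNIST and carve out a validation set
(x_train_all, y_train_all), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]

# standardize pixel values, fitting the scaler on the training set only
scaler = StandardScaler()
x_train_scaler = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_valid_scaler = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)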
history = model.fit(x_train_scaler, y_train, epochs = 10,
                    validation_data = (x_valid_scaler, y_valid),
                    callbacks = callbacks)
def plot_learning_curves(history):
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 3)
    plt.show()
plot_learning_curves(history)
Why the loss fails to decrease early in training:
(1) A deep network has a large number of parameters, so early in training it is still far from fitted.
(2) Vanishing gradients (see the rough illustration below).
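A back-of-the-envelope illustration of (2): if every layer scales the backpropagated gradient by at most ~0.25 (the maximum derivative of a sigmoid; the exact factor is only for illustration), then after 20 layers almost nothing reaches the early layers:

per_layer = 0.25        # upper bound on a sigmoid's derivative
print(per_layer ** 20)  # ~9.1e-13: the gradient effectively vanishes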
2. Batch Normalization
Batch normalization helps alleviate the vanishing gradient problem.
for _ in range(20):
    # batch normalization placed after the activation function
    model.add(keras.layers.Dense(100, activation = 'relu'))
    model.add(keras.layers.BatchNormalization())
    '''
    # batch normalization placed before the activation function
    model.add(keras.layers.Dense(100))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
    '''
model.add(keras.layers.Dense(10, activation = 'softmax'))
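As a quick sanity check of what BatchNormalization does, a minimal sketch (assuming TF 2.x eager execution): in training mode it rescales each batch to roughly zero mean and unit variance.

import numpy as np
from tensorflow import keras

bn = keras.layers.BatchNormalization()
x = np.array([[1.0], [2.0], [3.0]], dtype='float32')
print(bn(x, training=True).numpy().round(2))  # approx [[-1.22], [0.], [1.22]]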
3. The SELU activation function
TensorFlow implementation:
import tensorflow as tf
from tensorflow.python.framework import ops

def selu(x):
    with ops.name_scope('elu') as scope:
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
To use it, simply replace "relu" with "selu":
model.add(keras.layers.Dense(100, activation = 'selu'))
SELU also alleviates vanishing gradients, and here it works better than Batch Normalization.
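One caveat: for the self-normalizing property to hold, the SELU paper recommends LeCun normal weight initialization (Keras defaults to glorot_uniform), e.g.:

model.add(keras.layers.Dense(100, activation = 'selu',
                             kernel_initializer = 'lecun_normal'))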
4. Dropout
Dropout is commonly used to prevent overfitting.
# AlphaDropout: 1. keeps the mean and variance unchanged  2. preserves the normalization property
model.add(keras.layers.AlphaDropout(rate=0.5))
# plain Dropout, for comparison
model.add(keras.layers.Dropout(rate=0.5))
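A minimal sketch of how AlphaDropout might be combined with a selu network (placing it only after the last hidden layer is an assumption of this sketch, to keep the regularization mild):

model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape = [28, 28]))
for _ in range(20):
    model.add(keras.layers.Dense(100, activation = 'selu'))
model.add(keras.layers.AlphaDropout(rate = 0.5))
model.add(keras.layers.Dense(10, activation = 'softmax'))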