读取数据和预处理
import keras
from keras import layers
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
%matplotlib inline
Using TensorFlow backend.
# Load the credit-approval dataset; the CSV carries no header row.
data = pd.read_csv("./data/credit-a.csv", header=None)

# Shuffle the rows so the later train/test split is not order-dependent.
shuffled_order = np.random.permutation(len(data))
data = data.iloc[shuffled_order]

# Features are every column but the last; the label is the last column.
x = data.iloc[:, :-1]
y = data.iloc[:, -1]
x.shape, y.shape
# Remap the -1 class label to 0 so labels match a sigmoid output in [0, 1].
y = y.replace(-1, 0)
x.shape, y.shape
((653, 15), (653,))
# Hold out the last 25% of the (already shuffled) rows as the test set.
split_at = int(len(x) * 0.75)
x_train, x_test = x[:split_at], x[split_at:]
y_train, y_test = y[:split_at], y[split_at:]
x_train.shape, x_test.shape, y_train.shape, y_test.shape
((489, 15), (164, 15), (489,), (164,))
使用Dropout抑制过拟合
# Feed-forward binary classifier with Dropout between the hidden layers.
# NOTE: Keras Dropout's argument is the fraction of units DROPPED during
# training (here 50%), not the probability of keeping a connection.
model = keras.Sequential([
    layers.Dense(128, input_dim=15, activation='relu'),
    layers.Dropout(0.5),  # randomly zero 50% of activations while training
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),  # P(positive class)
])
WARNING:tensorflow:From E:\MyProgram\Anaconda\envs\krs\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From E:\MyProgram\Anaconda\envs\krs\lib\site-packages\keras\backend\tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
# Print the per-layer output shapes and parameter counts.
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 128) 2048
_________________________________________________________________
dropout_1 (Dropout) (None, 128) 0
_________________________________________________________________
dense_2 (Dense) (None, 128) 16512
_________________________________________________________________
dropout_2 (Dropout) (None, 128) 0
_________________________________________________________________
dense_3 (Dense) (None, 128) 16512
_________________________________________________________________
dropout_3 (Dropout) (None, 128) 0
_________________________________________________________________
dense_4 (Dense) (None, 1) 129
=================================================================
Total params: 35,201
Trainable params: 35,201
Non-trainable params: 0
_________________________________________________________________
# Binary cross-entropy with the Adam optimizer; track accuracy while training.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
# Train silently for 1000 epochs, scoring the held-out split after each epoch
# so we can compare train vs. validation accuracy curves afterwards.
history = model.fit(
    x_train,
    y_train,
    epochs=1000,
    validation_data=(x_test, y_test),
    verbose=0,
)
WARNING:tensorflow:From E:\MyProgram\Anaconda\envs\krs\lib\site-packages\tensorflow\python\ops\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
上节没用Dropout时候评估值是训练集上[0.05114140091863878, 0.9754601228212774],测试集上[1.233076281663848, 0.8658536585365854],现在比较一下。
# Final [loss, accuracy] on the training split.
model.evaluate(x_train, y_train)
489/489 [==============================] - 0s 33us/step
[0.2712044728070437, 0.8793456033938746]
# Final [loss, accuracy] on the held-out test split.
model.evaluate(x_test, y_test)
164/164 [==============================] - 0s 43us/step
[0.50074572170653, 0.8170731707317073]
# Plot validation and training accuracy against epoch number to see
# whether the two curves stay close (i.e. whether overfitting is tamed).
epochs = history.epoch
plt.plot(epochs, history.history.get('val_acc'), c='g', label='validation acc')
plt.plot(epochs, history.history.get('acc'), c='b', label='train acc')
plt.legend()
<matplotlib.legend.Legend at 0x1469ccf8>
可以看到训练集和测试集上的ACC走得比较近,成功抑制了过拟合。如果在测试集上的表现都不如之前没有使用Dropout抑制过拟合的情况,那可能是因为训练次数还不够(可以理解加了Dropout之后因为随机断开了一些神经元,所以同样的训练epoch其训练强度肯定是变少了的)。
添加正则化项抑制过拟合
L1 正则化: $loss = \lambda \cdot \sum_{i} |w_{i}| + loss_{old}$
L2 正则化: $loss = \lambda \cdot \sum_{i} w_{i}^{2} + loss_{old}$
from keras import regularizers

# Same architecture as above, but fighting overfitting with L2 weight decay
# instead of Dropout: each hidden Dense layer's kernel adds
# 0.005 * sum(w**2) to the training loss.
l2_penalty = regularizers.l2(0.005)
model = keras.Sequential([
    layers.Dense(128, kernel_regularizer=l2_penalty, input_dim=15,
                 activation='relu'),
    layers.Dense(128, kernel_regularizer=l2_penalty, activation='relu'),
    layers.Dense(128, kernel_regularizer=l2_penalty, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
# Train silently for 1000 epochs, scoring the held-out split each epoch.
history = model.fit(
    x_train,
    y_train,
    epochs=1000,
    validation_data=(x_test, y_test),
    verbose=0,
)
# Final [loss, accuracy] of the L2-regularized model on the training split.
model.evaluate(x_train, y_train)
489/489 [==============================] - 0s 33us/step
[0.3962598642932369, 0.8404907977897941]
# Final [loss, accuracy] of the L2-regularized model on the test split.
model.evaluate(x_test, y_test)
164/164 [==============================] - 0s 43us/step
[0.7071414546268743, 0.6951219512195121]
# Plot the L2-regularized run's accuracy curves (validation vs. training).
epochs = history.epoch
plt.plot(epochs, history.history.get('val_acc'), c='g', label='validation acc')
plt.plot(epochs, history.history.get('acc'), c='b', label='train acc')
plt.legend()
<matplotlib.legend.Legend at 0x18208208>
Performance不好可能是训练次数不够,或者网络的超参数选择的不好。