项目要点
- 过拟合处理方式, 正则化: L1, L2, dropout, BN
- 模型创建: model = Sequential()
- 添加卷积层: model.add(Dense(32, activation='relu', input_dim=100)) # 第一层需要 input_dim
- 添加dropout: model.add(Dropout(0.2))
- 添加第二层网络: model.add(Dense(512, activation='relu')) # 除了first, 其他层不用输入shape
- 添加输出层: model.add(Dense(num_classes, activation='softmax')) # last 通常使用softmax
- TensorFlow 中,使用 model.compile 方法来选择优化器和损失函数:
-
optimizer: 优化器: 主要有: tf.train.AdamOptimizer , tf.train.RMSPropOptimizer , or tf.train.GradientDescentOptimizer .
-
loss: 损失函数: 主要有:mean square error (mse, 回归), categorical_crossentropy (多分类) , and binary_crossentropy (二分类).
-
metrics: 算法的评估标准, 一般分类用 accuracy.
-
- model.fit(x_train, y_train, batch_size = 64, epochs = 20, validation_data = (x_test, y_test)) # 模型训练
- score = model.evaluate(x_test, y_test, verbose=0) 两个返回值: [ 损失率 , 准确率 ]
- 画背景网格: plt.grid()
1 实例演示Keras的使用 (手写数字识别)
1.1 导包
import keras
import numpy as np
import tensorflow as tf
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import rmsprop_v2
1.2 导入数据
# Load the MNIST handwritten-digit dataset (downloads it on first call).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Sanity-check the shapes: 60k train / 10k test images of 28x28 pixels.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
'''(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)'''
import matplotlib.pyplot as plt
# Visualize the first training image in grayscale.
plt.imshow(x_train[0], cmap = 'gray')
1.3 数据初步处理
# Initial preprocessing: flatten each 28x28 image into a 784-vector and
# scale pixel values from [0, 255] to [0, 1] for stable training.
# Using x.shape[0] instead of hard-coded 60000/10000 keeps this working
# if the split sizes ever change.
x_train = x_train.reshape(x_train.shape[0], 784).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], 784).astype('float32') / 255
print(x_train.shape, 'train samples') # (60000, 784) train samples
print(x_test.shape, 'test samples') # (10000, 784) test samples
1.4 标签独热编码
- 独热编码
import tensorflow
# One-hot encode the integer labels (0-9) into 10-dim binary vectors,
# as required by the categorical_crossentropy loss used below.
num_classes = 10
y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes)
y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes)
# Notebook-style echo of the encoded training labels.
y_train
1.5 创建模型
# Build the classifier as one Sequential stack passed as a layer list:
# 784 -> 512 (relu) -> dropout -> 512 (relu) -> dropout -> 10 (softmax).
# Only the first layer declares the input shape; Dropout(0.2) randomly
# zeroes 20% of activations during training to fight overfitting.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])
# Print per-layer output shapes and parameter counts.
model.summary()
1.6 模型训练
# Compile: choose loss, optimizer and the reported metric.
# categorical_crossentropy matches the one-hot labels built above.
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
batch_size = 128
epochs = 20
# Train and keep the per-epoch history; validation_data reports test
# loss/accuracy after every epoch, verbose=1 prints a progress bar.
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
# Evaluate on the held-out test set; returns [loss, accuracy].
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0]) # Test loss: 0.14742641150951385
print('Test accuracy:', score[1]) # Test accuracy: 0.9815000295639038
2 模型优化
2.1 标准化原始数据
# Standardization: z = (x - mean) / std, fitted on the training data only.
# NOTE(review): x_valid/y_valid were used below but never defined in these
# notes — carve a 5,000-sample validation split off the training set first,
# which matches the (55000, 5000, 10000) shapes printed at the end.
x_valid, y_valid = x_train[:5000], y_train[:5000]
x_train, y_train = x_train[5000:], y_train[5000:]
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# StandardScaler expects 2-D input, so flatten every pixel into one column,
# scale with a single mean/std, then restore the (n, 28, 28) image shape.
x_train_scaled = scaler.fit_transform(x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_valid_scaled = scaler.transform(x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_test_scaled = scaler.transform(x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
print(x_train_scaled.shape, x_valid_scaled.shape, x_test_scaled.shape)
# (55000, 28, 28) (5000, 28, 28) (10000, 28, 28)
2.2 画图显示
import pandas as pd
# Plot every metric tracked in history.history (loss/accuracy and their
# validation counterparts) on one figure.
pd.DataFrame(history.history).plot(figsize = (8, 5))
# Background grid for readability.
plt.grid()
# Fix the y-axis to [0, 1] so curves are comparable across runs.
plt.gca().set_ylim(0, 1)
plt.show()
2.3 添加L1正则化
- 过拟合处理方式, 正则化: L1, L2, dropout, BN
# L1-regularized model: penalizes the absolute value of the weights,
# pushing many of them toward zero (sparse weights).
# (Relies on `import tensorflow as tf` at the top of the file.)
model = tf.keras.models.Sequential()
# Flatten declares the input format: 28x28 image -> 784 vector.
model.add(tf.keras.layers.Flatten(input_shape = [28, 28]))
model.add(tf.keras.layers.Dense(512, activation = 'relu', kernel_regularizer = 'l1'))
model.add(tf.keras.layers.Dense(256, activation = 'relu', kernel_regularizer = 'l1'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))
# sparse_categorical_crossentropy accepts integer labels directly (no one-hot).
model.compile(loss= 'sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
# Fixed typo: histroy -> history.
history = model.fit(x_train_scaled, y_train, epochs = 10,
validation_data = (x_valid_scaled, y_valid))
2.4 添加L2正则化
# L2-regularized model: penalizes squared weight magnitudes, shrinking
# weights smoothly toward zero (weight decay).
model = tf.keras.models.Sequential()
# Flatten declares the input format: 28x28 image -> 784 vector.
model.add(tf.keras.layers.Flatten(input_shape = [28, 28]))
model.add(tf.keras.layers.Dense(512, activation = 'relu', kernel_regularizer = 'l2'))
model.add(tf.keras.layers.Dense(256, activation = 'relu', kernel_regularizer = 'l2'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))
# sparse_categorical_crossentropy accepts integer labels directly (no one-hot).
model.compile(loss= 'sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
# Fixed typo: histroy -> history.
history = model.fit(x_train_scaled, y_train, epochs = 10,
validation_data = (x_valid_scaled, y_valid))
- 还是L2效果好一些
2.5 添加Dropout层
# Dropout model: randomly drops a fraction of activations during training
# to reduce overfitting; usually effective and cheap.
model = tf.keras.models.Sequential()
# Flatten declares the input format: 28x28 image -> 784 vector.
model.add(tf.keras.layers.Flatten(input_shape = [28, 28]))
model.add(tf.keras.layers.Dense(512, activation = 'relu'))
# Drop 20% of this layer's outputs at train time (identity at inference).
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(256, activation = 'relu'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))
model.compile(loss= 'sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
# Fixed typo: histroy -> history.
history = model.fit(x_train_scaled, y_train, epochs = 10,
validation_data = (x_valid_scaled, y_valid))
2.6 添加AlphaDropout 层
# AlphaDropout model: a dropout variant that preserves the mean and
# variance of its inputs (keeps the self-normalizing property).
# NOTE(review): AlphaDropout is designed to pair with 'selu' activations;
# these notes use 'relu' — confirm intent.
model = tf.keras.models.Sequential()
# Flatten declares the input format: 28x28 image -> 784 vector.
model.add(tf.keras.layers.Flatten(input_shape = [28, 28]))
model.add(tf.keras.layers.Dense(512, activation = 'relu'))
model.add(tf.keras.layers.AlphaDropout(0.2))
model.add(tf.keras.layers.Dense(256, activation = 'relu'))
# Bug fix: output layer had 20 units, but MNIST has 10 classes.
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))
model.compile(loss= 'sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
# Fixed typo: histroy -> history.
history = model.fit(x_train_scaled, y_train, epochs = 10,
validation_data = (x_valid_scaled, y_valid))
2.7 添加BN层
# Batch Normalization model: BN after the activation; normalizes layer
# outputs per mini-batch, which stabilizes and speeds up training.
model = tf.keras.models.Sequential()
# Flatten declares the input format: 28x28 image -> 784 vector.
model.add(tf.keras.layers.Flatten(input_shape = [28, 28]))
model.add(tf.keras.layers.Dense(512, activation = 'relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(256, activation = 'relu'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))
model.compile(loss= 'sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
# Fixed typo: histroy -> history.
history = model.fit(x_train_scaled, y_train, epochs = 10,
validation_data = (x_valid_scaled, y_valid))
- BN层放在激活函数之前
# Variant: BatchNormalization placed BEFORE the activation, so each Dense
# layer is split into Dense -> BN -> Activation('relu').
model = tf.keras.models.Sequential()
# Flatten declares the input format: 28x28 image -> 784 vector.
model.add(tf.keras.layers.Flatten(input_shape = [28, 28]))
model.add(tf.keras.layers.Dense(512))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.Dense(256))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))
model.compile(loss= 'sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
# Fixed typo: histroy -> history.
history = model.fit(x_train_scaled, y_train, epochs = 10,
validation_data = (x_valid_scaled, y_valid))
2.8 生成神经网络
# Deep neural network: 20 hidden blocks of Dense(256) -> BN -> relu.
# (Restored the loop-body indentation that was stripped in these notes.)
model = tf.keras.models.Sequential()
# Flatten declares the input format: 28x28 image -> 784 vector.
model.add(tf.keras.layers.Flatten(input_shape = [28, 28]))
for _ in range(20):
    model.add(tf.keras.layers.Dense(256))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))
model.compile(loss= 'sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
# Fixed typo: histroy -> history.
history = model.fit(x_train_scaled, y_train, epochs = 10,
validation_data = (x_valid_scaled, y_valid))