1. Class imbalance
1.1 Defining focal loss
Focal loss down-weights easy, well-classified examples so that training concentrates on the hard ones, which helps when classes are imbalanced. For the positive class it is -alpha * (1 - p)^gamma * log(p), which reduces to alpha-weighted cross-entropy when gamma = 0.
import keras
from keras import backend as K
from keras.optimizers import Adam
import tensorflow as tf

# Define our custom loss function
def focal_loss(y_true, y_pred):
    gamma = 2.0
    alpha = 0.25
    # pt_1 / pt_0: predicted probabilities at the positive / negative labels
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    # K.epsilon() guards against log(0)
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1 + K.epsilon())) \
           - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon()))

# Compile our model (`model` is assumed to be built already)
adam = Adam(lr=5e-5)
model.compile(
    loss=[focal_loss],
    metrics=["accuracy"],
    optimizer=adam
)
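To see what the modulating factor (1 - p)^gamma does, here is a standalone NumPy sanity check (a minimal sketch, independent of the Keras snippet above): with gamma = 2, an easy example (p = 0.9) keeps only 1% of its cross-entropy loss, while a hard one (p = 0.1) keeps 81%.

import numpy as np

def focal_term(p, gamma=2.0, alpha=0.25):
    # per-example positive-class focal loss: -alpha * (1 - p)^gamma * log(p)
    return -alpha * (1 - p) ** gamma * np.log(p)

for p in [0.1, 0.5, 0.9]:
    ce = -0.25 * np.log(p)  # alpha-weighted cross-entropy, for comparison
    print(p, focal_term(p), ce, focal_term(p) / ce)
# ratios: 0.81 at p=0.1, 0.25 at p=0.5, 0.01 at p=0.9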
1.2 Class weighting
Class weighting balances the data distribution by weighting each class's contribution to the loss during training. Normally every class has a weight of 1.0 in the loss function, but when a class is especially important, its training samples should carry a larger weight. In our house-buying example, accuracy on the "buy" class matters most, so those training samples should have a greater influence on the loss.
You can set the weights by multiplying the loss of each class's samples by a factor. In Keras:
import keras

# Keras expects class *indices* as keys; here we assume 1 = "buy", 0 = "don't buy"
class_weight = {1: 0.75,   # "buy"
                0: 0.25}   # "don't buy"
model.fit(X_train, Y_train, epochs=10, batch_size=32, class_weight=class_weight)
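Instead of hand-picking the factors, you can derive them from the label frequencies. A minimal sketch in plain NumPy, using the common "balanced" heuristic (each class weighted inversely to its frequency):

import numpy as np

def balanced_class_weight(y):
    # y: 1-D array of integer class labels
    classes, counts = np.unique(y, return_counts=True)
    # weight_c = n_samples / (n_classes * count_c)
    weights = len(y) / (len(classes) * counts)
    return dict(zip(classes.tolist(), weights.tolist()))

y_train_labels = np.array([1, 0, 0, 0, 1, 0])  # toy labels
print(balanced_class_weight(y_train_labels))   # {0: 0.75, 1: 1.5}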
2. Dynamic learning rate
import keras.backend as K
from keras.callbacks import LearningRateScheduler

init_lr = 1e-3   # initial learning rate (assumed)
lr_epochs = 20   # decay interval: divide the rate by 10 every lr_epochs epochs

# Dynamically adjust the learning rate
def scheduler(epoch):
    # Step decay from the *initial* rate; scaling the current rate instead
    # would compound the decay on every epoch after the first drop.
    # (// is floor division, ** is exponentiation)
    K.set_value(model.optimizer.lr, init_lr * (0.1 ** (epoch // lr_epochs)))
    return K.get_value(model.optimizer.lr)

lr_new = LearningRateScheduler(scheduler)
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=[lr_new], validation_data=(x_test, y_test))
Inside scheduler you can read the current learning rate with K.get_value(model.optimizer.lr) and write a new one with K.set_value(model.optimizer.lr, ...). Wrapping the function in LearningRateScheduler and passing callbacks=[lr_new] to model.fit makes Keras invoke it at the start of every epoch, so the learning rate is adjusted automatically throughout training.
Saving the best model works the same way, through a callback: pass callbacks=[checkpoint] to the training call (and if the monitored quantity is accuracy, remember to add metrics=['accuracy'] when compiling the model). The parameters are shown in the code below.
from keras.callbacks import ModelCheckpoint

model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
# Save the best model seen so far
filepath = 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=[checkpoint], validation_data=(x_test, y_test))
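To reuse the best checkpoint later, load the weights back into a model with the same architecture (a sketch; the filename is hypothetical, since ModelCheckpoint fills in the epoch and val_loss):

model.load_weights('weights.08-0.11.hdf5')  # hypothetical file produced by the pattern above
loss, acc = model.evaluate(x_test, y_test)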
- Learning-rate decay can be time-based or step-based; both variants are sketched below.
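Either function can be dropped into LearningRateScheduler exactly like scheduler above. A minimal sketch, with init_lr, decay_rate, drop, and epochs_drop as assumed hyperparameters:

import math

init_lr = 1e-3

def time_based_decay(epoch):
    # time-based: lr = init_lr / (1 + decay_rate * epoch)
    decay_rate = 0.01  # assumed
    return init_lr / (1. + decay_rate * epoch)

def step_decay(epoch):
    # step-based: divide the rate by `drop` every `epochs_drop` epochs
    drop = 0.5          # assumed
    epochs_drop = 10.0  # assumed
    return init_lr * math.pow(drop, math.floor(epoch / epochs_drop))

# lr_new = LearningRateScheduler(step_decay)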
3. Choosing an optimizer
3.1 AdamW
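AdamW (Loshchilov & Hutter) decouples weight decay from the gradient update: instead of adding an L2 penalty to the loss, where it would be rescaled by Adam's adaptive denominator, the decay is applied to the weights directly. A minimal NumPy sketch of a single parameter update (bias correction omitted for brevity; all hyperparameters are assumed values):

import numpy as np

def adamw_step(w, g, m, v, lr=1e-3, wd=1e-4, beta1=0.9, beta2=0.999, eps=1e-8):
    # standard Adam moment estimates
    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * g ** 2
    # decoupled weight decay: subtracted from the weights directly,
    # not folded into the gradient like an L2 penalty
    w = w - lr * m / (np.sqrt(v) + eps) - wd * w
    return w, m, v

Because the decay term sits outside the adaptive update, it is usually scheduled together with the learning rate, which is exactly what the lr_schedule/wd_schedule pair in the example below does.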
import tensorflow as tf
import os
from tensorflow_addons.optimizers import AdamW
import numpy as np
from tensorflow.python.keras import backend as K
from tensorflow.python.util.tf_export import keras_export
from tensorflow.keras.callbacks import Callback
def lr_schedule(epoch):
    """Learning Rate Schedule

    The learning rate is reduced after 20 and again after 30 epochs.
    Called automatically every epoch as part of the training callbacks.

    # Arguments
        epoch (int): current epoch index
    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3
    if epoch >= 30:
        lr *= 1e-2
    elif epoch >= 20:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr
def wd_schedule(epoch):
    """Weight Decay Schedule

    The weight decay is reduced after 20 and again after 30 epochs.
    Called automatically every epoch as part of the training callbacks.

    # Arguments
        epoch (int): current epoch index
    # Returns
        wd (float32): weight decay
    """
    wd = 1e-4
    if epoch >= 30:
        wd *= 1e-2
    elif epoch >= 20:
        wd *= 1e-1
    print('Weight decay: ', wd)
    return wd
# Copied from the implementation of LearningRateScheduler, with lr replaced by weight_decay
@keras_export('keras.callbacks.WeightDecayScheduler')
class WeightDecayScheduler(Callback):
    """Weight Decay Scheduler.

    Arguments:
        schedule: a function that takes an epoch index as input
            (integer, indexed from 0) and returns a new
            weight decay as output (float).
        verbose: int. 0: quiet, 1: update messages.

    ```python
    # This function keeps the weight decay at 0.001 for the first ten epochs
    # and decreases it exponentially after that.
    def scheduler(epoch):
        if epoch < 10:
            return 0.001
        else:
            return 0.001 * tf.math.exp(0.1 * (10 - epoch))

    callback = WeightDecayScheduler(scheduler)
    model.fit(data, labels, epochs=100, callbacks=[callback],
              validation_data=(val_data, val_labels))
    ```
    """

    def __init__(self, schedule, verbose=0):
        super(WeightDecayScheduler, self).__init__()
        self.schedule = schedule
        self.verbose = verbose

    def on_epoch_begin(self, epoch, logs=None):
        if not hasattr(self.model.optimizer, 'weight_decay'):
            raise ValueError('Optimizer must have a "weight_decay" attribute.')
        try:  # new API: schedule(epoch, current_weight_decay)
            weight_decay = float(K.get_value(self.model.optimizer.weight_decay))
            weight_decay = self.schedule(epoch, weight_decay)
        except TypeError:  # old API for backward compatibility: schedule(epoch)
            weight_decay = self.schedule(epoch)
        if not isinstance(weight_decay, (float, np.float32, np.float64)):
            raise ValueError('The output of the "schedule" function '
                             'should be float.')
        K.set_value(self.model.optimizer.weight_decay, weight_decay)
        if self.verbose > 0:
            print('\nEpoch %05d: WeightDecayScheduler reducing weight '
                  'decay to %s.' % (epoch + 1, weight_decay))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        logs['weight_decay'] = K.get_value(self.model.optimizer.weight_decay)
if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, enable=True)
    print(gpus)

    cifar10 = tf.keras.datasets.cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.AveragePooling2D(),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    optimizer = AdamW(learning_rate=lr_schedule(0), weight_decay=wd_schedule(0))
    # optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

    # Define the callbacks
    tb_callback = tf.keras.callbacks.TensorBoard(os.path.join('logs', 'adamw'),
                                                 profile_batch=0)
    lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule)
    wd_callback = WeightDecayScheduler(wd_schedule)

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Hook the callbacks into training
    model.fit(x_train, y_train, epochs=40, validation_split=0.1,
              callbacks=[tb_callback, lr_callback, wd_callback])
    model.evaluate(x_test, y_test, verbose=2)
See also: a summary of optimization methods and the problems with Adam (SGD, Momentum, AdaDelta, Adam, AdamW, LazyAdam).
4. Full implementation
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
import keras.backend as K
import tensorflow as tf
from keras.callbacks import LearningRateScheduler, ModelCheckpoint

# Define focal loss
def focal_loss(y_true, y_pred):
    gamma = 2.0
    alpha = 0.25
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    # K.epsilon() guards against log(0)
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1 + K.epsilon())) \
           - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon()))
def main(batch_size=100, test_batch_size=100, lr=1e-3, momentum=0.9, decay=1e-5, epochs=10, lr_epochs=20):
    # Load the data (downloaded automatically on first use)
    # x_train shape: (60000, 28, 28), y_train shape: (60000,)
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Flatten and normalize: (60000, 28, 28) -> (60000, 784); -1 infers the dimension
    x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
    x_test = x_test.reshape(x_test.shape[0], -1) / 255.0
    # One-hot encode the 10 digit classes
    y_train = np_utils.to_categorical(y_train, num_classes=10)
    y_test = np_utils.to_categorical(y_test, num_classes=10)

    # Build the model: 784 inputs, 10 outputs; bias_initializer sets the initial bias values
    model = Sequential([Dense(units=10, input_dim=784, bias_initializer='one', activation='softmax')])

    # Define the optimizer
    # sgd = optimizers.SGD(lr=lr, momentum=momentum, decay=decay, nesterov=True)
    adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=decay, amsgrad=False)

    # categorical_crossentropy: cross-entropy loss; metrics=['accuracy'] reports accuracy
    # (swap in loss=focal_loss to use the focal loss defined above)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

    # Dynamically adjust the learning rate
    def scheduler(epoch):
        # Step decay from the initial rate `lr`: divide by 10 every lr_epochs epochs
        # (// is floor division, ** is exponentiation)
        K.set_value(model.optimizer.lr, lr * (0.1 ** (epoch // lr_epochs)))
        return K.get_value(model.optimizer.lr)

    lr_new = LearningRateScheduler(scheduler)

    # Save the best model
    filepath = 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

    # One epoch = one full pass over the training set
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=[lr_new, checkpoint], validation_data=(x_test, y_test))

    # Evaluate the model
    loss, accuracy = model.evaluate(x_test, y_test, batch_size=test_batch_size)
    print('\nFinal test loss:', loss, '\taccuracy:', accuracy)

if __name__ == '__main__':
    main()