Keras: Tuning the Learning Rate, Optimizer, and Loss Function

1. Class Imbalance

1.1 Defining Focal Loss

Focal loss reshapes the cross-entropy so that easy, well-classified examples contribute less, focusing training on the hard ones: FL(p_t) = −α_t · (1 − p_t)^γ · log(p_t), where p_t is the predicted probability of the true class, γ the focusing parameter, and α_t the class-balancing weight.

import keras
from keras import backend as K
import tensorflow as tf

# Define our custom loss function
def focal_loss(y_true, y_pred):
    gamma = 2.0
    alpha = 0.25
    # Predicted probability for positive (label 1) and negative (label 0) samples
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    # Clip away exact 0/1 predictions to avoid log(0)
    pt_1 = K.clip(pt_1, K.epsilon(), 1. - K.epsilon())
    pt_0 = K.clip(pt_0, K.epsilon(), 1. - K.epsilon())
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
           - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))

from keras.optimizers import Adam

# Compile our model
adam = Adam(lr=5e-5)
model.compile(loss=[focal_loss],
              metrics=["accuracy"],
              optimizer=adam)
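One caveat worth noting: a model compiled with a custom loss cannot be re-loaded from disk unless Keras is told how to resolve the loss name. A minimal sketch, where 'model.h5' is a placeholder path rather than a file produced above:

from keras.models import load_model

# Map the serialized loss name back to the Python function;
# 'model.h5' is a hypothetical path used only for illustration
model = load_model('model.h5', custom_objects={'focal_loss': focal_loss})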

1.2 Class Weighting

Class weighting balances the data distribution by weighting each class's contribution to the loss during training. Normally every class carries a weight of 1.0 in the loss function, but when certain classes are especially important, their training samples should weigh more. Consider our house-buying example: since accuracy on the "buy" class is critical, training samples of that class should have a greater influence on the loss.

You can set a class's weight by multiplying the loss of its samples by a factor. In Keras this is done through the class_weight argument:

import keras

# class_weight maps class *indices* to loss weights;
# here index 0 stands for "buy" and index 1 for "don't buy"
class_weight = {0: 0.75,
                1: 0.25}
model.fit(X_train, Y_train, epochs=10, batch_size=32, class_weight=class_weight)
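If you would rather derive the weights from label frequencies than hand-pick them, scikit-learn provides a helper. A minimal sketch, where y_labels is a hypothetical array of raw integer class labels (before any one-hot encoding):

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights each class inversely to its frequency
classes = np.unique(y_labels)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_labels)
class_weight = dict(zip(classes, weights))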

2. Defining a Dynamic Learning Rate



import keras.backend as K
from keras.callbacks import LearningRateScheduler

base_lr = 1e-3  # initial learning rate
lr_epochs = 20  # divide the learning rate by 10 every lr_epochs epochs

# Dynamically adjust the learning rate with a step decay
def scheduler(epoch):
    # // is floor division; ** is exponentiation
    # Computing from base_lr (rather than the optimizer's current value)
    # keeps the 10x drop from compounding on every epoch after the first step
    K.set_value(model.optimizer.lr, base_lr * (0.1 ** (epoch // lr_epochs)))
    return K.get_value(model.optimizer.lr)

lr_new = LearningRateScheduler(scheduler)
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
          callbacks=[lr_new], validation_data=(x_test, y_test))

The body of the scheduler function is not fixed: inside it you can read the current learning rate with K.get_value(model.optimizer.lr) and overwrite it with K.set_value(model.optimizer.lr, base_lr * (0.1 ** (epoch // lr_epochs))). Wrapping the function in LearningRateScheduler and adding callbacks=[lr_new] to the model's fit call is all that is needed; during training the learning rate is then adjusted automatically at every epoch.
Saving the best model works through a callback in the same way, so the fit call must include callbacks=[checkpoint] (and if accuracy is involved, the model must be compiled with metrics=['accuracy']):

from keras.callbacks import ModelCheckpoint

model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
# Save the best model; the filename records the epoch and validation loss
filepath = 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=[checkpoint], validation_data=(x_test, y_test))
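Keras also ships ReduceLROnPlateau, which adjusts the learning rate from validation feedback instead of a fixed schedule. A minimal sketch; the factor, patience, and min_lr values here are illustrative choices, not taken from this post:

from keras.callbacks import ReduceLROnPlateau

# Multiply the lr by 0.1 whenever val_loss has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5,
                              min_lr=1e-6, verbose=1)
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
          callbacks=[reduce_lr, checkpoint], validation_data=(x_test, y_test))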

  • Learning rate decay can also be time-based or step-based; a brief sketch follows this list. For details, see:
  1. Keras 自适应Learning Rate (LearningRateScheduler)
  2. Keras中那些学习率衰减策略
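As a quick illustration, a minimal sketch of the time-based form, plus the closely related exponential form (the decay constants are arbitrary choices, not taken from the articles above):

import math

initial_lr = 1e-3

# Time-based decay: the lr shrinks smoothly as epochs accumulate
def time_based_decay(epoch, decay=0.01):
    return initial_lr / (1.0 + decay * epoch)

# Exponential decay: the lr is multiplied by a constant factor each epoch
def exponential_decay(epoch, k=0.1):
    return initial_lr * math.exp(-k * epoch)

# Either function can be handed to LearningRateScheduler exactly as above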

3. Choosing an Optimizer

3.1 AdamW

AdamW: Adam with Weight decay
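What distinguishes AdamW from Adam plus L2 regularization is that the weight decay is decoupled from the adaptive gradient update (Loshchilov & Hutter, "Decoupled Weight Decay Regularization"). Schematically, with Adam's bias-corrected moment estimates m̂_t and v̂_t, learning rate η and decay coefficient λ, the update as implemented in tensorflow_addons is roughly:

θ_{t+1} = θ_t − η · m̂_t / (√v̂_t + ε) − λ · θ_t

With plain L2 regularization, the λθ term would instead be folded into the gradient before the moments are computed, letting the adaptive scaling dilute it. Because λ here acts on the weights directly, it should be scheduled in step with the learning rate, which is exactly what the WeightDecayScheduler below does.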

import tensorflow as tf
import os

import numpy as np

from tensorflow_addons.optimizers import AdamW
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import Callback


def lr_schedule(epoch):
    """Learning Rate Schedule
    Learning rate is scheduled to be reduced after 20, 30 epochs.
    Called automatically every epoch as part of callbacks during training.
    # Arguments
        epoch (int): The number of epochs
    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3

    if epoch >= 30:
        lr *= 1e-2
    elif epoch >= 20:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr


def wd_schedule(epoch):
    """Weight Decay Schedule
    Weight decay is scheduled to be reduced after 20, 30 epochs.
    Called automatically every epoch as part of callbacks during training.
    # Arguments
        epoch (int): The number of epochs
    # Returns
        wd (float32): weight decay
    """
    wd = 1e-4

    if epoch >= 30:
        wd *= 1e-2
    elif epoch >= 20:
        wd *= 1e-1
    print('Weight decay: ', wd)
    return wd


# Adapted from the implementation of LearningRateScheduler, with the
# learning rate swapped for the optimizer's weight_decay variable
class WeightDecayScheduler(Callback):
    """Weight Decay Scheduler.

    Arguments:
        schedule: a function that takes an epoch index as input
            (integer, indexed from 0) and returns a new
            weight decay as output (float).
        verbose: int. 0: quiet, 1: update messages.

    ```python
    # This function keeps the weight decay at 0.001 for the first ten epochs
    # and decreases it exponentially after that.
    def scheduler(epoch):
      if epoch < 10:
        return 0.001
      else:
        return 0.001 * tf.math.exp(0.1 * (10 - epoch))

    callback = WeightDecayScheduler(scheduler)
    model.fit(data, labels, epochs=100, callbacks=[callback],
              validation_data=(val_data, val_labels))
    ```
    """

    def __init__(self, schedule, verbose=0):
        super(WeightDecayScheduler, self).__init__()
        self.schedule = schedule
        self.verbose = verbose

    def on_epoch_begin(self, epoch, logs=None):
        if not hasattr(self.model.optimizer, 'weight_decay'):
            raise ValueError('Optimizer must have a "weight_decay" attribute.')
        try:  # new API
            weight_decay = float(K.get_value(self.model.optimizer.weight_decay))
            weight_decay = self.schedule(epoch, weight_decay)
        except TypeError:  # Support for old API for backward compatibility
            weight_decay = self.schedule(epoch)
        if not isinstance(weight_decay, (float, np.float32, np.float64)):
            raise ValueError('The output of the "schedule" function '
                             'should be float.')
        K.set_value(self.model.optimizer.weight_decay, weight_decay)
        if self.verbose > 0:
            print('\nEpoch %05d: WeightDecayScheduler reducing weight '
                  'decay to %s.' % (epoch + 1, weight_decay))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        logs['weight_decay'] = K.get_value(self.model.optimizer.weight_decay)


if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'

    gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, enable=True)
    print(gpus)
    cifar10 = tf.keras.datasets.cifar10

    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.AveragePooling2D(),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    optimizer = AdamW(learning_rate=lr_schedule(0), weight_decay=wd_schedule(0))
    # optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    
    # Define the callbacks
    tb_callback = tf.keras.callbacks.TensorBoard(os.path.join('logs', 'adamw'),
                                                 profile_batch=0)
    lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule)
    wd_callback = WeightDecayScheduler(wd_schedule)

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Hook the callbacks into training
    model.fit(x_train, y_train, epochs=40, validation_split=0.1,
              callbacks=[tb_callback, lr_callback, wd_callback])

    model.evaluate(x_test, y_test, verbose=2)
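A side note on availability: the tensorflow_addons import is only needed on older TensorFlow versions. To the best of my knowledge, recent releases (TensorFlow 2.11 and later) bundle a decoupled-weight-decay Adam directly in Keras:

# Believed available from TensorFlow 2.11 onward; check your version
optimizer = tf.keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-4)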

Reference: 优化方法总结以及Adam存在的问题 (SGD, Momentum, AdaDelta, Adam, AdamW, LazyAdam)

4. Full Example

import tensorflow as tf
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

import keras.backend as K
from keras.callbacks import LearningRateScheduler, ModelCheckpoint

# Define focal loss (see section 1.1); pass it to model.compile to use it
# in place of categorical_crossentropy
def focal_loss(y_true, y_pred):
    gamma = 2.0
    alpha = 0.25
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    pt_1 = K.clip(pt_1, K.epsilon(), 1. - K.epsilon())
    pt_0 = K.clip(pt_0, K.epsilon(), 1. - K.epsilon())
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
           - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))


def main(batch_size=100, test_batch_size=100, lr=1e-3, momentum=0.9, decay=1e-5, epochs=10, lr_epochs=20):
    # Load the data (downloaded automatically on first use)
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # x_train shape: (60000, 28, 28)
    # y_train shape: (60000,)

    # Reshape and normalize:
    # -1 lets numpy infer the remaining dimension,
    # so (60000, 28, 28) becomes (60000, 784)
    x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
    x_test = x_test.reshape(x_test.shape[0], -1) / 255.0
    # One-hot encode the labels; there are 10 digit classes
    y_train = np_utils.to_categorical(y_train, num_classes=10)
    y_test = np_utils.to_categorical(y_test, num_classes=10)

    # Build the model: 784 inputs, 10 outputs
    # bias_initializer sets the initial bias values
    model = Sequential([Dense(units=10, input_dim=784, bias_initializer='one', activation='softmax')])

    # Define the optimizer
    # sgd = optimizers.SGD(lr=lr, momentum=momentum, decay=decay, nesterov=True)
    adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=decay, amsgrad=False)

    # metrics=['accuracy'] reports accuracy;
    # categorical_crossentropy is the standard loss for one-hot labels
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

    # Dynamically adjust the learning rate with a step decay
    def scheduler(epoch):
        # // is floor division; ** is exponentiation
        # Computing from the initial lr (the main() argument) keeps the
        # 10x drop from compounding on every epoch after the first step
        K.set_value(model.optimizer.lr, lr * (0.1 ** (epoch // lr_epochs)))
        return K.get_value(model.optimizer.lr)

    lr_new = LearningRateScheduler(scheduler)

    # Save the best model seen so far
    filepath = 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

    # One epoch = one full pass over the training set
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=[lr_new, checkpoint], validation_data=(x_test, y_test))

    # Evaluate the model
    loss, accuracy = model.evaluate(x_test, y_test, batch_size=test_batch_size)

    print('\nFinal test loss:', loss, '\taccuracy:', accuracy)


if __name__ == '__main__':
    main()
