1. Class imbalance
1.1 Defining focal loss
Focal loss down-weights easy, well-classified examples so that training concentrates on the hard ones, which helps when classes are imbalanced. For the positive class it is -alpha * (1 - p)^gamma * log(p), which reduces to alpha-weighted cross-entropy when gamma = 0.
import keras
from keras import backend as K
from keras.optimizers import Adam
import tensorflow as tf

# Define our custom loss function
def focal_loss(y_true, y_pred):
    gamma = 2.0
    alpha = 0.25
    # pt_1 / pt_0: predicted probabilities at the positive / negative labels
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    # K.epsilon() guards against log(0)
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1 + K.epsilon())) \
           - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon()))

# Compile our model (`model` is assumed to be built already)
adam = Adam(lr=5e-5)
model.compile(
    loss=[focal_loss],
    metrics=["accuracy"],
    optimizer=adam
)
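To see what the modulating factor (1 - p)^gamma does, here is a standalone NumPy sanity check (a minimal sketch, independent of the Keras snippet above): with gamma = 2, an easy example (p = 0.9) keeps only 1% of its cross-entropy loss, while a hard one (p = 0.1) keeps 81%.

import numpy as np

def focal_term(p, gamma=2.0, alpha=0.25):
    # per-example positive-class focal loss: -alpha * (1 - p)^gamma * log(p)
    return -alpha * (1 - p) ** gamma * np.log(p)

for p in [0.1, 0.5, 0.9]:
    ce = -0.25 * np.log(p)  # alpha-weighted cross-entropy, for comparison
    print(p, focal_term(p), ce, focal_term(p) / ce)
# ratios: 0.81 at p=0.1, 0.25 at p=0.5, 0.01 at p=0.9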
1.2 Class weighting
Class weighting balances the data distribution by weighting each class's contribution to the loss during training. Normally every class has a weight of 1.0 in the loss function, but when a class is especially important, its training samples should carry a larger weight. In our house-buying example, accuracy on the "buy" class matters most, so those training samples should have a greater influence on the loss.
You can set the weights by multiplying the loss of each class's samples by a factor. In Keras:
import keras

# Keras expects class *indices* as keys; here we assume 1 = "buy", 0 = "don't buy"
class_weight = {1: 0.75,   # "buy"
                0: 0.25}   # "don't buy"
model.fit(X_train, Y_train, epochs=10, batch_size=32, class_weight=class_weight)
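Instead of hand-picking the factors, you can derive them from the label frequencies. A minimal sketch in plain NumPy, using the common "balanced" heuristic (each class weighted inversely to its frequency):

import numpy as np

def balanced_class_weight(y):
    # y: 1-D array of integer class labels
    classes, counts = np.unique(y, return_counts=True)
    # weight_c = n_samples / (n_classes * count_c)
    weights = len(y) / (len(classes) * counts)
    return dict(zip(classes.tolist(), weights.tolist()))

y_train_labels = np.array([1, 0, 0, 0, 1, 0])  # toy labels
print(balanced_class_weight(y_train_labels))   # {0: 0.75, 1: 1.5}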
2. Dynamic learning rate
import keras.backend as K
from keras.callbacks import LearningRateScheduler

init_lr = 1e-3   # initial learning rate (assumed)
lr_epochs = 20   # decay interval: divide the rate by 10 every lr_epochs epochs

# Dynamically adjust the learning rate
def scheduler(epoch):
    # Step decay from the *initial* rate; scaling the current rate instead
    # would compound the decay on every epoch after the first drop.
    # (// is floor division, ** is exponentiation)
    K.set_value(model.optimizer.lr, init_lr * (0.1 ** (epoch // lr_epochs)))
    return K.get_value(model.optimizer.lr)

lr_new = LearningRateScheduler(scheduler)
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=[lr_new], validation_data=(x_test, y_test))
Inside scheduler you can read the current learning rate with K.get_value(model.optimizer.lr) and write a new one with K.set_value(model.optimizer.lr, ...). Wrapping the function in LearningRateScheduler and passing callbacks=[lr_new] to model.fit makes Keras invoke it at the start of every epoch, so the learning rate is adjusted automatically throughout training.
Saving the best model works the same way, through a callback: pass callbacks=[checkpoint] to the training call (and if the monitored quantity is accuracy, remember to add metrics=['accuracy'] when compiling the model). The parameters are shown in the code below.
from keras.callbacks import ModelCheckpoint

model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
# Save the best model seen so far
filepath = 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=[checkpoint], validation_data=(x_test, y_test))
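To reuse the best checkpoint later, load the weights back into a model with the same architecture (a sketch; the filename is hypothetical, since ModelCheckpoint fills in the epoch and val_loss):

model.load_weights('weights.08-0.11.hdf5')  # hypothetical file produced by the pattern above
loss, acc = model.evaluate(x_test, y_test)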
- Learning-rate decay can be time-based or step-based; both variants are sketched below.
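Either function can be dropped into LearningRateScheduler exactly like scheduler above. A minimal sketch, with init_lr, decay_rate, drop, and epochs_drop as assumed hyperparameters:

import math

init_lr = 1e-3

def time_based_decay(epoch):
    # time-based: lr = init_lr / (1 + decay_rate * epoch)
    decay_rate = 0.01  # assumed
    return init_lr / (1. + decay_rate * epoch)

def step_decay(epoch):
    # step-based: divide the rate by `drop` every `epochs_drop` epochs
    drop = 0.5          # assumed
    epochs_drop = 10.0  # assumed
    return init_lr * math.pow(drop, math.floor(epoch / epochs_drop))

# lr_new = LearningRateScheduler(step_decay)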
3. Choosing an optimizer
3.1 AdamW
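AdamW (Loshchilov & Hutter) decouples weight decay from the gradient update: instead of adding an L2 penalty to the loss, where it would be rescaled by Adam's adaptive denominator, the decay is applied to the weights directly. A minimal NumPy sketch of a single parameter update (bias correction omitted for brevity; all hyperparameters are assumed values):

import numpy as np

def adamw_step(w, g, m, v, lr=1e-3, wd=1e-4, beta1=0.9, beta2=0.999, eps=1e-8):
    # standard Adam moment estimates
    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * g ** 2
    # decoupled weight decay: subtracted from the weights directly,
    # not folded into the gradient like an L2 penalty
    w = w - lr * m / (np.sqrt(v) + eps) - wd * w
    return w, m, v

Because the decay term sits outside the adaptive update, it is usually scheduled together with the learning rate, which is exactly what the lr_schedule/wd_schedule pair in the example below does.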
import tensorflow as tf
import os
from tensorflow_addons.optimizers import AdamW
import numpy as np
from tensorflow.python.keras import backend as K
from tensorflow.python.util.tf_export import keras_export
from tensorflow.keras.callbacks import Callback
def lr_schedule(epoch):
    """Learning Rate Schedule

    The learning rate is reduced after 20 and again after 30 epochs.
    Called automatically every epoch as part of the training callbacks.

    # Arguments
        epoch (int): current epoch index
    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3
    if epoch >= 30:
        lr *= 1e-2
    elif epoch >= 20:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr
def wd_schedule(epoch):
    """Weight Decay Schedule

    The weight decay is reduced after 20 and again after 30 epochs.
    Called automatically every epoch as part of the training callbacks.

    # Arguments
        epoch (int): current epoch index
    # Returns
        wd (float32): weight decay
    """
    wd = 1e-4
    if epoch >= 30:
        wd *= 1e-2
    elif epoch >= 20:
        wd *= 1e-1
    print('Weight decay: ', wd)
    return wd
# Copied from the implementation of LearningRateScheduler, with lr replaced by weight_decay
@keras_export('keras.callbacks.WeightDecayScheduler')
class WeightDecayScheduler(Callback):
    """Weight Decay Scheduler.

    Arguments:
        schedule: a function that takes an epoch index as input
            (integer, indexed from 0) and returns a new
            weight decay as output (float).
        verbose: int. 0: quiet, 1: update messages.

    ```python
    # This function keeps the weight decay at 0.001 for the first ten epochs
    # and decreases it exponentially after that.
    def scheduler(epoch):
        if epoch < 10:
            return 0.001
        else:
            return 0.001 * tf.math.exp(0.1 * (10 - epoch))

    callback = WeightDecayScheduler(scheduler)
    model.fit(data, labels, epochs=100, callbacks=[callback],
              validation_data=(val_data, val_labels))
    ```
    """

    def __init__(self, schedule, verbose=0):
        super(WeightDecayScheduler, self).__init__()
        self.schedule = schedule
        self.verbose = verbose

    def on_epoch_begin(self, epoch, logs=None):
        if not hasattr(self.model.optimizer, 'weight_decay'):
            raise ValueError('Optimizer must have a "weight_decay" attribute.')
        try:  # new API: schedule(epoch, current_weight_decay)
            weight_decay = float(K.get_value(self.model.optimizer.weight_decay))
            weight_decay = self.schedule(epoch, weight_decay)
        except TypeError:  # old API for backward compatibility: schedule(epoch)
            weight_decay = self.schedule(epoch)
        if not isinstance(weight_decay, (float, np.float32, np.float64)):
            raise ValueError('The output of the "schedule" function '
                             'should be float.')
        K.set_value(self.model.optimizer.weight_decay, weight_decay)
        if self.verbose > 0:
            print('\nEpoch %05d: WeightDecayScheduler reducing weight '
                  'decay to %s.' % (epoch + 1, weight_decay))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        logs['weight_decay'] = K.get_value(self.model.optimizer.weight_decay)
if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, enable=True)
    print(gpus)

    cifar10 = tf.keras.datasets.cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.AveragePooling2D(),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    optimizer = AdamW(learning_rate=lr_schedule(0), weight_decay=wd_schedule(0))
    # optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

    # Define the callbacks
    tb_callback = tf.keras.callbacks.TensorBoard(os.path.join('logs', 'adamw'),
                                                 profile_batch=0)
    lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule)
    wd_callback = WeightDecayScheduler(wd_schedule)

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Hook the callbacks into training
    model.fit(x_train, y_train, epochs=40, validation_split=0.1,
              callbacks=[tb_callback, lr_callback, wd_callback])
    model.evaluate(x_test, y_test, verbose=2)
See also: a summary of optimization methods and the problems with Adam (SGD, Momentum, AdaDelta, Adam, AdamW, LazyAdam).
4. Full implementation
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
import keras.backend as K
import tensorflow as tf
from keras.callbacks import LearningRateScheduler, ModelCheckpoint

# Define focal loss
def focal_loss(y_true, y_pred):
    gamma = 2.0
    alpha = 0.25
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    # K.epsilon() guards against log(0)
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1 + K.epsilon())) \
           - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon()))
def main(batch_size=100, test_batch_size=100, lr=1e-3, momentum=0.9, decay=1e-5, epochs=10, lr_epochs=20):
    # Load the data (downloaded automatically on first use)
    # x_train shape: (60000, 28, 28), y_train shape: (60000,)
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Flatten and normalize: (60000, 28, 28) -> (60000, 784); -1 infers the dimension
    x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
    x_test = x_test.reshape(x_test.shape[0], -1) / 255.0
    # One-hot encode the 10 digit classes
    y_train = np_utils.to_categorical(y_train, num_classes=10)
    y_test = np_utils.to_categorical(y_test, num_classes=10)

    # Build the model: 784 inputs, 10 outputs; bias_initializer sets the initial bias values
    model = Sequential([Dense(units=10, input_dim=784, bias_initializer='one', activation='softmax')])

    # Define the optimizer
    # sgd = optimizers.SGD(lr=lr, momentum=momentum, decay=decay, nesterov=True)
    adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=decay, amsgrad=False)

    # categorical_crossentropy: cross-entropy loss; metrics=['accuracy'] reports accuracy
    # (swap in loss=focal_loss to use the focal loss defined above)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

    # Dynamically adjust the learning rate
    def scheduler(epoch):
        # Step decay from the initial rate `lr`: divide by 10 every lr_epochs epochs
        # (// is floor division, ** is exponentiation)
        K.set_value(model.optimizer.lr, lr * (0.1 ** (epoch // lr_epochs)))
        return K.get_value(model.optimizer.lr)

    lr_new = LearningRateScheduler(scheduler)

    # Save the best model
    filepath = 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

    # One epoch = one full pass over the training set
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=[lr_new, checkpoint], validation_data=(x_test, y_test))

    # Evaluate the model
    loss, accuracy = model.evaluate(x_test, y_test, batch_size=test_batch_size)
    print('\nFinal test loss:', loss, '\taccuracy:', accuracy)

if __name__ == '__main__':
    main()