使用神经网络训练CIFAR10

最新推荐文章于 2023-11-12 00:38:10 发布

王蒟蒻

最新推荐文章于 2023-11-12 00:38:10 发布

阅读量1k

点赞数

分类专栏： # 实战代码

本文链接：https://blog.csdn.net/weixin_41413511/article/details/119410566

版权

实战代码专栏收录该内容

7 篇文章 0 订阅

订阅专栏

下载数据

import tensorflow as tf
from tensorflow import keras

# Load the CIFAR-10 training and test arrays through Keras.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()

# Hold out the first 5,000 training samples for validation; train on the rest.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

查看数据

训练集

# Notebook cell: display the shape of the training array.
X_train.shape

在这里插入图片描述
验证集

# Notebook cell: display the shape of the validation array (the held-out slice of the training data).
X_valid.shape

在这里插入图片描述
标签

# Notebook cell: display the training labels.
y_train

在这里插入图片描述
查看标签分类情况

import numpy as np
from collections import Counter
# Notebook cell: count how many training samples carry each label value.
Counter(y_train.flatten())

在这里插入图片描述
各类分布均匀

查看前面几张图片的样子

import matplotlib.pyplot as plt
# Show the first n_rows * n_cols training images in a 4-row by 7-column grid.
n_rows = 4
n_cols = 7
plt.figure(figsize=(n_cols * 2, n_rows * 2))
for index in range(n_rows * n_cols):
    # Subplot positions are 1-based, hence index + 1.
    plt.subplot(n_rows, n_cols, index + 1)
    # No cmap is passed: the images are RGB, and a colormap only applies to
    # single-channel data.
    plt.imshow(X_train[index], interpolation="nearest")
    plt.axis('off')

plt.subplots_adjust(wspace=0.2, hspace=0.5)  # spacing between the subplots
plt.show()

在这里插入图片描述

寻找最佳学习率并训练

法一

和之前的训练差不多

# Keras backend alias, used to read and write the optimizer's learning rate.
K = keras.backend


class ExponentialLearningRate(keras.callbacks.Callback):
    """Grow the learning rate geometrically, one step per training batch.

    After every batch the callback records the learning rate that was used
    and the loss reported in ``logs``, then multiplies the optimizer's
    learning rate by ``factor``. Plotting ``losses`` against ``rates``
    afterwards gives a loss-versus-learning-rate curve.

    Args:
        factor: Multiplier applied to the learning rate after each batch.

    Attributes:
        rates: Learning rate in effect for each finished batch.
        losses: ``logs["loss"]`` value reported for each finished batch.
    """

    def __init__(self, factor):
        super().__init__()
        self.factor = factor
        self.rates = []
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        # Read the rate once and reuse the plain value for both the record and
        # the update, instead of handing a tensor expression to set_value.
        current_rate = K.get_value(self.model.optimizer.lr)
        self.rates.append(current_rate)
        self.losses.append(logs["loss"])
        K.set_value(self.model.optimizer.lr, current_rate * self.factor)

import numpy as np
# Reset the Keras session and seed NumPy and TensorFlow before building the model.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# Define the network: flatten each 32x32x3 input, stack 20 hidden layers of 100 ELU units,
# and finish with a 10-unit softmax output.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 activation="elu",
                                 kernel_initializer="he_normal"))
model.add(keras.layers.Dense(10, activation="softmax"))


# Start the sweep from a very small learning rate (1e-5).
optimizer = keras.optimizers.Nadam(learning_rate=1e-5)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])


# The callback records rate and loss after each batch and multiplies the rate by 1.005.
expon_lr = ExponentialLearningRate(factor=1.005)

# A single epoch is run with the callback attached.
history = model.fit(X_train, y_train, epochs=1,
                    validation_data=(X_valid, y_valid),
                    callbacks=[expon_lr])

# Plot the recorded loss against the recorded learning rate.
plt.plot(expon_lr.rates, expon_lr.losses)
plt.gca().set_xscale('log')  # logarithmic x axis
plt.hlines(min(expon_lr.losses), min(expon_lr.rates), max(expon_lr.rates))  # horizontal line at the minimum recorded loss
plt.axis([min(expon_lr.rates), max(expon_lr.rates), 0, expon_lr.losses[0]])  # set the x/y axis ranges
plt.grid()  # show grid lines
plt.xlabel("Learning rate")
plt.ylabel("Loss")

在这里插入图片描述

之前选取拐点学习率的一半来训练，结果正确率忽高忽低，说明学习率取太大了，后面查看书籍：绘制损失与学习率曲线，你应该首先看到它在下降。但是过一会儿学习率将过大，因此损失将重新上升：最佳学习率将比损失开始攀升的点低一些（通常比转折点低约10倍）

取lr=4e-4结果还行
对比于答案中找到的5e-5，算是局部最小值

开始训练

# Reset the Keras session and seed NumPy and TensorFlow before building the model.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# Network: flattened 32x32x3 input, 20 hidden ELU layers of 100 units, 10-unit softmax output.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 activation="elu",
                                 kernel_initializer="he_normal"))
model.add(keras.layers.Dense(10, activation="softmax"))

# Use the `learning_rate` keyword (as the sweep above does); the `lr` alias is deprecated.
optimizer = keras.optimizers.Nadam(learning_rate=4e-4)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

# TensorBoard log directory for this run.
import os
run_index = 1 # increment every time you train the model
run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "run_{:03d}".format(run_index))

# Callbacks: early stopping (patience 20), checkpointing with save_best_only=True,
# and TensorBoard logging.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_model.h5", save_best_only=True)
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

model.fit(X_train, y_train, epochs=100,
          validation_data=(X_valid, y_valid),
          callbacks=callbacks)

部分截图

在这里插入图片描述

达到83次才找到最优，说明这个学习率还是太大

测试

# Reload the checkpoint written during training and evaluate it on the validation split.
model = keras.models.load_model("my_cifar10_model.h5")
model.evaluate(X_valid, y_valid)

在这里插入图片描述

法二

使用网格化搜索

# Reset the Keras session and seed NumPy and TensorFlow before the grid search.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

def build_model(learning_rate=1e-5):
    """Build and compile the dense CIFAR-10 network for a given learning rate.

    The network flattens each 32x32x3 input, applies 20 hidden Dense layers
    (100 units, ELU activation, He-normal initialization) and ends with a
    10-unit softmax layer. It is compiled with a Nadam optimizer, the sparse
    categorical cross-entropy loss and the accuracy metric.

    Args:
        learning_rate: Learning rate handed to the Nadam optimizer.

    Returns:
        The compiled Sequential model.
    """
    # Layers are created in the same order in which they are stacked.
    stack = [keras.layers.Flatten(input_shape=[32, 32, 3])]
    stack += [
        keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal")
        for _ in range(20)
    ]
    stack.append(keras.layers.Dense(10, activation="softmax"))
    net = keras.models.Sequential(stack)

    net.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=keras.optimizers.Nadam(learning_rate=learning_rate),
        metrics=["accuracy"],
    )
    return net


# Wrap build_model so scikit-learn can construct and fit it.
# NOTE(review): the task is classification, yet the regressor wrapper is used --
# presumably so the search ranks candidates by loss; confirm this is intended.
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)

from scipy.stats import reciprocal
from sklearn.model_selection import GridSearchCV

# Learning rates to try. Despite the "distribs" name this is a fixed grid.
param_distribs = {
    "learning_rate": [1e-5, 5e-5, 1e-4,5e-4, 1e-3, 5e-3,1e-2],
}

# 3-fold grid search over the learning rates; the extra keyword arguments are passed to fit().
# NOTE(review): patience=10 equals epochs=10, so early stopping looks unable to
# trigger within a run -- confirm whether a smaller patience was intended.
rnd_search_cv = GridSearchCV(keras_reg, param_distribs, cv=3)
rnd_search_cv.fit(X_train, y_train, epochs=10,
                  validation_data=(X_valid, y_valid),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10)])

各参数评分

# Print the mean cross-validated loss for every learning rate that was tried.
cvres = rnd_search_cv.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    # The wrapped estimator scores by negated loss, so flipping the sign gives
    # the loss itself. No square root is taken: the model is compiled with a
    # cross-entropy loss, not a squared error, so an RMSE-style conversion
    # does not apply.
    print(-mean_score, params)

在这里插入图片描述

改进

添加批量归一化

# Reset the Keras session and seed TensorFlow and NumPy before building the model.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

# Same 20-hidden-layer network, with batch normalization after the flattened
# input and between each Dense layer and its ELU activation.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
model.add(keras.layers.BatchNormalization())
for _ in range(20):
    model.add(keras.layers.Dense(100, kernel_initializer="he_normal"))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation("elu"))
model.add(keras.layers.Dense(10, activation="softmax"))

# Use the `learning_rate` keyword; the `lr` alias is deprecated.
optimizer = keras.optimizers.Nadam(learning_rate=4e-4)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

# Callbacks: early stopping (patience 20), checkpointing with save_best_only=True,
# and TensorBoard logging into a run-specific directory.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_bn_model.h5", save_best_only=True)
run_index = 1 # increment every time you train the model
run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "run_bn_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

model.fit(X_train, y_train, epochs=100,
          validation_data=(X_valid, y_valid),
          callbacks=callbacks)

# Reload the saved checkpoint and evaluate it on the validation split.
model = keras.models.load_model("my_cifar10_bn_model.h5")
model.evaluate(X_valid, y_valid)

在这里插入图片描述
很快就达到比上个模型好的准确率
但是单轮训练时间变久了
最后准确率可以达到54.6%

书本做出来后的比较
Is the model converging faster than before?

Much faster! The previous model took 27 epochs to reach the lowest validation loss, while the new model achieved that same loss in just 5 epochs and continued to make progress until the 16th epoch. The BN layers stabilized training and allowed us to use a much larger learning rate, so convergence was faster.

Does BN produce a better model?

Yes! The final model is also much better, with 54.0% accuracy instead of 47.6%. It’s still not a very good model, but at least it’s much better than before (a Convolutional Neural Network would do much better, but that’s a different topic, see chapter 14).

How does BN affect training speed?

Although the model converged much faster, each epoch took about 12s instead of 8s, because of the extra computations required by the BN layers. But overall the training time (wall time) was shortened significantly!

使用SELU替换批量归一化

使用SELU需要对输入进行处理，注意SELU的使用条件

# Reset the Keras session and seed TensorFlow and NumPy before building the model.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

# 20 hidden layers using SELU activation with LeCun-normal initialization.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation="selu"))
model.add(keras.layers.Dense(10, activation="softmax"))

# Use the `learning_rate` keyword; the `lr` alias is deprecated.
optimizer = keras.optimizers.Nadam(learning_rate=7e-4)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

# Callbacks: early stopping (patience 20), checkpointing with save_best_only=True,
# and TensorBoard logging into a run-specific directory.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_selu_model.h5", save_best_only=True)
run_index = 1 # increment every time you train the model
run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "run_selu_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

# Standardize the inputs with statistics taken over the training samples
# (axis 0) only; the same statistics are applied to validation and test data.
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

model.fit(X_train_scaled, y_train, epochs=100,
          validation_data=(X_valid_scaled, y_valid),
          callbacks=callbacks)

# Reload the saved checkpoint and evaluate it on the scaled validation split.
model = keras.models.load_model("my_cifar10_selu_model.h5")
model.evaluate(X_valid_scaled, y_valid)

在这里插入图片描述
书本做出来的比较

We get 47.9% accuracy, which is not much better than the original model (47.6%), and not as good as the model using batch normalization (54.0%). However, convergence was almost as fast as with the BN model, plus each epoch took only 7 seconds. So it’s by far the fastest model to train so far.

对上个模型添加简单的Dropout正则化模型

Dropout 在训练期间会随机丢弃一些输入（将它们设置为0），然后将其余输入除以保留概率。训练之后，它什么都不做，只是将输入传递到下一层

# Reset the Keras session and seed TensorFlow and NumPy before building the model.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

# SELU network as before, with one AlphaDropout layer (rate 0.1) inserted
# after the last hidden layer, just before the softmax output.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation="selu"))

model.add(keras.layers.AlphaDropout(rate=0.1))
model.add(keras.layers.Dense(10, activation="softmax"))

# Use the `learning_rate` keyword; the `lr` alias is deprecated.
optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

# Callbacks: early stopping (patience 20), checkpointing with save_best_only=True,
# and TensorBoard logging into a run-specific directory.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_alpha_dropout_model.h5", save_best_only=True)
run_index = 1 # increment every time you train the model
run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "run_alpha_dropout_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

# Standardize the inputs with statistics taken over the training samples
# (axis 0) only; the same statistics are applied to validation and test data.
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

model.fit(X_train_scaled, y_train, epochs=100,
          validation_data=(X_valid_scaled, y_valid),
          callbacks=callbacks)

# Reload the saved checkpoint and evaluate it on the scaled validation split.
model = keras.models.load_model("my_cifar10_alpha_dropout_model.h5")
model.evaluate(X_valid_scaled, y_valid)

在这里插入图片描述
有一点点提高

书本的结果

The model reaches 48.9% accuracy on the validation set. That’s very slightly better than without dropout (47.6%). With an extensive hyperparameter search, it might be possible to do better (I tried dropout rates of 5%, 10%, 20% and 40%, and learning rates 1e-4, 3e-4, 5e-4, and 1e-3), but probably not much better in this case.

MC Dropout

class MCAlphaDropout(keras.layers.AlphaDropout):
    """AlphaDropout variant whose ``call`` always runs the parent layer with ``training=True``."""

    def call(self, inputs):
        # The training flag is hard-coded, so the parent layer is always
        # invoked in training mode regardless of how this layer is called.
        parent_call = super().call
        return parent_call(inputs, training=True)

# isinstance() checks whether an object is an instance of a given type.
# Walk over the layers of `model`: each AlphaDropout layer is replaced by an MCAlphaDropout
# with the same rate, and every other layer object is reused as-is in the new Sequential model.
mc_model = keras.models.Sequential([
    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer
    for layer in model.layers
])

def mc_dropout_predict_probas(mc_model, X, n_samples=10):
    """Average the outputs of repeated ``mc_model.predict(X)`` calls.

    Args:
        mc_model: Object exposing a ``predict(X)`` method.
        X: Input passed unchanged to every ``predict`` call.
        n_samples: Number of ``predict`` calls to average over.

    Returns:
        The element-wise mean of the ``n_samples`` prediction results.
    """
    draws = [mc_model.predict(X) for _ in range(n_samples)]
    # Stack the draws along a new leading axis and average that axis away.
    return np.asarray(draws).mean(axis=0)

def mc_dropout_predict_classes(mc_model, X, n_samples=10):
    """Return, for each row, the index of the largest averaged prediction.

    Args:
        mc_model: Object exposing a ``predict(X)`` method.
        X: Input passed on to ``mc_dropout_predict_probas``.
        n_samples: Number of predictions averaged per input.

    Returns:
        Array of arg-max indices taken along axis 1 of the averaged predictions.
    """
    mean_probas = mc_dropout_predict_probas(mc_model, X, n_samples)
    return mean_probas.argmax(axis=1)

# Reset the Keras session and seed TensorFlow and NumPy before sampling predictions.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

# Predict classes with MC dropout on the scaled validation inputs (default n_samples).
y_pred = mc_dropout_predict_classes(mc_model, X_valid_scaled)
accuracy = np.mean(y_pred == y_valid[:, 0])  # y_valid[:, 0] takes column 0 of the labels; the mean of the matches is the accuracy
accuracy

书本结果

We get 47.9% accuracy, which is not much better than the original model (47.6%), and not as good as the model using batch normalization (54.0%). However, convergence was almost as fast as with the BN model, plus each epoch took only 7 seconds. So it’s by far the fastest model to train so far.

使用1周期调度训练模型

最大学习率可以用与上文相同的方法（绘制损失随学习率变化的曲线）求出，
得到 0.05

# Reset the Keras session and seed TensorFlow and NumPy before building the model.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

# SELU network with one AlphaDropout layer (rate 0.1) before the softmax output.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation="selu"))

model.add(keras.layers.AlphaDropout(rate=0.1))
model.add(keras.layers.Dense(10, activation="softmax"))

# Use the `learning_rate` keyword; the `lr` alias is deprecated.
optimizer = keras.optimizers.SGD(learning_rate=1e-2)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

class OneCycleScheduler(keras.callbacks.Callback):
    """Set the learning rate before every batch following a one-cycle schedule.

    The schedule has three linear phases, counted in batches:

    1. ``start_rate`` up to ``max_rate`` over ``half_iteration`` batches;
    2. ``max_rate`` back down to ``start_rate`` over the next
       ``half_iteration`` batches;
    3. ``start_rate`` down to ``last_rate`` over the remaining batches.

    Once ``iterations`` batches have been scheduled the rate is held at
    ``last_rate`` instead of extrapolating the last phase any further.

    Args:
        iterations: Total number of training batches the schedule spans.
        max_rate: Peak learning rate, reached at the end of phase 1.
        start_rate: Initial learning rate; defaults to ``max_rate / 10``.
        last_iterations: Number of batches reserved for phase 3; defaults to
            ``iterations // 10 + 1``.
        last_rate: Learning rate targeted at the end of phase 3; defaults to
            ``start_rate / 1000``.

    Example values for ``iterations=8800, max_rate=0.05`` with the defaults:
    ``start_rate=0.005``, ``last_iterations=881``, ``half_iteration=3959``
    (so phase 3 starts at batch 7918) and ``last_rate=5e-06``.
    """

    def __init__(self, iterations, max_rate, start_rate=None,
                 last_iterations=None, last_rate=None):
        super().__init__()
        self.iterations = iterations
        self.max_rate = max_rate
        # Defaults are applied only when the argument is omitted (None), so an
        # explicit 0 -- e.g. last_rate=0 to decay all the way down -- is kept.
        # `/` is true division, `//` is floor division.
        self.start_rate = max_rate / 10 if start_rate is None else start_rate
        self.last_iterations = (iterations // 10 + 1
                                if last_iterations is None else last_iterations)
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = (self.start_rate / 1000
                          if last_rate is None else last_rate)
        # Number of batches scheduled so far.
        self.iteration = 0

    def _interpolate(self, iter1, iter2, rate1, rate2):
        """Linearly interpolate between (iter1, rate1) and (iter2, rate2) at the current iteration."""
        return ((rate2 - rate1) * (self.iteration - iter1)
                / (iter2 - iter1) + rate1)

    def on_batch_begin(self, batch, logs=None):
        if self.iteration < self.half_iteration:
            # Phase 1: ramp up from start_rate to max_rate.
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.iteration < 2 * self.half_iteration:
            # Phase 2: ramp back down from max_rate to start_rate.
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,
                                     self.max_rate, self.start_rate)
        elif self.iteration < self.iterations:
            # Phase 3: decay from start_rate towards last_rate.
            rate = self._interpolate(2 * self.half_iteration, self.iterations,
                                     self.start_rate, self.last_rate)
        else:
            # Past the planned schedule: hold the final rate. This also avoids
            # a zero-length interpolation interval.
            rate = self.last_rate

        self.iteration += 1
        K.set_value(self.model.optimizer.lr, rate)

#从0.005到0.05然后降回0.005再往下降到1.0663265306122723e-05

import math
# One-cycle training run: 25 epochs with mini-batches of 128 samples.
n_epochs = 25
batch_size = 128
# math.ceil rounds up, so a final partial batch still counts as one batch.
steps_per_epoch = math.ceil(len(X_train) / batch_size)
# The scheduler spans the total number of batches over all epochs;
# max_rate is the peak learning rate.
onecycle = OneCycleScheduler(steps_per_epoch * n_epochs, max_rate=0.05)
history = model.fit(
    X_train_scaled, y_train,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_data=(X_valid_scaled, y_valid),
    callbacks=[onecycle],
)

在这里插入图片描述

书本结果

One cycle allowed us to train the model in just 15 epochs, each taking only 2 seconds (thanks to the larger batch size). This is several times faster than the fastest model we trained so far. Moreover, we improved the model’s performance (from 47.6% to 52.0%). The batch normalized model reaches a slightly better performance (54%), but it’s much slower to train.

王蒟蒻

关注

0
点赞
踩
4

收藏

觉得还不错? 一键收藏
打赏
0
评论
使用神经网络训练CIFAR10

下载数据import tensorflow as tffrom tensorflow import keras(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()X_train = X_train_full[5000:]y_train = y_train_full[5000:]X_valid = X_train_full[:5000]y_valid = y_train_full
复制链接

扫一扫