本文在调参记录23的基础上,增加卷积核的个数,最少是64个,最多是256个,继续测试深度残差网络+自适应参数化ReLU激活函数在cifar10数据集上的效果。
自适应参数化ReLU激活函数被放在了残差模块的第二个卷积层之后,其基本原理可参见下方代码注释中引用的论文(IEEE Transactions on Industrial Electronics, DOI: 10.1109/TIE.2020.2972458)。
Keras程序:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 04:17:45 2020
Implemented using TensorFlow 1.10.0 and Keras 2.2.1
Minghang Zhao, Shisheng Zhong, Xuyun Fu, Baoping Tang, Shaojiang Dong, Michael Pecht,
Deep Residual Networks with Adaptively Parametric Rectifier Linear Units for Fault Diagnosis,
IEEE Transactions on Industrial Electronics, 2020, DOI: 10.1109/TIE.2020.2972458
@author: Minghang Zhao
"""
from __future__ import print_function
import keras
import numpy as np
from keras.datasets import cifar10
from keras.layers import Dense, Conv2D, BatchNormalization, Activation, Minimum
from keras.layers import AveragePooling2D, Input, GlobalAveragePooling2D, Concatenate, Reshape
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
# Run in training mode so BatchNormalization uses batch statistics.
K.set_learning_phase(1)

# Load CIFAR-10, already split into train / test sets
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Scale pixels to [0, 1] and center both splits with the training-set mean
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
train_mean = np.mean(x_train)
x_test = x_test - train_mean
x_train = x_train - train_mean

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the 10 class labels
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# Schedule the learning rate, multiply 0.1 every 150 epoches
def scheduler(epoch):
    """Return the learning rate for this epoch, dividing it by 10 every 150 epochs.

    Reads and mutates the optimizer of the module-level ``model``.
    """
    if epoch > 0 and epoch % 150 == 0:
        new_lr = K.get_value(model.optimizer.lr) * 0.1
        K.set_value(model.optimizer.lr, new_lr)
        print("lr changed to {}".format(new_lr))
    return K.get_value(model.optimizer.lr)
# An adaptively parametric rectifier linear unit (APReLU)
def aprelu(inputs):
    """Adaptively Parametric ReLU (APReLU).

    Splits the input into its positive and negative parts, then uses a small
    squeeze-style sub-network to predict one scaling coefficient per channel
    for the negative part: y = relu(x) + scale * min(x, 0).
    """
    n_channels = inputs.get_shape().as_list()[-1]
    # An all-zero tensor with the same shape as the input
    zeros = keras.layers.subtract([inputs, inputs])
    # Positive and negative halves of the input
    positive = Activation('relu')(inputs)
    negative = Minimum()([inputs, zeros])
    # Bottleneck network producing per-channel scaling coefficients in (0, 1)
    coeffs = Concatenate()([GlobalAveragePooling2D()(negative),
                            GlobalAveragePooling2D()(positive)])
    coeffs = Dense(n_channels//16, activation='linear', kernel_initializer='he_normal',
                   kernel_regularizer=l2(1e-4))(coeffs)
    coeffs = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(coeffs)
    coeffs = Activation('relu')(coeffs)
    coeffs = Dense(n_channels, activation='linear', kernel_initializer='he_normal',
                   kernel_regularizer=l2(1e-4))(coeffs)
    coeffs = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(coeffs)
    coeffs = Activation('sigmoid')(coeffs)
    coeffs = Reshape((1, 1, n_channels))(coeffs)
    # Recombine: positive part unchanged, negative part scaled per channel
    scaled_negative = keras.layers.multiply([coeffs, negative])
    return keras.layers.add([positive, scaled_negative])
# Residual Block
def residual_block(incoming, nb_blocks, out_channels, downsample=False,
                   downsample_strides=2):
    """Stack ``nb_blocks`` pre-activation residual blocks with APReLU.

    Each block is BN -> ReLU -> Conv3x3 (possibly strided) -> BN -> ReLU ->
    Conv3x3 -> APReLU, added to an identity shortcut.  When downsampling,
    the identity is subsampled with 1x1 average pooling; when the channel
    count grows, the identity is zero-padded by concatenating an all-zero
    copy of itself.  NOTE: that padding doubles the channel count, so it
    only lines up when out_channels == 2 * in_channels (as used below).

    Args:
        incoming: 4-D input tensor (batch, height, width, channels).
        nb_blocks: number of residual blocks to stack.
        out_channels: number of filters in each convolution.
        downsample: if True, the first conv of each block is strided.
        downsample_strides: stride used when ``downsample`` is True.

    Returns:
        The output tensor of the stacked blocks.
    """
    residual = incoming
    in_channels = incoming.get_shape().as_list()[-1]
    for i in range(nb_blocks):
        identity = residual
        if not downsample:
            downsample_strides = 1
        residual = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(residual)
        residual = Activation('relu')(residual)
        residual = Conv2D(out_channels, 3, strides=(downsample_strides, downsample_strides),
                          padding='same', kernel_initializer='he_normal',
                          kernel_regularizer=l2(1e-4))(residual)
        residual = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(residual)
        residual = Activation('relu')(residual)
        residual = Conv2D(out_channels, 3, padding='same', kernel_initializer='he_normal',
                          kernel_regularizer=l2(1e-4))(residual)
        residual = aprelu(residual)
        # Downsample the identity with the same stride as the residual path.
        # (Was hard-coded to strides=(2, 2), which silently produced a shape
        # mismatch for any caller passing downsample_strides != 2.)
        if downsample_strides > 1:
            identity = AveragePooling2D(pool_size=(1, 1),
                                        strides=(downsample_strides,
                                                 downsample_strides))(identity)
        # Zero-padding to match channels
        if in_channels != out_channels:
            zeros_identity = keras.layers.subtract([identity, identity])
            identity = keras.layers.concatenate([identity, zeros_identity])
            in_channels = out_channels
        residual = keras.layers.add([residual, identity])
    return residual
# define and train a model
# Build the network: stem conv followed by three residual stages
# (64 -> 128 -> 256 channels, spatially downsampling between stages),
# then BN-ReLU, global average pooling and a softmax classifier.
inputs = Input(shape=(32, 32, 3))
x = Conv2D(64, 3, padding='same', kernel_initializer='he_normal',
           kernel_regularizer=l2(1e-4))(inputs)
x = residual_block(x, 20, 64, downsample=False)
x = residual_block(x, 1, 128, downsample=True)
x = residual_block(x, 19, 128, downsample=False)
x = residual_block(x, 1, 256, downsample=True)
x = residual_block(x, 19, 256, downsample=False)
x = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(x)
x = Activation('relu')(x)
x = GlobalAveragePooling2D()(x)
outputs = Dense(10, activation='softmax', kernel_initializer='he_normal',
                kernel_regularizer=l2(1e-4))(x)
model = Model(inputs=inputs, outputs=outputs)
# SGD with Nesterov momentum; decay is left to the LearningRateScheduler
sgd = optimizers.SGD(lr=0.1, decay=0., momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# Data augmentation applied to training batches only
datagen = ImageDataGenerator(
    # randomly rotate images by up to 30 degrees
    rotation_range=30,
    # range for random zoom
    zoom_range = 0.2,
    # shear angle in counter-clockwise direction, in degrees
    shear_range = 30,
    # randomly flip images horizontally
    horizontal_flip=True,
    # randomly shift images horizontally (fraction of total width)
    width_shift_range=0.125,
    # randomly shift images vertically (fraction of total height)
    height_shift_range=0.125)
# Apply the step decay defined in scheduler() above
reduce_lr = LearningRateScheduler(scheduler)
# Fit the model on augmented batches generated by datagen.flow()
model.fit_generator(datagen.flow(x_train, y_train, batch_size=100),
                    validation_data=(x_test, y_test), epochs=500,
                    verbose=1, callbacks=[reduce_lr], workers=4)
# Switch to inference mode so BatchNormalization uses its moving statistics
K.set_learning_phase(0)
# Final evaluation on the (already normalized) train and test sets
DRSN_train_score = model.evaluate(x_train, y_train, batch_size=100, verbose=0)
print('Train loss:', DRSN_train_score[0])
print('Train accuracy:', DRSN_train_score[1])
DRSN_test_score = model.evaluate(x_test, y_test, batch_size=100, verbose=0)
print('Test loss:', DRSN_test_score[0])
print('Test accuracy:', DRSN_test_score[1])
实验结果:
Using TensorFlow backend.
x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
Epoch 1/500
281s 562ms/step - loss: 9.6683 - acc: 0.2858 - val_loss: 8.5491 - val_acc: 0.4224
Epoch 2/500
236s 471ms/step - loss: 7.8652 - acc: 0.4406 - val_loss: 7.0180 - val_acc: 0.5270
Epoch 3/500
235s 471ms/step - loss: 6.5241 - acc: 0.5264 - val_loss: 5.7927 - val_acc: 0.6159
Epoch 4/500
235s 471ms/step - loss: 5.4217 - acc: 0.6013 - val_loss: 4.7898 - val_acc: 0.6878
Epoch 5/500
235s 471ms/step - loss: 4.5434 - acc: 0.6542 - val_loss: 4.0362 - val_acc: 0.7256
Epoch 6/500
235s 470ms/step - loss: 3.8297 - acc: 0.6947 - val_loss: 3.3928 - val_acc: 0.7654
Epoch 7/500
235s 471ms/step - loss: 3.2680 - acc: 0.7257 - val_loss: 2.8972 - val_acc: 0.7805
Epoch 8/500
236s 471ms/step - loss: 2.8023 - acc: 0.7493 - val_loss: 2.4718 - val_acc: 0.8117
Epoch 9/500
235s 471ms/step - loss: 2.4351 - acc: 0.7652 - val_loss: 2.1518 - val_acc: 0.8216
Epoch 10/500
235s 470ms/step - loss: 2.1298 - acc: 0.7822 - val_loss: 1.8664 - val_acc: 0.8355
Epoch 11/500
235s 470ms/step - loss: 1.8768 - acc: 0.7961 - val_loss: 1.6576 - val_acc: 0.8407
Epoch 12/500
235s 470ms/step - loss: 1.6745 - acc: 0.8071 - val_loss: 1.4888 - val_acc: 0.8456
Epoch 13/500
235s 471ms/step - loss: 1.5155 - acc: 0.8139 - val_loss: 1.3255 - val_acc: 0.8598
Epoch 14/500
235s 471ms/step - loss: 1.3782 - acc: 0.8230 - val_loss: 1.2249 - val_acc: 0.8616
Epoch 15/500
235s 471ms/step - loss: 1.2630 - acc: 0.8293 - val_loss: 1.1236 - val_acc: 0.8655
Epoch 16/500
235s 471ms/step - loss: 1.1829 - acc: 0.8342 - val_loss: 1.0384 - val_acc: 0.8768
Epoch 17/500
235s 470ms/step - loss: 1.1094 - acc: 0.8389 - val_loss: 0.9748 - val_acc: 0.8752
Epoch 18/500
236s 471ms/step - loss: 1.0510 - acc: 0.8448 - val_loss: 0.9660 - val_acc: 0.8697
Epoch 19/500
235s 471ms/step - loss: 1.0037 - acc: 0.8472 - val_loss: 0.9055 - val_acc: 0.8760
Epoch 20/500
235s 471ms/step - loss: 0.9615 - acc: 0.8520 - val_loss: 0.8935 - val_acc: 0.8711
Epoch 21/500
235s 471ms/step - loss: 0.9345 - acc: 0.8545 - val_loss: 0.8621 - val_acc: 0.8743
Epoch 22/500
235s 470ms/step - loss: 0.9044 - acc: 0.8589 - val_loss: 0.8440 - val_acc: 0.8776
Epoch 23/500
235s 470ms/step - loss: 0.8816 - acc: 0.8625 - val_loss: 0.8310 - val_acc: 0.8792
Epoch 24/500
235s 470ms/step - loss: 0.8640 - acc: 0.8659 - val_loss: 0.8157 - val_acc: 0.8820
Epoch 25/500
235s 470ms/step - loss: 0.8446 - acc: 0.8696 - val_loss: 0.7921 - val_acc: 0.8873
Epoch 26/500
235s 470ms/step - loss: 0.8283 - acc: 0.8716 - val_loss: 0.7739 - val_acc: 0.8934
Epoch 27/500
235s 470ms/step - loss: 0.8212 - acc: 0.8720 - val_loss: 0.7726 - val_acc: 0.8885
Epoch 28/500
235s 471ms/step - loss: 0.8089 - acc: 0.8743 - val_loss: 0.7783 - val_acc: 0.8855
Epoch 29/500
235s 470ms/step - loss: 0.7970 - acc: 0.8775 - val_loss: 0.7350 - val_acc: 0.8988
Epoch 30/500
235s 470ms/step - loss: 0.7911 - acc: 0.8792 - val_loss: 0.7695 - val_acc: 0.8860
Epoch 31/500
235s 470ms/step - loss: 0.7846 - acc: 0.8802 - val_loss: 0.7392 - val_acc: 0.8989
Epoch 32/500
235s 471ms/step - loss: 0.7784 - acc: 0.8814 - val_loss: 0.7618 - val_acc: 0.8888
Epoch 33/500
235s 470ms/step - loss: 0.7724 - acc: 0.8842 - val_loss: 0.7547 - val_acc: 0.8937
Epoch 34/500
235s 470ms/step - loss: 0.7680 - acc: 0.8856 - val_loss: 0.7400 - val_acc: 0.8941
Epoch 35/500
235s 470ms/step - loss: 0.7646 - acc: 0.8865 - val_loss: 0.7079 - val_acc: 0.9096
Epoch 36/500
235s 470ms/step - loss: 0.7567 - acc: 0.8889 - val_loss: 0.7297 - val_acc: 0.8991
Epoch 37/500
235s 471ms/step - loss: 0.7518 - acc: 0.8920 - val_loss: 0.7265 - val_acc: 0.9011
Epoch 38/500
235s 470ms/step - loss: 0.7499 - acc: 0.8911 - val_loss: 0.7068 - val_acc: 0.9108
Epoch 39/500
235s 470ms/step - loss: 0.7455 - acc: 0.8927 - val_loss: 0.7524 - val_acc: 0.8939
Epoch 40/500
235s 470ms/step - loss: 0.7451 - acc: 0.8926 - val_loss: 0.7293 - val_acc: 0.9007
Epoch 41/500
235s 471ms/step - loss: 0.7434 - acc: 0.8951 - val_loss: 0.6985 - val_acc: 0.9097
Epoch 42/500
235s 470ms/step - loss: 0.7439 - acc: 0.8933 - val_loss: 0.7252 - val_acc: 0.9018
Epoch 43/500
235s 470ms/step - loss: 0.7433 - acc: 0.8952 - val_loss: 0.7304 - val_acc: 0.9006
Epoch 44/500
235s 470ms/step - loss: 0.7393 - acc: 0.8958 - val_loss: 0.6997 - val_acc: 0.9134
Epoch 45/500
235s 470ms/step - loss: 0.7348 - acc: 0.8992 - val_loss: 0.7287 - val_acc: 0.9035
Epoch 46/500
235s 470ms/step - loss: 0.7373 - acc: 0.8976 - val_loss: 0.7235 - val_acc: 0.9036
Epoch 47/500
235s 470ms/step - loss: 0.7382 - acc: 0.8974 - val_loss: 0.7178 - val_acc: 0.9081
Epoch 48/500
235s 470ms/step - loss: 0.7363 - acc: 0.8975 - val_loss: 0.7247 - val_acc: 0.9044
Epoch 49/500
235s 470ms/step - loss: 0.7306 - acc: 0.9009 - val_loss: 0.7328 - val_acc: 0.9006
Epoch 50/500
235s 470ms/step - loss: 0.7356 - acc: 0.9003 - val_loss: 0.7096 - val_acc: 0.9114
Epoch 51/500
235s 470ms/step - loss: 0.7282 - acc: 0.9029 - val_loss: 0.7156 - val_acc: 0.9076
Epoch 52/500
235s 470ms/step - loss: 0.7286 - acc: 0.9014 - val_loss: 0.7233 - val_acc: 0.9046
Epoch 53/500
235s 470ms/step - loss: 0.7304 - acc: 0.9016 - val_loss: 0.7087 - val_acc: 0.9088
Epoch 54/500
235s 470ms/step - loss: 0.7261 - acc: 0.9030 - val_loss: 0.7202 - val_acc: 0.9085
Epoch 55/500
235s 470ms/step - loss: 0.7257 - acc: