LeNet
Network structure
from keras.models import Sequential
from keras.layers import Conv2D, AveragePooling2D, Flatten, Dense

# input_shape is assumed to be defined earlier (e.g. (28, 28, 1) for MNIST)
model = Sequential()
# C1 Convolutional Layer
model.add(Conv2D(6, kernel_size=(5, 5), strides=(1, 1), activation='tanh', input_shape=input_shape, padding='same'))
# S2 Pooling Layer
model.add(AveragePooling2D(pool_size=(2, 2), strides=2, padding='valid'))
# C3 Convolutional Layer
model.add(Conv2D(16, kernel_size=(5, 5), strides=(1, 1), activation='tanh', padding='valid'))
# S4 Pooling Layer
model.add(AveragePooling2D(pool_size=(2, 2), strides=2, padding='valid'))
# C5 Fully Connected Convolutional Layer
model.add(Conv2D(120, kernel_size=(5, 5), strides=(1, 1), activation='tanh', padding='valid'))
#Flatten the CNN output so that we can connect it with fully connected layers
model.add(Flatten())
# FC6 Fully Connected Layer
model.add(Dense(84, activation='tanh'))
# Output Layer with softmax activation
model.add(Dense(10, activation='softmax'))
# print the model summary
model.summary()
Setting the learning hyperparameters
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
# set the learning rate schedule as created in the original paper
def lr_schedule(epoch):
    if epoch <= 2:
        lr = 5e-4
    elif epoch > 2 and epoch <= 5:
        lr = 2e-4
    elif epoch > 5 and epoch <= 9:
        lr = 5e-5
    else:
        lr = 1e-5
    return lr
lr_scheduler = LearningRateScheduler(lr_schedule)
# set the checkpointer
checkpointer = ModelCheckpoint(filepath='model.weights.best.hdf5', verbose=1,
save_best_only=True)
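# NOTE: model.compile is not shown in this excerpt; a minimal, illustrative compile step
# (the optimizer and loss here are assumptions, not taken from the original) would be:
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])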
# train the model
hist = model.fit(X_train, y_train, batch_size=32, epochs=20,
validation_data=(X_test, y_test), callbacks=[checkpointer, lr_scheduler],
verbose=2, shuffle=True)
AlexNet
Network architecture diagram
Code
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Activation, BatchNormalization, Flatten, Dense, Dropout
from keras.regularizers import l2

# Instantiate an empty sequential model
model = Sequential(name="Alexnet")
# 1st layer (conv + pool + batchnorm)
model.add(Conv2D(filters= 96, kernel_size= (11,11), strides=(4,4), padding='valid', kernel_regularizer=l2(0.0005),
input_shape = (227,227,3)))
model.add(Activation('relu')) #<---- activation function can be added on its own layer or within the Conv2D function
model.add(MaxPool2D(pool_size=(3,3), strides= (2,2), padding='valid'))
model.add(BatchNormalization())
# 2nd layer (conv + pool + batchnorm)
model.add(Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), padding='same', kernel_regularizer=l2(0.0005)))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(3,3), strides=(2,2), padding='valid'))
model.add(BatchNormalization())
# layer 3 (conv + batchnorm) <--- note that the authors did not add a POOL layer here
model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same', kernel_regularizer=l2(0.0005)))
model.add(Activation('relu'))
model.add(BatchNormalization())
# layer 4 (conv + batchnorm) <--- similar to layer 3
model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same', kernel_regularizer=l2(0.0005)))
model.add(Activation('relu'))
model.add(BatchNormalization())
# layer 5 (conv + batchnorm)
model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='same', kernel_regularizer=l2(0.0005)))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(3,3), strides=(2,2), padding='valid'))
# Flatten the CNN output to feed it with fully connected layers
model.add(Flatten())
# layer 6 (Dense layer + dropout)
model.add(Dense(units = 4096, activation = 'relu'))
model.add(Dropout(0.5))
# layer 7 (Dense layers)
model.add(Dense(units = 4096, activation = 'relu'))
model.add(Dropout(0.5))
# layer 8 (softmax output layer)
model.add(Dense(units = 1000, activation = 'softmax'))
# print the model summary
model.summary()
The hyperparameter setup is similar to the code for the first architecture (LeNet).
VGGNet
VGGNet introduces a uniform layer configuration, which reduces the experimentation and trial-and-error needed when designing a network and thus addresses the problem of the previous two architectures having too many hyperparameters to tune.
Unlike AlexNet, it replaces the large kernels of AlexNet's first convolutional layer (11×11) and second convolutional layer (5×5) with stacks of 3×3 kernels (a quick parameter comparison follows below).
All convolutional layers use 3×3 kernels with strides=1 and padding='same'.
All pooling layers are 2×2 with strides=2.
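A quick parameter count (a side calculation, not from the original text) shows why stacked 3×3 convolutions can replace larger kernels: two 3×3 layers cover the same 5×5 receptive field with fewer weights.

C = 256                           # example channel count (illustrative)
params_5x5 = 5 * 5 * C * C        # one 5x5 conv layer, C input and C output channels
params_3x3 = 2 * (3 * 3 * C * C)  # two stacked 3x3 conv layers, same receptive field
print(params_5x5, params_3x3)     # 1,638,400 vs 1,179,648 -> about 28% fewer weights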
VGG16 is the more commonly used variant; its architecture is as follows:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout

model = Sequential()
# first block
model.add(Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same',input_shape=(224,224, 3)))
model.add(Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
# second block
model.add(Conv2D(filters=128, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(Conv2D(filters=128, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
# third block
model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
# fourth block
model.add(Conv2D(filters=512, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(Conv2D(filters=512, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(Conv2D(filters=512, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
# fifth block
model.add(Conv2D(filters=512, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(Conv2D(filters=512, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(Conv2D(filters=512, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
# sixth block (classifier)
model.add(Flatten())
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1000, activation='softmax'))
model.summary()
Inception
Inception extracts features differently from classic CNNs: it introduces the inception module (several parallel convolutional layers with different kernel sizes) and builds the network by stacking these modules.
1. The inception module
Example composition:
2. The inception module and dimensionality reduction
Dimensionality-reduction layer (a 1×1 convolutional layer): suppose the input is 32×32×200. Adding a 1×1 convolutional layer with 16 filters reduces the depth from 200 to 16 channels, shrinking the representation and greatly reducing the number of operations.
Used in moderation, these reduction layers cut computational cost without hurting performance.
In general, placing a dimensionality-reduction layer before each large convolution lets you significantly increase the number of hidden units at each stage while keeping the amount of computation manageable (see the rough count below).
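A rough multiplication count with the numbers above (assuming, for illustration, that this stage ends in a 5×5 convolution with 32 filters and 'same' padding; these choices are not from the text) makes the savings concrete:

# direct 5x5 convolution: 32x32x200 -> 32x32x32
direct = 32 * 32 * 32 * (5 * 5 * 200)                       # ~164 million multiplications
# 1x1 reduction to 16 channels first: 32x32x200 -> 32x32x16 -> 32x32x32
reduced = 32 * 32 * 16 * 200 + 32 * 32 * 32 * (5 * 5 * 16)  # ~16 million multiplications
print(direct, reduced)                                      # roughly a 10x reduction in cost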
3. The inception architecture
The inception network is built by stacking inception modules.
GoogLeNet
GoogLeNet framework: a stack of many inception modules.
Keras implementation
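The listing below calls an inception_module helper and uses input_layer, kernel_init, and bias_init, none of which are shown in this excerpt. Here is a minimal sketch of how they might be defined (the structure follows the standard GoogLeNet module with four parallel branches; the exact values and definitions in the original may differ):

from keras.layers import Input, Conv2D, MaxPool2D, AveragePooling2D, Flatten, Dense, Dropout, concatenate
from keras.models import Model
from keras import initializers

# assumed setup for the excerpt below
input_layer = Input(shape=(224, 224, 3))
kernel_init = initializers.glorot_uniform()
bias_init = initializers.Constant(value=0.2)

def inception_module(x, filters_1x1, filters_3x3_reduce, filters_3x3,
                     filters_5x5_reduce, filters_5x5, filters_pool_proj, name=None):
    # branch 1: 1x1 convolution
    conv_1x1 = Conv2D(filters_1x1, (1, 1), padding='same', activation='relu',
                      kernel_initializer=kernel_init, bias_initializer=bias_init)(x)
    # branch 2: 1x1 reduction followed by 3x3 convolution
    conv_3x3 = Conv2D(filters_3x3_reduce, (1, 1), padding='same', activation='relu',
                      kernel_initializer=kernel_init, bias_initializer=bias_init)(x)
    conv_3x3 = Conv2D(filters_3x3, (3, 3), padding='same', activation='relu',
                      kernel_initializer=kernel_init, bias_initializer=bias_init)(conv_3x3)
    # branch 3: 1x1 reduction followed by 5x5 convolution
    conv_5x5 = Conv2D(filters_5x5_reduce, (1, 1), padding='same', activation='relu',
                      kernel_initializer=kernel_init, bias_initializer=bias_init)(x)
    conv_5x5 = Conv2D(filters_5x5, (5, 5), padding='same', activation='relu',
                      kernel_initializer=kernel_init, bias_initializer=bias_init)(conv_5x5)
    # branch 4: 3x3 max pooling followed by a 1x1 projection
    pool_proj = MaxPool2D((3, 3), strides=(1, 1), padding='same')(x)
    pool_proj = Conv2D(filters_pool_proj, (1, 1), padding='same', activation='relu',
                       kernel_initializer=kernel_init, bias_initializer=bias_init)(pool_proj)
    # concatenate the four branches along the channel axis
    return concatenate([conv_1x1, conv_3x3, conv_5x5, pool_proj], axis=3, name=name)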
x = Conv2D(64, (7, 7), padding='same', strides=(2, 2), activation='relu', name='conv_1_7x7/2', kernel_initializer=kernel_init, bias_initializer=bias_init)(input_layer)
x = MaxPool2D((3, 3), padding='same', strides=(2, 2), name='max_pool_1_3x3/2')(x)
# x = Conv2D(64, (1, 1), padding='same', strides=(1, 1), activation='relu', name='conv_2a_3x3/1')(x)
x = Conv2D(192, (3, 3), padding='same', strides=(1, 1), activation='relu', name='conv_2b_3x3/1')(x)
x = MaxPool2D((3, 3), padding='same', strides=(2, 2), name='max_pool_2_3x3/2')(x)
x = inception_module(x,
filters_1x1=64,
filters_3x3_reduce=96,
filters_3x3=128,
filters_5x5_reduce=16,
filters_5x5=32,
filters_pool_proj=32,
name='inception_3a')
x = inception_module(x,
filters_1x1=128,
filters_3x3_reduce=128,
filters_3x3=192,
filters_5x5_reduce=32,
filters_5x5=96,
filters_pool_proj=64,
name='inception_3b')
x = MaxPool2D((3, 3), padding='same', strides=(2, 2), name='max_pool_3_3x3/2')(x)
x = inception_module(x,
filters_1x1=192,
filters_3x3_reduce=96,
filters_3x3=208,
filters_5x5_reduce=16,
filters_5x5=48,
filters_pool_proj=64,
name='inception_4a')
classifier_1 = AveragePooling2D((5, 5), strides=3)(x)
classifier_1 = Conv2D(128, (1, 1), padding='same', activation='relu')(classifier_1)
classifier_1 = Flatten()(classifier_1)
classifier_1 = Dense(1024, activation='relu')(classifier_1)
classifier_1 = Dropout(0.7)(classifier_1)
classifier_1 = Dense(10, activation='softmax', name='auxilliary_output_1')(classifier_1)
x = inception_module(x,
filters_1x1=160,
filters_3x3_reduce=112,
filters_3x3=224,
filters_5x5_reduce=24,
filters_5x5=64,
filters_pool_proj=64,
name='inception_4b')
x = inception_module(x,
filters_1x1=128,
filters_3x3_reduce=128,
filters_3x3=256,
filters_5x5_reduce=24,
filters_5x5=64,
filters_pool_proj=64,
name='inception_4c')
x = inception_module(x,
filters_1x1=112,
filters_3x3_reduce=144,
filters_3x3=288,
filters_5x5_reduce=32,
filters_5x5=64,
filters_pool_proj=64,
name='inception_4d')
classifier_2 = AveragePooling2D((5, 5), strides=3)(x)
classifier_2 = Conv2D(128, (1, 1), padding='same', activation='relu')(classifier_2)
classifier_2 = Flatten()(classifier_2)
classifier_2 = Dense(1024, activation='relu')(classifier_2)
classifier_2 = Dropout(0.7)(classifier_2)
classifier_2 = Dense(10, activation='softmax', name='auxilliary_output_2')(classifier_2)
x = inception_module(x,
filters_1x1=256,
filters_3x3_reduce=160,
filters_3x3=320,
filters_5x5_reduce=32,
filters_5x5=128,
filters_pool_proj=128,
name='inception_4e')
x = MaxPool2D((3, 3), padding='same', strides=(2, 2), name='max_pool_4_3x3/2')(x)
x = inception_module(x,
filters_1x1=256,
filters_3x3_reduce=160,
filters_3x3=320,
filters_5x5_reduce=32,
filters_5x5=128,
filters_pool_proj=128,
name='inception_5a')
x = inception_module(x,
filters_1x1=384,
filters_3x3_reduce=192,
filters_3x3=384,
filters_5x5_reduce=48,
filters_5x5=128,
filters_pool_proj=128,
name='inception_5b')
x = AveragePooling2D(pool_size=(7,7), strides=1, padding='valid', name='avg_pool_5_3x3/1')(x)
x = Dropout(0.4)(x)
# flatten the pooled 1x1x1024 feature map so the Dense layers below produce a flat vector
x = Flatten()(x)
x = Dense(1000, activation='relu', name='linear')(x)
# 1000 corresponds to the ImageNet class count; when training on the same labels it should match
# the number of classes used by the auxiliary heads (10 above)
x = Dense(1000, activation='softmax', name='output')(x)
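The training code below references model_with_classifiers, a model whose outputs are the main branch plus the two auxiliary classifiers. A minimal sketch of how it could be assembled (assuming the functional-API input_layer defined earlier; the exact wrapper in the original may differ):

model_with_classifiers = Model(input_layer, [x, classifier_1, classifier_2], name='GoogLeNet_with_aux')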
Learning hyperparameter code
import math
from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler

epochs = 25
initial_lrate = 0.01

def decay(epoch, steps=100):
    initial_lrate = 0.01
    drop = 0.96
    epochs_drop = 8
    lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
    return lrate

sgd = SGD(lr=initial_lrate, momentum=0.9, nesterov=False)
lr_sc = LearningRateScheduler(decay, verbose=1)

model_with_classifiers.compile(loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
                               loss_weights=[1, 0.3, 0.3], optimizer=sgd, metrics=['accuracy'])
history = model_with_classifiers.fit(X_train, [y_train, y_train, y_train], validation_data=(X_test, [y_test, y_test, y_test]),
                                     epochs=epochs, batch_size=256, callbacks=[lr_sc])
ResNet
ResNet introduces a residual module with skip connections and applies batch normalization extensively to the hidden layers, which makes very deep networks trainable.
* Skip connections address the vanishing-gradient problem: they create a bypass through which the gradient can flow, and they allow the model to learn the identity function (ensuring that a later layer performs at least as well as an earlier one).
The residual block structure is as follows:
Code example (it does not correspond exactly to the figure above; conv_name_base, bn_name_base, F2, and f come from the full function further below):
X_shortcut = X
X = Conv2D(filters = F2, kernel_size = (f, f), strides = (1,1), padding = 'same', name = conv_name_base + '2b', kernel_initializer = glorot_uniform(seed=0))(X)
X = BatchNormalization(axis = 3, name = bn_name_base + '2b')(X)
X = Activation('relu')(X)
X = Add()([X, X_shortcut])
X = Activation('relu')(X)
Note: there is no pooling layer inside a residual block. Instead, 1×1 convolutional layers are added at the beginning and end of the block to reduce and then restore the number of channels; this is called a bottleneck residual block. Because residual blocks are stacked, the tensors being added must have the same dimensions, so whenever the spatial size changes, the shortcut path is downsampled with its own 1×1 convolution plus batch normalization.
The residual block function:
bottleneck_residual_block(X, f, filters, stage, block, reduce=False, s=2)
reduce: when True, use the reduction (projection) shortcut, otherwise the regular identity shortcut; X: input tensor of shape (number of samples, height, width, channels); f: window size of the middle convolution on the main path; filters: list with the number of filters of the convolutions on the main path; s: strides.
Code:
from keras.layers import Input, Conv2D, BatchNormalization, Activation, Add, MaxPooling2D, AveragePooling2D, Flatten, Dense
from keras.models import Model
from keras.initializers import glorot_uniform

def bottleneck_residual_block(X, f, filters, stage, block, reduce=False, s=2):
    """
    Arguments:
    X -- input tensor of shape (m, height, width, channels)
    f -- integer, specifying the shape of the middle CONV's window for the main path
    filters -- python list of integers, defining the number of filters in the CONV layers of the main path
    stage -- integer, used to name the layers, depending on their position in the network
    block -- string/character, used to name the layers, depending on their position in the network
    reduce -- boolean, True = identifies the reduction layer at the beginning of each learning stage
    s -- integer, strides
    Returns:
    X -- output of the identity block, tensor of shape (H, W, C)
    """
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Retrieve Filters
    F1, F2, F3 = filters

    # Save the input value. You'll need this later to add back to the main path.
    X_shortcut = X

    if reduce:
        # if we are to reduce the spatial size, apply a 1x1 CONV layer to the shortcut path
        # to do that, we need both CONV layers to have similar strides
        X = Conv2D(filters=F1, kernel_size=(1, 1), strides=(s, s), padding='valid', name=conv_name_base + '2a', kernel_initializer=glorot_uniform(seed=0))(X)
        X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
        X = Activation('relu')(X)

        X_shortcut = Conv2D(filters=F3, kernel_size=(1, 1), strides=(s, s), padding='valid', name=conv_name_base + '1',
                            kernel_initializer=glorot_uniform(seed=0))(X_shortcut)
        X_shortcut = BatchNormalization(axis=3, name=bn_name_base + '1')(X_shortcut)
    else:
        # First component of main path
        X = Conv2D(filters=F1, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2a', kernel_initializer=glorot_uniform(seed=0))(X)
        X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
        X = Activation('relu')(X)

    # Second component of main path
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)

    # Third component of main path
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)

    return X
def ResNet50(input_shape, classes):
    """
    Arguments:
    input_shape -- tuple shape of the images of the dataset
    classes -- integer, number of classes
    Returns:
    model -- a Model() instance in Keras
    """
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Stage 1
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1', kernel_initializer=glorot_uniform(seed=0))(X_input)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Stage 2
    X = bottleneck_residual_block(X, 3, [64, 64, 256], stage=2, block='a', reduce=True, s=1)
    X = bottleneck_residual_block(X, 3, [64, 64, 256], stage=2, block='b')
    X = bottleneck_residual_block(X, 3, [64, 64, 256], stage=2, block='c')

    # Stage 3
    X = bottleneck_residual_block(X, 3, [128, 128, 512], stage=3, block='a', reduce=True, s=2)
    X = bottleneck_residual_block(X, 3, [128, 128, 512], stage=3, block='b')
    X = bottleneck_residual_block(X, 3, [128, 128, 512], stage=3, block='c')
    X = bottleneck_residual_block(X, 3, [128, 128, 512], stage=3, block='d')

    # Stage 4
    X = bottleneck_residual_block(X, 3, [256, 256, 1024], stage=4, block='a', reduce=True, s=2)
    X = bottleneck_residual_block(X, 3, [256, 256, 1024], stage=4, block='b')
    X = bottleneck_residual_block(X, 3, [256, 256, 1024], stage=4, block='c')
    X = bottleneck_residual_block(X, 3, [256, 256, 1024], stage=4, block='d')
    X = bottleneck_residual_block(X, 3, [256, 256, 1024], stage=4, block='e')
    X = bottleneck_residual_block(X, 3, [256, 256, 1024], stage=4, block='f')

    # Stage 5
    X = bottleneck_residual_block(X, 3, [512, 512, 2048], stage=5, block='a', reduce=True, s=2)
    X = bottleneck_residual_block(X, 3, [512, 512, 2048], stage=5, block='b')
    X = bottleneck_residual_block(X, 3, [512, 512, 2048], stage=5, block='c')
    # AVGPOOL
    X = AveragePooling2D((1, 1), name="avg_pool")(X)

    # output layer
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes), kernel_initializer=glorot_uniform(seed=0))(X)

    # Create the model
    model = Model(inputs=X_input, outputs=X, name='ResNet50')

    return model
The hyperparameter setup is similar to the previous networks.
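For completeness, a minimal usage sketch (the input shape, class count, optimizer, and training arguments here are placeholders, not taken from the book):

model = ResNet50(input_shape=(224, 224, 3), classes=10)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
# model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)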
This post reproduces content and code from 《深度学习计算机视觉》, for study purposes only.