这是我的第一篇帖子,就用它来写何凯明大神的深度残差学习吧,下面内容都是我总结修改的成果。
Deep Residual Learning for Image Recognition
文章的重要几点如下:
1.解决了之前对于网络越深,反而效果越不好的问题,也就是所谓的退化问题;
2. 残差块中有卷积层,BN层,和激活层,另外对其进行求和。
代码实现基于cifar10数据集
cifar10数据集共有60000张图片,共有十类数据,每一类有6000张图片。其中50000张训练集,10000张测试集。残差网络主要用于cifar10数据集的分类。
// resnet.py用于构建具体的残差网络
from __future__ import division
import six
from keras.models import Model
from keras.layers import (
Input,
Activation,
Dense,
Flatten
)
from keras.layers.convolutional import (
Conv2D,
MaxPooling2D,
AveragePooling2D
)
from keras.layers.merge import add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K
def _bn_relu(input):
"""Helper to build a BN -> relu block
"""
norm = BatchNormalization(axis=CHANNEL_AXIS)(input)
return Activation("relu")(norm)
def _conv_bn_relu(**conv_params):
"""Helper to build a conv -> BN -> relu block
"""
filters = conv_params["filters"]
kernel_size = conv_params["kernel_size"]
strides = conv_params.setdefault("strides", (1, 1))
kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
padding = conv_params.setdefault("padding", "same")
kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4))
def f(input):
conv = Conv2D(filters=filters, kernel_size=kernel_size,
strides=strides, padding=padding,
kernel_initializer=kernel_initializer,
kernel_regularizer=kernel_regularizer)(input)
return _bn_relu(conv)
return f
def _bn_relu_conv(**conv_params):
"""Helper to build a BN -> relu -> conv block.
This is an improved scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf
"""
filters = conv_params["filters"]
kernel_size = conv_params["kernel_size"]
strides = conv_params.setdefault("strides", (1, 1))
kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
padding = conv_params.setdefault("padding", "same")
kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4))
def f(input):
activation = _bn_relu(input)
return Conv2D(filters=filters, kernel_size=kernel_size,
strides=strides, padding=padding,
kernel_initializer=kernel_initializer,
kernel_regularizer=kernel_regularizer)(activation)
return f
def _shortcut(input, residual):
"""Adds a shortcut between input and residual block and merges them with "sum"
"""
# Expand channels of shortcut to match residual.
# Stride appropriately to match residual (width, height)
# Should be int if network architecture is correctly configured.
input_shape = K.int_shape(input)
residual_shape = K.int_shape(residual)
stride_width = int(round(input_shape[