Reproducing ResNet on CIFAR-10 with Caffe
ResNet achieved very high recognition accuracy in the 2015 ImageNet competition. Here I use Caffe to reproduce the CIFAR-10 experiments described in Section 4.2 of the paper.
- The basic ResNet building block
- The Caffe implementation
- Experimental results on CIFAR-10 and discussion
The basic ResNet building block
This post follows the Torch7 experiments that reproduce ResNet on CIFAR-10 and reimplements them with Caffe. The basic ResNet building blocks can be expressed with the following Python code:
from __future__ import print_function
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
import caffe
# helper function for building ResNet block structures
# The function below does computations: bottom--->conv--->BatchNorm
def conv_factory(bottom, ks, n_out, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=n_out, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                         bias_filler=dict(type='constant', value=0),
                         weight_filler=dict(type='gaussian', std=0.01))
    batch_norm = L.BatchNorm(conv, in_place=True,
                             param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    return scale
# bottom--->conv--->BatchNorm--->ReLU
def conv_factory_relu(bottom, ks, n_out, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=n_out, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                         bias_filler=dict(type='constant', value=0),
                         weight_filler=dict(type='gaussian', std=0.01))
    batch_norm = L.BatchNorm(conv, in_place=True,
                             param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    relu = L.ReLU(scale, in_place=True)
    return relu
# Residual building block! Implements option (A) from Section 3.3. The input
# is passed through two 3x3 convolution layers. Currently this block only supports
# stride == 1 or stride == 2. When stride is 2, the block also downsamples.
# Instead of simply pooling, which may cause a representational bottleneck as
# described in Inception v3, here we use two parallel branches, P (pooling) and
# C (convolution), and add them together. Note that the pooling branch may have
# fewer channels than the convolution branch, so we need to zero-pad it along the
# channel dimension. To the best of our knowledge, no existing Caffe layer supports
# this operation, so I will give a C++/CUDA implementation later (a NumPy sketch of
# the padding is given after the code).
def residual_block(bottom, num_filters, stride=1):
    if stride == 1:
        conv1 = conv_factory_relu(bottom, 3, num_filters, 1, 1)
        conv2 = conv_factory(conv1, 3, num_filters, 1, 1)
        add = L.Eltwise(bottom, conv2, operation=P.Eltwise.SUM)
        return add
    elif stride == 2:
        conv1 = conv_factory_relu(bottom, 3, num_filters, 2, 1)
        conv2 = conv_factory(conv1, 3, num_filters, 1, 1)
        pool = L.Pooling(bottom, pool=P.Pooling.AVE, kernel_size=2, stride=2)
        pad = L.PadChannel(pool, num_channels_to_pad=num_filters // 2)
        add = L.Eltwise(conv2, pad, operation=P.Eltwise.SUM)
        return add
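The `PadChannel` layer used above is not a stock Caffe layer; it is the custom layer whose C++/CUDA implementation is given later. As a rough illustration only, here is my own NumPy sketch of its intended forward behavior (the function name `pad_channel_forward` is hypothetical): it simply appends zero-valued feature maps along the channel axis so that the pooled shortcut matches the convolution branch before the element-wise sum.

```python
import numpy as np

def pad_channel_forward(bottom, num_channels_to_pad):
    """NumPy sketch of the intended PadChannel forward pass (not the real layer)."""
    n, c, h, w = bottom.shape                       # Caffe blob layout: N x C x H x W
    zeros = np.zeros((n, num_channels_to_pad, h, w), dtype=bottom.dtype)
    return np.concatenate([bottom, zeros], axis=1)  # output has c + num_channels_to_pad channels

# e.g. the pooled shortcut has 16 channels while the conv branch has 32,
# so 16 zero channels are appended
x = np.random.randn(4, 16, 16, 16).astype(np.float32)
assert pad_channel_forward(x, 16).shape == (4, 32, 16, 16)
```

The backward pass is equally simple: gradients flowing into the appended zero channels are discarded, and the remaining gradients pass through to the pooling branch unchanged.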
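To check that these helpers generate sensible prototxt, one can wire a couple of blocks together and dump the result with `to_proto`. The snippet below is only a toy sanity check of my own, not part of the original recipe: the `toy_stack` name, the `Input` shape, and the output file name are arbitrary choices, and the stride-2 block is omitted because it requires the custom `PadChannel` layer (and its proto definition) to be added to your Caffe build first.

```python
# Toy sanity check: a 3x3 stem convolution followed by two identity residual blocks,
# written out as a prototxt. Assumes conv_factory_relu / residual_block from above
# are in scope.
def toy_stack():
    data = L.Input(shape=[dict(dim=[1, 3, 32, 32])])  # CIFAR-10 sized dummy input
    net = conv_factory_relu(data, 3, 16, 1, 1)        # stem: 3x3 conv, 16 filters
    net = residual_block(net, 16)                     # two stacked identity blocks
    net = residual_block(net, 16)
    return to_proto(net)

if __name__ == '__main__':
    with open('toy_resnet.prototxt', 'w') as f:
        f.write(str(toy_stack()))
```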