1.使用collections.namedtuple设计ResNet基本Block模块组
注:tuple和list类型类似,但tuple中的元素不可以更改,并且tuple的item只能通过index访问;collections.namedtuple生成的tuple子类不仅可以通过index访问item,还可以通过item的name进行访问。
class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
    """A named tuple describing a ResNet block.

    Attributes:
        scope: Name used as the variable scope of the block.
        unit_fn: Callable that builds one residual unit. It is invoked as
            ``unit_fn(net, depth=..., depth_bottleneck=..., stride=...)``.
        args: List with one ``(depth, depth_bottleneck, stride)`` tuple per
            residual unit in the block.
    """
unit_fn是残差学习单元的生成函数,args是一个列表,其中每个元素描述该Block中的一个残差学习单元,如
[(256,64,1)]*2+[(256,64,2)]
每个元素都是一个三元tuple,即(depth,depth_bottleneck,stride),如(256,64,3)代表该残差学习单元第三层的输出通道数为256,前两层的输出通道数为64,第二层(中间层)的步长stride为3。
2.定义一个降采样subsample的方法
def subsample(inputs, factor, scope=None):
    """Downsample `inputs` spatially by an integer `factor`.

    Args:
        inputs: Input tensor; passed unchanged to `slim.max_pool2d` when
            subsampling is required.
        factor: Subsampling stride. A value of 1 means no subsampling.
        scope: Optional scope name forwarded to `slim.max_pool2d`.

    Returns:
        `inputs` itself when `factor == 1`, otherwise the result of a 1x1
        max pooling with stride `factor`.
    """
    if factor == 1:
        return inputs
    # A 1x1 pooling window never mixes neighbouring pixels, so the stride
    # alone performs the subsampling.
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
如果采样因子factor不是1,则通过1*1的池化,以factor为步长stride实现
3.定义一个conv2d_same函数来创建卷积层。
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    """Create a 2-D convolution whose padding depends only on the kernel size.

    Args:
        inputs: Rank-4 tensor; the two middle dimensions are the ones that
            get padded (presumably [batch, height, width, channels]).
        num_outputs: Number of output channels of the convolution.
        kernel_size: Integer side length of the square kernel.
        stride: Integer stride of the convolution.
        scope: Optional scope name forwarded to `slim.conv2d`.

    Returns:
        The output of `slim.conv2d`. For `stride == 1` this is a plain
        'SAME' convolution; otherwise the input is zero-padded explicitly
        and convolved with 'VALID' padding.
    """
    if stride == 1:
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           padding='SAME', scope=scope)

    # Pad kernel_size - 1 zeros in total per spatial dimension; when that
    # total is odd the extra zero goes at the end.
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    # Only the two middle (spatial) dimensions are padded.
    inputs = tf.pad(inputs,
                    [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
    return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                       padding='VALID', scope=scope)
先进行步长判断,如果stride=1,直接使用slim.conv2d并令padding=SAME。如果stride不为1,则先显式地进行补零操作(tf.pad的四个维度依次为样本数、长、宽和通道数,只对长和宽补零),再以padding=VALID进行卷积。
4.定义堆叠Blocks的函数
注:enumerate在字典上是枚举、列举的意思。对于一个可迭代的(iterable)/可遍历的对象(如列表、字符串),enumerate将其组成一个索引序列,利用它可以同时获得索引和值。
@slim.add_arg_scope
def stack_blocks_dense(net, blocks,
                       outputs_collections=None):
    """Stack ResNet blocks on top of `net`, one residual unit at a time.

    Args:
        net: Input tensor.
        blocks: Iterable of `Block` tuples. Every entry of `block.args`
            must unpack to `(depth, depth_bottleneck, stride)`.
        outputs_collections: Collection to which the output of every block
            is added via `slim.utils.collect_named_outputs`.

    Returns:
        The tensor produced by the last unit of the last block (or `net`
        unchanged when `blocks` is empty).
    """
    for block in blocks:
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            for i, unit in enumerate(block.args):
                # Units are numbered from 1 inside their block's scope.
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
                    unit_depth, unit_depth_bottleneck, unit_stride = unit
                    net = block.unit_fn(net,
                                        depth=unit_depth,
                                        depth_bottleneck=unit_depth_bottleneck,
                                        stride=unit_stride)
            # Register the block output under the block's scope name.
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
    return net
第一行的@slim.add_arg_scope是slim提供的装饰器,它使下面自己定义的函数可以通过slim.arg_scope统一设置参数的默认值。
下面是用两层循环,逐个Block,逐个Residual Unit地堆叠。在第二层循环中我们拿到每个Block中的args,并展开为depth,depth_bottleneck和stride,然后使用unit_fn函数(即残差学习单元的生成函数)顺序创建并连接所有的残差学习单元。最后使用slim.utils.collect_named_outputs函数将输出添加至collection中,并返回net作为结果。
5.创建ResNet通用的arg_scope
def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    """Build the default `slim.arg_scope` used by the ResNet layers.

    Args:
        is_training: Passed to batch norm as `is_training`.
        weight_decay: Coefficient of the L2 regularizer on conv weights.
        batch_norm_decay: Passed to batch norm as `decay`.
        batch_norm_epsilon: Passed to batch norm as `epsilon`.
        batch_norm_scale: Passed to batch norm as `scale`.

    Returns:
        An arg_scope to be entered with `slim.arg_scope(...)`. It sets
        defaults for `slim.conv2d` (L2 regularizer, variance-scaling
        initializer, ReLU, batch norm), `slim.batch_norm` and
        `slim.max_pool2d` ('SAME' padding).
    """
    batch_norm_params = {
        'is_training': is_training,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        # Moving-average updates are collected here; the training op has to
        # depend on this collection for them to run.
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            # The innermost scope object carries the defaults of all three
            # nested scopes, so returning it is sufficient.
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
6.定义核心的bottleneck残差学习单元
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    """Build one pre-activation bottleneck residual unit.

    Batch norm followed by ReLU is applied to the input first ('preact').
    A 1x1 -> 3x3 -> 1x1 convolution stack then produces the residual,
    which is added to a shortcut branch.

    Args:
        inputs: Input tensor of rank 4 or higher; its last dimension is
            taken as the number of input channels.
        depth: Number of output channels of the unit.
        depth_bottleneck: Number of output channels of the first two
            convolutions.
        stride: Stride applied by the 3x3 convolution and by the shortcut.
        outputs_collections: Collection the unit output is added to.
        scope: Optional variable scope name; defaults to 'bottleneck_v2'.

    Returns:
        `shortcut + residual`, after registering it in
        `outputs_collections` under the unit's scope name.
    """
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')

        if depth == depth_in:
            # Channel counts already match: reuse the raw input and only
            # subsample it spatially.
            shortcut = subsample(inputs, stride, 'shortcut')
        else:
            # Channel counts differ: project the pre-activated input with a
            # plain 1x1 convolution (no normalizer, no activation).
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, stride,
                               scope='conv2')
        # The last convolution is linear; the following unit applies its own
        # batch norm and ReLU in 'preact'.
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name,
                                                output)
这个残差学习单元是ResNet V2论文中Full Preactivation Residual Unit的一个变种。它与ResNet V1的残差学习单元不同的是,它在每一层之前都使用了Batch Normalization,并且对输入进行了preactivation,而不是在卷积之后再进行激活函数处理。shortcut与residual相加,使输入的信息得以直接保留并传递到输出。
7.定义ResNet_V2的主函数
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              global_pool=True,
              include_root_block=True,
              reuse=None,
              scope=None):
    """Assemble a ResNet V2 network from a list of blocks.

    Args:
        inputs: Input tensor.
        blocks: List of `Block` tuples passed to `stack_blocks_dense`.
        num_classes: Number of output classes. When None, no 'logits'
            layer and no 'predictions' end point are created.
        global_pool: Whether to average over dimensions 1 and 2 (kept as
            size-1 dimensions) after the last block.
        include_root_block: Whether to prepend the 7x7 stride-2
            convolution and the 3x3 stride-2 max pooling.
        reuse: Passed to `tf.variable_scope` as `reuse`.
        scope: Optional variable scope name; defaults to 'resnet_v2'.

    Returns:
        A `(net, end_points)` pair: the final tensor and a dict built from
        the outputs collected in the network's end-points collection, plus
        'predictions' when `num_classes` is given.
    """
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Make every conv, bottleneck unit and block register its output in
        # the same collection.
        with slim.arg_scope([slim.conv2d, bottleneck,
                             stack_blocks_dense],
                            outputs_collections=end_points_collection):
            net = inputs
            if include_root_block:
                # The root convolution is linear; the first unit applies
                # batch norm and ReLU itself.
                with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
            net = stack_blocks_dense(net, blocks)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
            if global_pool:
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
            if num_classes is not None:
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')
            # Convert only after 'logits' has been created so that it is
            # part of the returned dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if num_classes is not None:
                end_points['predictions'] = slim.softmax(net, scope='predictions')
            return net, end_points
global_pool标志是否加上最后的一层全局平均池化,include_root_block标志是否加上ResNet网络最前面通常使用的7*7卷积和最大池化,reuse标志是否重用变量。
8.定义层数为152的ResNet
def resnet_v2_152(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_152'):
    """Build the 152-layer ResNet V2 configuration.

    The four blocks contain 3, 8, 36 and 3 bottleneck units. The last unit
    of each of the first three blocks uses stride 2.

    Args:
        inputs: Input tensor.
        num_classes: Number of output classes, or None for no logits layer.
        global_pool: Forwarded to `resnet_v2`.
        reuse: Forwarded to `resnet_v2`.
        scope: Variable scope name of the network.

    Returns:
        The `(net, end_points)` pair returned by `resnet_v2`.
    """
    blocks = [
        Block(
            'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block(
            'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        Block(
            'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block(
            'block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
9.下面写调用的部分
# Demo driver: trains ResNet-152 V2 on random data to exercise the graph.

x = tf.placeholder(tf.float32, [None, 224, 224, 3])
# One-hot labels, shaped like the [N, 1, 1, num_classes] network output.
y_ = tf.placeholder(tf.float32, [None, 1, 1, 10])
inputs = x

# The arg_scope must be active while the layers are created; entering it
# only around the training loop has no effect on an already-built graph.
with slim.arg_scope(resnet_arg_scope(is_training=True)):
    net, end_points = resnet_v2_152(inputs, 10)
Predictions = end_points['predictions']

# Compute the loss from the logits (numerically stable, unlike
# log(softmax)); the classes live on the last axis.
# NOTE: the L2 regularization losses registered by resnet_arg_scope are not
# added to this loss.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=net))

# Batch-norm moving averages are updated through UPDATE_OPS, so the train
# op has to depend on that collection.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Axis 3 is the class axis; axis 1 has size 1 and would always yield 0.
correct_prediction = tf.equal(tf.argmax(Predictions, 3), tf.argmax(y_, 3))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Random-batch generators are created once; building them inside the loop
# would add new ops to the graph on every iteration.
batch_size = 32
height, width = 224, 224
x_inputs = tf.random_uniform((batch_size, height, width, 3))
# maxval is exclusive for integers, so 10 covers the classes 0..9.
y_inputs = tf.one_hot(
    tf.random_uniform((batch_size, 1, 1), minval=0, maxval=10, dtype=tf.int32),
    depth=10)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for i in range(20000):
    images, labels = sess.run([x_inputs, y_inputs])
    feed = {x: images, y_: labels}
    if i % 2 == 0:
        train_accuracy = sess.run(accuracy, feed_dict=feed)
        print('step %d ,training accuracy %g' % (i, train_accuracy))
    sess.run(train_step, feed_dict=feed)

# `mnist` is not defined in this script and MNIST images (28x28 grayscale)
# would not fit the 224x224x3 placeholder, so a fresh random batch is
# evaluated instead.
test_images, test_labels = sess.run([x_inputs, y_inputs])
print("test accuracy %g" % sess.run(accuracy, feed_dict={
    x: test_images, y_: test_labels}))
前四行是进行一些变量的初始化,以及ResNet进行的主要过程;以交叉熵损失函数作为loss,优化器是AdamOptimizer,学习速率为1e-4。
注:本文大部分代码来自Tensorflow的开源实现