# Build a single residual block
def residual_block(x, num_filters):
    """Build one two-convolution residual block on top of ``x``.

    The block either keeps the channel count (first convolution has stride 1
    and the shortcut is ``x`` itself) or doubles it (first convolution has
    stride 2; the shortcut is average-pooled by 2 and zero-padded along the
    channel axis so it matches the residual branch). This follows the
    VGG/ResNet rule that every spatial subsampling doubles the channel count.

    Args:
        x: Input tensor. Its last dimension is read as the channel count;
            the pooling and padding below assume a 4-D, channels-last layout.
        num_filters: Output channel count. Must equal the input channel
            count or exactly twice it.

    Returns:
        The element-wise sum of the shortcut and the second convolution's
        output.

    Raises:
        ValueError: If the channel dimension of ``x`` is not statically
            known, or if ``num_filters`` is neither 1x nor 2x the input
            channel count.
    """
    x_channel = x.get_shape().as_list()[-1]
    if x_channel is None:
        # Without a static channel count we can neither choose the stride
        # nor compute the padding widths.
        raise ValueError(
            "residual_block requires a statically known channel dimension")

    if x_channel * 2 == num_filters:
        increase_dim = True
        strides = (2, 2)
    elif x_channel == num_filters:
        increase_dim = False
        strides = (1, 1)
    else:
        raise ValueError(
            "num_filters (%r) must equal the input channel count (%r) "
            "or exactly twice it" % (num_filters, x_channel))

    # NOTE: ReLU is applied inside both convolutions; no activation is
    # applied after the shortcut addition.
    conv0 = tf.layers.conv2d(x,
                             num_filters,
                             (3, 3),
                             strides=strides,
                             activation=tf.nn.relu,
                             padding='same',
                             name='conv0')
    conv1 = tf.layers.conv2d(conv0,
                             num_filters,
                             (3, 3),
                             strides=(1, 1),
                             activation=tf.nn.relu,
                             padding='same',
                             name='conv1')

    # Two ways to make the shortcut match a wider residual branch:
    #   1. zero-pad the channel axis (adds no parameters) -- used here;
    #   2. project with a learned 1x1 convolution.
    if increase_dim:
        # Halve the spatial size so it matches the stride-2 convolution.
        pool_x = tf.layers.average_pooling2d(x,
                                             (2, 2),
                                             (2, 2),
                                             padding='same',
                                             name='pool_x')
        # Add x_channel zero channels in total. Splitting as
        # (floor, remainder) keeps the total exact for odd channel counts.
        pad_before = x_channel // 2
        pad_after = x_channel - pad_before
        pad_x = tf.pad(pool_x,
                       [[0, 0],
                        [0, 0],
                        [0, 0],
                        [pad_before, pad_after]])
    else:
        pad_x = x

    out_x = pad_x + conv1
    return out_x
# Assemble the full residual network: stem, residual stages, classifier head
def res_net(x, num_filter_block, num_filter_base, class_num):
    """Stack a stem, several stages of residual blocks and a dense head.

    Args:
        x: Input tensor fed to the stem convolution.
        num_filter_block: Number of residual blocks in each stage, e.g.
            ``[3, 4, 6, 3]``. Stage ``i`` uses
            ``num_filter_base * 2 ** i`` filters.
        num_filter_base: Filter count of the stem convolution and of the
            first stage.
        class_num: Number of units in the final dense layer.

    Returns:
        The output tensor of the final dense layer (the logits).
    """
    # Stem: a 7x7 stride-2 convolution followed by an overlapping
    # 3x3 stride-2 max pooling.
    with tf.variable_scope('conv2'):
        stem_conv = tf.layers.conv2d(
            x,
            num_filter_base,
            (7, 7),
            strides=(2, 2),
            activation=tf.nn.relu,
            padding='same',
            name='conv0',
        )
        stem_pool = tf.layers.max_pooling2d(
            stem_conv,
            (3, 3),
            (2, 2),
            name='pool0',
        )

    # Each residual block consumes the previous block's output, so one
    # running tensor is enough.
    net = stem_pool
    for stage, block_count in enumerate(num_filter_block):
        stage_filters = num_filter_base * (2 ** stage)
        for block in range(block_count):
            with tf.variable_scope("conv%d_%d" % (stage, block)):
                net = residual_block(net, stage_filters)

    # Head: average over axes 1 and 2 (the two spatial axes), giving one
    # value per channel, then project to class_num outputs.
    with tf.variable_scope('fc'):
        pooled = tf.reduce_mean(net, [1, 2])
        logits = tf.layers.dense(pooled, class_num)
    return logits
# Implementation of the algorithm (ResNet-34) from "Deep Residual Learning for Image Recognition".
# (Source-page footer: latest recommended article published 2024-08-19 15:00:00.)