ResNet (simplified idea)
Network structure: first a convolutional layer, then a pooling layer, then a stack of residual blocks; after a residual block there may be a downsampling step (via max-pooling, or by giving a convolution a stride of 2).
The input to a residual block is split into two paths: the first goes through convolution layers, while the second is passed straight through (an identity connection) and added to the convolution output. What problem does this structure run into in practice? At some blocks the convolution path downsamples, shrinking its output to half the input size, but the identity path changes nothing, so the two operands of the addition have different sizes and the addition fails. To fix this, whenever the convolution path downsamples, the identity path is downsampled as well; in the implementation below this is done with average pooling, and the channel counts are then matched by zero-padding.
Implementation: since building a residual network means stacking many residual blocks, the block is factored out into a function whose parameters are the input x and the number of output channels.
Every time ResNet downsamples, it doubles the number of output channels, which compensates for the information lost by downsampling. So inside this function: if output_channel is twice input_channel, the block performs downsampling; if output_channel equals input_channel, it is just an ordinary residual block.
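For example, feeding an input of shape [None, 32, 32, 32] into a block with output_channel = 64: conv1 with stride (2, 2) produces [None, 16, 16, 64], while the identity branch is pooled down to [None, 16, 16, 32] and then zero-padded with input_channel // 2 = 16 extra channels on each side, also giving [None, 16, 16, 64], so the addition lines up.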
(Code from the notebook tensorflow1_14_0-resnet.ipynb.)
# Factor the residual block out into a function.
def residual_block(x, output_channel):
    """Residual connection implementation."""
    input_channel = x.get_shape().as_list()[-1]
    if input_channel * 2 == output_channel:
        # Doubling the channels means this block also downsamples.
        increase_dim = True
        strides = (2, 2)
    elif input_channel == output_channel:
        increase_dim = False
        strides = (1, 1)
    else:
        raise Exception("input channel can't match output channel")
    conv1 = tf.layers.conv2d(x,
                             output_channel,
                             (3, 3),  # kernel size
                             strides = strides,
                             padding = 'same',
                             activation = tf.nn.relu,
                             name = 'conv1')
    conv2 = tf.layers.conv2d(conv1,
                             output_channel,
                             (3, 3),  # kernel size
                             strides = (1, 1),
                             padding = 'same',
                             activation = tf.nn.relu,
                             name = 'conv2')
    if increase_dim:
        # [None, width, height, channel] -> [None, width/2, height/2, channel*2]
        pooled_x = tf.layers.average_pooling2d(x,
                                               (2, 2),  # kernel size
                                               (2, 2),  # stride
                                               padding = 'valid')
        # Zero-pad the identity branch along the channel axis
        # so that it matches conv2's channel count.
        padded_x = tf.pad(pooled_x,
                          [[0, 0],
                           [0, 0],
                           [0, 0],
                           [input_channel // 2, input_channel // 2]])
    else:
        padded_x = x
    output_x = conv2 + padded_x
    return output_x
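A quick static-shape check of the block (a minimal sketch, assuming TensorFlow 1.x graph mode; the input shape and scope name are just illustrative):

demo_x = tf.placeholder(tf.float32, [None, 32, 32, 32])
with tf.variable_scope('demo_residual'):
    demo_out = residual_block(demo_x, 64)  # a downsampling block
print(demo_out.get_shape().as_list())      # [None, 16, 16, 64]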
# Example call: res_net(x_image, [2, 3, 2], 32, 10)
def res_net(x,
            num_residual_blocks,
            num_filter_base,
            class_num):
    # Residual network implementation.
    # Args:
    # - num_residual_blocks: number of residual blocks per stage, eg: [3, 4, 6, 3]
    # - num_filter_base: base channel count, i.e. the initial number of channels
    # - class_num: number of classes, so the net can be reused on datasets
    #   with different numbers of classes
    # The number of downsampling steps is derived from the block list,
    # eg: [3, 4, 6, 3] -> 4 stages.
    num_subsampling = len(num_residual_blocks)
    layers = []
    # x: [None, width, height, channel] -> [width, height, channel]
    input_size = x.get_shape().as_list()[1:]
    # Open a named variable scope: variables created inside are named
    # conv0/xxxx, which effectively prevents naming conflicts.
    with tf.variable_scope('conv0'):
        conv0 = tf.layers.conv2d(x,
                                 num_filter_base,
                                 (3, 3),
                                 strides = (1, 1),
                                 padding = 'same',
                                 activation = tf.nn.relu,
                                 name = 'conv0')
        layers.append(conv0)
    # eg: num_subsampling = 3 -> sample_id in [0, 1, 2]
    for sample_id in range(num_subsampling):
        for i in range(num_residual_blocks[sample_id]):
            with tf.variable_scope("conv%d_%d" % (sample_id, i)):
                conv = residual_block(
                    layers[-1],
                    num_filter_base * (2 ** sample_id))
                layers.append(conv)
    with tf.variable_scope('fc'):
        # layers[-1].shape: [None, width, height, channel]
        # Global average pooling: collapse each feature map from a 2D
        # image to a single value, its mean over width and height.
        global_pool = tf.reduce_mean(layers[-1], [1, 2])
        logits = tf.layers.dense(global_pool, class_num)
        layers.append(logits)
    return layers[-1]
x = tf.placeholder(tf.float32, [None, 3072])
# y: [None], eg: [0, 5, 6, 3]
y = tf.placeholder(tf.int64, [None])
x_image = tf.reshape(x, [-1, 3, 32, 32])
# Move channels last: [None, 3, 32, 32] -> [None, 32, 32, 3]
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])
y_ = res_net(x_image, [2, 3, 2], 32, 10)
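The snippet above only builds the logits y_. A minimal sketch of the missing loss, accuracy, and train op (assuming the standard TensorFlow 1.x tf.losses / tf.train APIs; the learning rate 1e-3 is an arbitrary choice); the same lines would close off the InceptionNet and MobileNet graphs below:

loss = tf.losses.sparse_softmax_cross_entropy(labels = y, logits = y_)
# predict: the class with the largest logit
predict = tf.argmax(y_, 1)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, y), tf.float64))
with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)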
InceptionNet (simplified idea)
Basic idea: grouped convolution.
The grouped-convolution inception structure is wrapped in a function.
Note that each group receives the full input x; x itself is not split.
def inception_block(x,
                    output_channel_for_each_path,
                    name):
    """Inception block implementation.
    Args:
    - x: input
    - output_channel_for_each_path: output channels of each group,
      eg: [10, 20, 5]
    - name: variable scope name
    """
    with tf.variable_scope(name):
        conv1_1 = tf.layers.conv2d(x,
                                   output_channel_for_each_path[0],
                                   (1, 1),
                                   strides = (1, 1),
                                   padding = 'same',
                                   activation = tf.nn.relu,
                                   name = 'conv1_1')
        conv3_3 = tf.layers.conv2d(x,
                                   output_channel_for_each_path[1],
                                   (3, 3),
                                   strides = (1, 1),
                                   padding = 'same',
                                   activation = tf.nn.relu,
                                   name = 'conv3_3')
        conv5_5 = tf.layers.conv2d(x,
                                   output_channel_for_each_path[2],
                                   (5, 5),
                                   strides = (1, 1),
                                   padding = 'same',
                                   activation = tf.nn.relu,
                                   name = 'conv5_5')
        max_pooling = tf.layers.max_pooling2d(x,
                                              (2, 2),  # kernel size
                                              (2, 2),  # stride
                                              name = 'max_pooling')
        # The pooling branch halves the spatial size, so pad it back
        # to the input's width and height before concatenation.
        max_pooling_shape = max_pooling.get_shape().as_list()[1:]
        input_shape = x.get_shape().as_list()[1:]
        width_padding = (input_shape[0] - max_pooling_shape[0]) // 2
        height_padding = (input_shape[1] - max_pooling_shape[1]) // 2
        padded_pooling = tf.pad(max_pooling,
                                [[0, 0],
                                 [width_padding, width_padding],
                                 [height_padding, height_padding],
                                 [0, 0]])
        # Concatenate along the 4th dimension (channels), eg:
        # conv1_1: [None, 32, 32, 16], conv3_3: [None, 32, 32, 32],
        # conv5_5: [None, 32, 32, 48], padded_pooling: [None, 32, 32, 32]
        # -> result: [None, 32, 32, 128], since 16 + 32 + 48 + 32 = 128.
        concat_layer = tf.concat(
            [conv1_1, conv3_3, conv5_5, padded_pooling],
            axis = 3)
    return concat_layer
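As a concrete example: with an input of shape [None, 16, 16, 32] and output_channel_for_each_path = [16, 16, 16], the three conv branches contribute 16 + 16 + 16 channels, while the pooling branch (padded back to 16 x 16 spatially) keeps the input's 32 channels, so the block returns [None, 16, 16, 80]. Because the pooling branch passes its channels straight through, the channel count grows after every block.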
x = tf.placeholder(tf.float32, [None, 3072])
# y: [None], eg: [0, 5, 6, 3]
y = tf.placeholder(tf.int64, [None])
x_image = tf.reshape(x, [-1, 3, 32, 32])
# Move channels last: [None, 3, 32, 32] -> [None, 32, 32, 3]
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])
# conv1: feature map / output image
conv1 = tf.layers.conv2d(x_image,
                         32,  # output channel number
                         (3, 3),  # kernel size
                         padding = 'same',
                         activation = tf.nn.relu,
                         name = 'conv1')
pooling1 = tf.layers.max_pooling2d(conv1,
                                   (2, 2),  # kernel size
                                   (2, 2),  # stride
                                   name = 'pool1')
# Call inception_block
inception_2a = inception_block(pooling1,
                               [16, 16, 16],  # output channels per path
                               name = 'inception_2a')
inception_2b = inception_block(inception_2a,
                               [16, 16, 16],
                               name = 'inception_2b')
pooling2 = tf.layers.max_pooling2d(inception_2b,
                                   (2, 2),  # kernel size
                                   (2, 2),  # stride
                                   name = 'pool2')
inception_3a = inception_block(pooling2,
                               [16, 16, 16],
                               name = 'inception_3a')
inception_3b = inception_block(inception_3a,
                               [16, 16, 16],
                               name = 'inception_3b')
pooling3 = tf.layers.max_pooling2d(inception_3b,
                                   (2, 2),  # kernel size
                                   (2, 2),  # stride
                                   name = 'pool3')
flatten = tf.layers.flatten(pooling3)
y_ = tf.layers.dense(flatten, 10)
MobileNet (simplified idea)
Basic idea: depthwise separable convolution: split the input into its individual channels, convolve each channel on its own, then merge the per-channel results (the block below follows this with a 1 * 1 convolution to mix information across channels).
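Why this is cheap (a back-of-the-envelope count, not from the original notes): a standard k x k convolution with C_in input and C_out output channels costs k * k * C_in * C_out multiplications per output pixel, while the separable version costs k * k * C_in (depthwise) + C_in * C_out (1 * 1 conv), a ratio of 1/C_out + 1/(k * k). For k = 3, C_in = 32, C_out = 64 that is 288 + 2048 = 2336 versus 18432 per pixel, roughly 8x fewer multiplications.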
def separable_conv_block(x,
                         output_channel_number,
                         name):
    """Separable conv block implementation.
    Structure:
        3 * 3 depthwise conv
        ReLU
        1 * 1 conv
    Args:
    - x: input
    - output_channel_number: output channel count of the 1 * 1 conv layer
    - name: variable scope name
    """
    with tf.variable_scope(name):
        # Number of input channels
        input_channel = x.get_shape().as_list()[-1]
        # channel_wise_x: list, [channel1, channel2, ...]
        # Split the input into single channels along the 4th dimension:
        # tf.split(input, number of pieces, axis).
        channel_wise_x = tf.split(x, input_channel, axis = 3)
        output_channels = []
        # Convolve each channel separately with its own 3 * 3 filter.
        for i in range(len(channel_wise_x)):
            output_channel = tf.layers.conv2d(channel_wise_x[i],
                                              1,
                                              (3, 3),
                                              strides = (1, 1),
                                              padding = 'same',
                                              activation = tf.nn.relu,
                                              name = 'conv_%d' % i)
            output_channels.append(output_channel)
        # Merge the per-channel results back together.
        concat_layer = tf.concat(output_channels, axis = 3)
        # 1 * 1 conv mixes information across channels.
        conv1_1 = tf.layers.conv2d(concat_layer,
                                   output_channel_number,
                                   (1, 1),
                                   strides = (1, 1),
                                   padding = 'same',
                                   activation = tf.nn.relu,
                                   name = 'conv1_1')
    return conv1_1
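As an aside, TensorFlow 1.x also ships tf.layers.separable_conv2d, which realizes the same depthwise-then-1 * 1 structure as a single layer without the Python-level split loop; the hand-rolled version above is kept because it makes the idea explicit.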
x = tf.placeholder(tf.float32, [None, 3072])
# y: [None], eg: [0, 5, 6, 3]
y = tf.placeholder(tf.int64, [None])
x_image = tf.reshape(x, [-1, 3, 32, 32])
# Move channels last: [None, 3, 32, 32] -> [None, 32, 32, 3]
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])
# conv1: feature map / output image
conv1 = tf.layers.conv2d(x_image,
                         32,  # output channel number
                         (3, 3),  # kernel size
                         padding = 'same',
                         activation = tf.nn.relu,
                         name = 'conv1')
pooling1 = tf.layers.max_pooling2d(conv1,
                                   (2, 2),  # kernel size
                                   (2, 2),  # stride
                                   name = 'pool1')
separable_2a = separable_conv_block(pooling1,
                                    32,
                                    name = 'separable_2a')
separable_2b = separable_conv_block(separable_2a,
                                    32,
                                    name = 'separable_2b')
pooling2 = tf.layers.max_pooling2d(separable_2b,
                                   (2, 2),  # kernel size
                                   (2, 2),  # stride
                                   name = 'pool2')
separable_3a = separable_conv_block(pooling2,
                                    32,
                                    name = 'separable_3a')
separable_3b = separable_conv_block(separable_3a,
                                    32,
                                    name = 'separable_3b')
pooling3 = tf.layers.max_pooling2d(separable_3b,
                                   (2, 2),  # kernel size
                                   (2, 2),  # stride
                                   name = 'pool3')
flatten = tf.layers.flatten(pooling3)
y_ = tf.layers.dense(flatten, 10)