1. _variable_on_cpu creates a variable stored in CPU memory.
def _variable_on_cpu(name, shape, initializer, use_fp16=False, trainable=True):
    """Helper to create a Variable stored on CPU memory.
    Args:
        name: name of the variable
        shape: list of ints
        initializer: initializer for Variable
        use_fp16: bool, store the variable as float16 instead of float32
        trainable: bool, whether the variable is trainable
    Returns:
        Variable Tensor
    """
    with tf.device('/cpu:0'):
        dtype = tf.float16 if use_fp16 else tf.float32
        var = tf.get_variable(name, shape, initializer=initializer,
                              dtype=dtype, trainable=trainable)
    return var
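A minimal usage sketch (TF 1.x graph mode; the variable name and shape below are illustrative, not taken from the original file):
import tensorflow as tf

# Create a 256-element bias vector pinned to host (CPU) memory.
biases = _variable_on_cpu('fc_biases', [256], tf.constant_initializer(0.0))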
2. _variable_with_weight_decay creates an initialized variable and, based on it, computes an L2 weight-decay term.
A Python aside: None, the empty list [], the empty dict {}, the empty tuple (), 0, and other objects representing emptiness are converted to False in a boolean context; every other object is converted to True. The code below, however, tests wd is not None rather than truthiness, so wd=0.0 still adds a (zero-valued) decay term.
Xavier weight initialization: the basic idea is to keep the variance of the signal constant as it flows through the network. It assumes an activation function symmetric about zero, was derived mainly for fully connected networks, and suits tanh and softsign. In practice the weights are drawn from a uniform distribution.
What is the benefit of attaching weight decay here, and if wd is set, does the decay actually act on var? Note that the decay value is not returned from the function: tf.add_to_collection stashes it in the 'losses' collection, and the training script later sums that collection into the total loss (see the related operations below), so the L2 penalty reaches var through the gradients rather than through the return value.
def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
    """Helper to create an initialized Variable with weight decay.
    Note that the Variable is initialized with a truncated normal distribution.
    A weight decay is added only if one is specified.
    Args:
        name: name of the variable
        shape: list of ints
        stddev: standard deviation of a truncated Gaussian
        wd: add L2Loss weight decay multiplied by this float. If None, weight
            decay is not added for this Variable.
        use_xavier: bool, whether to use xavier initializer
    Returns:
        Variable Tensor
    """
    if use_xavier:
        initializer = tf.contrib.layers.xavier_initializer()
    else:
        initializer = tf.truncated_normal_initializer(stddev=stddev)
    var = _variable_on_cpu(name, shape, initializer)
    if wd is not None:  # only skip the decay term when wd is None
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var
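A hypothetical call (the kernel name and shape are illustrative): create a 3x3x3x64 convolution kernel whose L2 norm, scaled by 1e-4, is added to the 'losses' collection:
kernel = _variable_with_weight_decay('weights',
                                     shape=[3, 3, 3, 64],
                                     stddev=1e-3,
                                     wd=1e-4)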
Some related operations:
tf.nn.l2_loss(t, name=None) computes ½Σw² and is typically used as the regularization term in an optimization objective, discouraging overly large and complex parameters that would overfit.
Explanation: this function measures a tensor's magnitude via the L2 norm, except that it takes no square root and keeps only half of the squared norm:
- output = sum(t ** 2) / 2
Input parameters:
- t: a Tensor. Its dtype must be one of: float32, float64, int64, int32, uint8, int16, int8, complex64, qint8, quint8, qint32. The data is usually two-dimensional, but any rank is accepted.
- name: an optional name for the operation.
Output:
- a Tensor of the same dtype as t; it is a scalar.
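A quick numeric check (TF 1.x session syntax): for t = [3.0, 4.0] the result is (3² + 4²) / 2 = 12.5:
import tensorflow as tf

t = tf.constant([3.0, 4.0])
loss = tf.nn.l2_loss(t)          # (9 + 16) / 2
with tf.Session() as sess:
    print(sess.run(loss))        # 12.5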
tf.add_to_collection(name, value) appends value to the collection named 'name', forming a list;
tf.get_collection(key, scope=None) returns all elements of the collection named 'key' as a list, in the order they were added; the optional scope argument is a name scope: if given, only the variables added under that scope are returned, otherwise all of them are.
tf.add_n(inputs, name=None) sums all the tensors in the 'inputs' list; name is an optional operation name.
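Putting the three together answers the earlier question about where the decay term goes. A common training-script pattern (classify_loss is an assumed name, not from this file):
# Add the data term to the same collection the decay terms live in,
# then sum everything into the loss that the optimizer minimizes.
tf.add_to_collection('losses', classify_loss)
total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')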
3. The convolution operation
def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=0.0,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None,
           is_dist=False):
    """ 2D convolution with non-linear operation.
    Args:
        inputs: 4-D tensor variable BxHxWxC
        num_output_channels: int
        kernel_size: a list of 2 ints
        scope: string
        stride: a list of 2 ints
        padding: 'SAME' or 'VALID'
        use_xavier: bool, use xavier_initializer if true
        stddev: float, stddev for truncated_normal init
        weight_decay: float
        activation_fn: function
        bn: bool, whether to use batch norm
        bn_decay: float or float tensor variable in [0,1]
        is_training: bool Tensor variable
        is_dist: bool, use the distributed batch-norm template if true
    Returns:
        Variable tensor
    """
    with tf.variable_scope(scope) as sc:
        kernel_h, kernel_w = kernel_size
        num_in_channels = inputs.get_shape()[-1].value
        kernel_shape = [kernel_h, kernel_w,
                        num_in_channels, num_output_channels]
        kernel = _variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        stride_h, stride_w = stride
        # The kernel acts as the weights: convolving inputs with it
        # produces the new outputs.
        outputs = tf.nn.conv2d(inputs, kernel,
                               [1, stride_h, stride_w, 1],
                               padding=padding)
        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        # Add the biases to get the pre-activation outputs.
        outputs = tf.nn.bias_add(outputs, biases)
        if bn:
            outputs = batch_norm_for_conv2d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn',
                                            is_dist=is_dist)
        if activation_fn is not None:
            if tf.nn.relu == activation_fn:
                outputs = activation_fn(outputs)
            elif tf.nn.leaky_relu == activation_fn:
                outputs = activation_fn(outputs, 0.1)
        return outputs
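A hypothetical call in the point-cloud setting this file comes from (the tensor and placeholder names are assumed): a 1x1 convolution lifting per-point xyz features to 64 channels:
# point_cloud_expanded: B x N x 1 x 3 (one "pixel" row per point)
net = conv2d(point_cloud_expanded, 64, [1, 1],
             scope='conv1', stride=[1, 1], padding='VALID',
             bn=True, bn_decay=bn_decay, is_training=is_training_pl)
# net: B x N x 1 x 64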
The input is a 4-D tensor variable BxHxWxC: B is the batch size, W is the number of points, and C is each point's feature vector [x1, x2, ..., xc].
kernel_shape = [kernel_h, kernel_w, num_in_channels, num_output_channels]
outputs = tf.nn.conv2d(inputs, kernel, [1, stride_h, stride_w, 1], padding=padding)
tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, name=None)
Aside from name, which names the operation, the method takes five parameters:
- input: the images to convolve. A 4-D Tensor of shape [batch, in_height, in_width, in_channels], i.e. [number of images per training batch, image height, image width, number of channels]; its dtype must be float32 or float64.
- filter: the convolution kernel of the CNN. A Tensor of shape [filter_height, filter_width, in_channels, out_channels], i.e. [kernel height, kernel width, input channels, number of kernels], with the same dtype as input. Note that its third dimension, in_channels, must equal the fourth dimension of input.
- strides: the stride of the convolution along each dimension of the input; a 1-D vector of length 4.
- padding: a string, either "SAME" or "VALID", selecting the padding scheme.
- use_cudnn_on_gpu: bool, whether to use cuDNN acceleration; defaults to true.
The result is a Tensor, the familiar feature map, again of shape [batch, height, width, channels].
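A shape check under SAME padding (values are illustrative): 32 RGB images of 28x28 convolved with 64 kernels of size 5x5 keep their spatial size:
import tensorflow as tf

images = tf.zeros([32, 28, 28, 3])    # [batch, height, width, channels]
kernel = tf.zeros([5, 5, 3, 64])      # [kh, kw, in_channels, out_channels]
fmap = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
print(fmap.get_shape())               # (32, 28, 28, 64)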
4. The fully connected layer
weights holds the fully connected layer's parameters, which are multiplied with the input as a tensor product. Since outputs = tf.matmul(inputs, weights), the last two dimensions must satisfy the usual matrix-multiplication compatibility rule; with a 2-D input this is just ordinary matrix multiplication.
def fully_connected(inputs,
                    num_outputs,
                    scope,
                    use_xavier=True,
                    stddev=1e-3,
                    weight_decay=0.0,
                    activation_fn=tf.nn.relu,
                    bn=False,
                    bn_decay=None,
                    is_training=None,
                    is_dist=False):
    """ Fully connected layer with non-linear operation.
    Args:
        inputs: 2-D tensor BxN
        num_outputs: int
    Returns:
        Variable tensor of size B x num_outputs.
    """
    with tf.variable_scope(scope) as sc:
        # Size of the last dimension of the input tensor.
        num_input_units = inputs.get_shape()[-1].value
        weights = _variable_with_weight_decay('weights',
                                              shape=[num_input_units, num_outputs],
                                              use_xavier=use_xavier,
                                              stddev=stddev,
                                              wd=weight_decay)
        outputs = tf.matmul(inputs, weights)
        biases = _variable_on_cpu('biases', [num_outputs],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases)
        if bn:
            outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn',
                                        is_dist=is_dist)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
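A hypothetical call (the names are assumed, not from this file): mapping a B x 1024 global feature to B x 512 with batch norm:
net = fully_connected(global_feat, 512, scope='fc1',
                      bn=True, bn_decay=bn_decay, is_training=is_training_pl)
# net: B x 512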
5. Normalizing the output of the fully connected layer
def batch_norm_for_fc(inputs, is_training, bn_decay, scope, is_dist=False):
    """ Batch normalization on FC data.
    Args:
        inputs: Tensor, 2D BxC input
        is_training: boolean tf.Variable, true indicates training phase
        bn_decay: float or float tensor variable, controlling moving average weight
        scope: string, variable scope
        is_dist: bool, true indicates a distributed training scheme
    Return:
        normed: batch-normalized maps
    """
    if is_dist:
        return batch_norm_dist_template(inputs, is_training, scope, [0, ], bn_decay)
    else:
        return batch_norm_template(inputs, is_training, scope, [0, ], bn_decay)
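Here moments_dims = [0] means the statistics are taken over the batch axis only, one mean/variance pair per channel. A minimal illustration of tf.nn.moments:
import tensorflow as tf

x = tf.constant([[1.0, 2.0],
                 [3.0, 4.0]])         # B=2, C=2
mean, var = tf.nn.moments(x, [0])     # reduce over the batch axis
with tf.Session() as sess:
    print(sess.run([mean, var]))      # [2. 3.], [1. 1.]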
How do the two normalization templates below actually work?
def batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay):
    """ Batch normalization on convolutional maps and beyond...
    Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
    Args:
        inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
        is_training: boolean tf.Variable, true indicates training phase
        scope: string, variable scope
        moments_dims: a list of ints, indicating dimensions for moments calculation
        bn_decay: float or float tensor variable, controlling moving average weight
    Return:
        normed: batch-normalized maps
    """
    with tf.variable_scope(scope) as sc:
        num_channels = inputs.get_shape()[-1].value
        beta = tf.Variable(tf.constant(0.0, shape=[num_channels]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[num_channels]),
                            name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
        decay = bn_decay if bn_decay is not None else 0.9
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        # Operator that maintains moving averages of variables.
        ema_apply_op = tf.cond(is_training,
                               lambda: ema.apply([batch_mean, batch_var]),
                               lambda: tf.no_op())

        # Update moving average and return current batch's avg and var.
        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # ema.average returns the Variable holding the average of var.
        mean, var = tf.cond(is_training,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
    return normed
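The exponential moving average updates each shadow variable as shadow = decay * shadow + (1 - decay) * value. A worked step with decay = 0.9:
# One EMA step, written out in plain Python:
decay = 0.9
shadow_mean = 0.5     # running estimate so far
batch_mean = 1.5      # statistic from the current batch
shadow_mean = decay * shadow_mean + (1 - decay) * batch_mean
print(shadow_mean)    # 0.6
During training, tf.cond runs ema.apply and the layer normalizes with the batch statistics; at test time it normalizes with the accumulated shadow averages instead.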
def batch_norm_dist_template(inputs, is_training, scope, moments_dims, bn_decay):
    """ The batch normalization for distributed training.
    Args:
        inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
        is_training: boolean tf.Variable, true indicates training phase
        scope: string, variable scope
        moments_dims: a list of ints, indicating dimensions for moments calculation
        bn_decay: float or float tensor variable, controlling moving average weight
    Return:
        normed: batch-normalized maps
    """
    with tf.variable_scope(scope) as sc:
        num_channels = inputs.get_shape()[-1].value
        beta = _variable_on_cpu('beta', [num_channels], initializer=tf.zeros_initializer())
        gamma = _variable_on_cpu('gamma', [num_channels], initializer=tf.ones_initializer())
        pop_mean = _variable_on_cpu('pop_mean', [num_channels], initializer=tf.zeros_initializer(), trainable=False)
        pop_var = _variable_on_cpu('pop_var', [num_channels], initializer=tf.ones_initializer(), trainable=False)

        def train_bn_op():
            batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
            decay = bn_decay if bn_decay is not None else 0.9
            train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
            train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
            with tf.control_dependencies([train_mean, train_var]):
                return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, 1e-3)

        def test_bn_op():
            return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, 1e-3)

        normed = tf.cond(is_training,
                         train_bn_op,
                         test_bn_op)
        return normed
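The distributed template differs from batch_norm_template only in how the running statistics are stored: instead of ExponentialMovingAverage shadow variables, it keeps explicit non-trainable pop_mean/pop_var variables on the CPU and updates them with tf.assign using the same rule, pop_mean <- pop_mean * decay + batch_mean * (1 - decay). That makes the statistics ordinary named variables that a distributed setup can checkpoint and share across workers.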
Related operation: tf.nn.batch_normalization() performs the actual batch normalization.
# The function that ultimately performs batch normalization.
tf.nn.batch_normalization(
    x,
    mean,
    variance,
    offset,
    scale,
    variance_epsilon,
    name=None
)
Parameters:
- x: the input samples
- mean: the sample mean
- variance: the sample variance
- offset: an additive shift applied after scaling (beta)
- scale: the multiplicative scale factor (gamma; treated as 1 if None)
- variance_epsilon: a small value added to the variance to avoid division by zero
The output is computed as:
- y = scale * (x - mean) / sqrt(variance + variance_epsilon) + offset
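A quick numeric check of the formula (illustrative values):
import tensorflow as tf

x = tf.constant([2.0])
y = tf.nn.batch_normalization(x, mean=1.0, variance=4.0,
                              offset=0.0, scale=1.0,
                              variance_epsilon=1e-3)
with tf.Session() as sess:
    print(sess.run(y))   # ~[0.49994] = (2 - 1) / sqrt(4 + 0.001)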