'''
Forward propagation builds the network, i.e. designs the network structure.
forward.py
forward()
get_weight()
get_bias()
'''
import tensorflow as tf

# Assumed layer sizes so the sketch below is concrete; adapt to your task.
INPUT_NODE = 784
OUTPUT_NODE = 10
def forward(x, regularizer):
    # A minimal single-layer sketch (an assumption; the original template
    # leaves these three lines blank for your own architecture).
    w = get_weight([INPUT_NODE, OUTPUT_NODE], regularizer)
    b = get_bias([OUTPUT_NODE])
    y = tf.matmul(x, w) + b
    return y
def get_weight(shape, regularizer):
    # Initialize weights from a truncated normal distribution
    # (the initializer and stddev are assumptions; the template left it blank).
    w = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    if regularizer is not None:
        # Register the L2 penalty of w in the 'losses' collection.
        tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w
def get_bias(shape):
    '''
    Bias term b, initialized to zeros (the initializer is an assumption;
    the template left it blank).
    '''
    b = tf.Variable(tf.zeros(shape))
    return b
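
'''
A deeper sketch (not part of the original template): the same helpers compose
into a two-layer network; HIDDEN_NODE is a hypothetical hyperparameter.

def forward(x, regularizer):
    w1 = get_weight([INPUT_NODE, HIDDEN_NODE], regularizer)
    b1 = get_bias([HIDDEN_NODE])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = get_weight([HIDDEN_NODE, OUTPUT_NODE], regularizer)
    b2 = get_bias([OUTPUT_NODE])
    y = tf.matmul(y1, w2) + b2  # logits: no activation on the output layer
    return y
'''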
'''
Backpropagation trains the network, i.e. optimizes the network parameters.
backward.py
backward()
'''
import tensorflow as tf
import forward  # the forward.py module defined above
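
# Assumed hyperparameter values (hypothetical; the original template leaves
# them unstated) so the training sketch below is runnable as written.
BATCH_SIZE = 32
TOTAL_SAMPLES = 60000  # total number of samples in the dataset
STEPS = 50000
REGULARIZER = 0.0001
LEARNING_RATE_BASE = 0.1
LEARNING_RATE_DECAY = 0.99
MOVING_AVERAGE_DECAY = 0.99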
def backward():
    # Placeholders for a batch of input features x and labels y_
    # (dtypes/shapes are assumptions; the template leaves them blank).
    x = tf.placeholder(tf.float32, shape=(None, forward.INPUT_NODE))
    y_ = tf.placeholder(tf.float32, shape=(None, forward.OUTPUT_NODE))
    y = forward.forward(x, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)  # step counter, not trained
    # Total loss = data term (MSE here) + regularization terms collected in
    # 'losses'; see the notes below for the alternatives.
    loss = tf.reduce_mean(tf.square(y - y_)) + tf.add_n(tf.get_collection('losses'))
    '''
    Loss function (loss): the gap between the prediction y and the ground truth y_.
    Optimization goal: minimize loss --> mean squared error (MSE),
    cross entropy (CE), or a custom loss.
    Mean squared error measures the distance between vectors:
        loss_mse = tf.reduce_mean(tf.square(y - y_))
    Cross entropy measures the distance between two probability distributions:
        ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
        loss_ce = tf.reduce_mean(ce)
    With regularization added:
        loss = (loss_ce or loss_mse) + tf.add_n(tf.get_collection('losses'))
    Regularization mitigates overfitting.
    Regularization adds a model-complexity term to the loss function by
    penalizing the weights w, which weakens the noise in the training data
    (b is generally not regularized):
        loss = loss(y, y_) + REGULARIZER * loss(w)
    # the hyperparameter REGULARIZER sets the share of w in the total loss,
    # i.e. the weight of the regularization term
    # loss(w) = tf.contrib.layers.l1_regularizer(REGULARIZER)(w)  L1 regularization
    # loss(w) = tf.contrib.layers.l2_regularizer(REGULARIZER)(w)  L2 regularization
        tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    # adds the regularization term to the 'losses' collection
        loss = (cross entropy or MSE) + tf.add_n(tf.get_collection('losses'))
    # sums all values in 'losses' and adds them to the CE or MSE term,
    # giving the total loss
    '''
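    '''
    Worked example (illustrative numbers, not from the original; this assumes
    tf.contrib.layers.l2_regularizer(scale)(w) computes scale * sum(w**2) / 2,
    as tf.nn.l2_loss does): with REGULARIZER = 0.01 and w = [3.0, 4.0], the
    penalty is 0.01 * (9 + 16) / 2 = 0.125, which tf.add_n folds into the
    total loss together with the data term.
    '''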
    '''
    Learning rate learning_rate: the magnitude of each parameter update
        w_{n+1} = w_n - learning_rate * grad
    where grad is the gradient of the loss with respect to w.
    For an exponentially decaying learning rate (simulated annealing) use
        learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step / LEARNING_RATE_STEP)
    where LEARNING_RATE_BASE is the initial learning rate and
    LEARNING_RATE_DECAY in (0, 1) is the decay rate.
    In the exponent, global_step counts how many steps have run, and
    LEARNING_RATE_STEP is how many steps pass between decays,
    = total number of samples / BATCH_SIZE.
    Implemented with tf.train.exponential_decay().
    Otherwise set it directly, e.g. learning_rate = 0.001.
    '''
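    '''
    Numeric illustration (assumed numbers, not from the original): with
    LEARNING_RATE_BASE = 0.1, LEARNING_RATE_DECAY = 0.99,
    LEARNING_RATE_STEP = 100 and staircase=True, the exponent is floored,
    so the rate is 0.1 for steps 0..99, 0.1 * 0.99 = 0.099 for steps
    100..199, 0.1 * 0.99**2 = 0.09801 for steps 200..299, and so on.
    '''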
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        TOTAL_SAMPLES / BATCH_SIZE,  # decay steps (was "total samples / BATCH_SIZE")
        LEARNING_RATE_DECAY,
        staircase=True)
    # Pick exactly one optimizer; the three below are alternatives
    # (momentum would need a value, e.g. 0.9, if you use MomentumOptimizer).
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # train_step = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(loss)
    # train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    '''
    Exponential moving average (shadow values): keeps an average of each
    parameter's recent values, which improves the model's generalization.
    Applied to all trainable parameters: w and b.
    shadow = decay * shadow + (1 - decay) * parameter
    initial shadow = initial parameter value
    decay = min{MOVING_AVERAGE_DECAY, (1 + step) / (10 + step)}
    ema = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY,  # decay rate
        global_step)           # current step count
    ema_op = ema.apply(tf.trainable_variables())
    # each run of this op updates the moving average of every trainable parameter
    # ema.apply() computes moving averages for the variables passed to it
    # tf.trainable_variables() collects all trainable parameters into a list
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')
    '''
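    '''
    Numeric illustration (assumed numbers, not from the original): with
    MOVING_AVERAGE_DECAY = 0.99 at step 0, the effective decay is
    min(0.99, (1 + 0) / (10 + 0)) = 0.1, so when a parameter moves from
    0.0 to 1.0 its shadow becomes 0.1 * 0.0 + 0.9 * 1.0 = 0.9: early in
    training the shadow tracks the parameter quickly, while late in
    training (decay close to 0.99) it changes slowly and smooths out noise.
    '''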
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')
    with tf.Session() as sess:
        '''
        Initialize all variables.
        '''
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(STEPS):
            # xs, ys are hypothetical names for one batch of features/labels;
            # producing them is left open in the template (no data pipeline shown).
            # Run train_op (not train_step) so the EMA update also fires.
            sess.run(train_op, feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:  # reporting interval ("every N rounds"); 1000 is assumed
                print('After %d steps, loss is %g' % (i, sess.run(loss, feed_dict={x: xs, y_: ys})))
if __name__ == '__main__':
    backward()