滑动平均
滑动平均(影子值):记录了每个参数一段时间内过往值的平均,增加了模型的泛化性。 针对所有参数:w和b (像是给参数加了影子,参数变化,影子缓慢追随/)
影子 = 衰减率* 影子 +(1 - 衰减率)* 参数 影子初值 = 参数初值
衰减率 = min{MOVING_AVERAGE_DECAY, (1 + step)/(10 + step)} step为轮数
# ExponentialMovingAverage(decay, num_updates): decay rate and current step.
ema = tf.train.ExponentialMovingAverage(
    MOVING_AVERAGE_DECAY,
    global_step)
# Running ema_op updates the moving average (shadow) of every trainable variable.
ema_op = ema.apply(tf.trainable_variables())
# Bind the EMA update to the training step so both run as a single node.
# (Fixed typo: original read `rain_step`; the op defined elsewhere is `train_step`.)
with tf.control_dependencies([train_step, ema_op]):
    train_op = tf.no_op(name='train')
# Query a variable's shadow value with ema.average(<variable>), e.g.:
# ema.average(w1)
代码样例
- 定义变量及滑动平均类 优化w1参数 滑动平均做w1的影子
- 定义num_updates(NN的迭代轮数),初始值为0,不可被训练
- 实例化滑动平均类,衰减率设为0.99 当前轮数global_step
- 每次运行sess.run(ema_op)对ema.apply()更新列表中的元素求滑动平均值
import tensorflow as tf
# w1: the tracked parameter; its moving average (shadow) will follow it.
w1 = tf.Variable(0,dtype=tf.float32)
# Step counter; trainable=False so ema.apply() does not track it.
global_step = tf.Variable(0,trainable=False)
MOVING_AVERAGE_DECAY=0.99
# Effective decay = min(MOVING_AVERAGE_DECAY, (1+step)/(10+step)).
ema = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY,
global_step)
# Each run of ema_op updates the shadow value of every trainable variable.
ema_op = ema.apply(tf.trainable_variables())
用ema.average(w1)获取w1滑动平均值,每次sess.run() 会更新一次w1滑动平均值
# Restored the indentation lost in the paste; structure follows the TF1 session idiom.
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # Initial state: w1 = 0, shadow = 0 (shadow starts equal to the parameter).
    print(sess.run([w1, ema.average(w1)]))
    # Simulate being at step 100 with w1 updated to 10.
    sess.run(tf.assign(global_step, 100))
    sess.run(tf.assign(w1, 10))
    # Each run of ema_op pulls the shadow value toward w1.
    for i in range(20):
        sess.run(ema_op)
        print(sess.run([w1, ema.average(w1)]))
# [0.0, 0.0]
# [10.0, 0.81818163]
# [10.0, 1.569421]
# [10.0, 2.2591956]
# [10.0, 2.892534]
# [10.0, 3.4740539]
# [10.0, 4.0079947]
# [10.0, 4.4982495]
# [10.0, 4.948393]
# [10.0, 5.3617063]
# [10.0, 5.741203]
# [10.0, 6.0896497]
# [10.0, 6.4095874]
# [10.0, 6.703348]
# [10.0, 6.973074]
# [10.0, 7.2207313]
# [10.0, 7.448126]
# [10.0, 7.6569157]
# [10.0, 7.8486223]
# [10.0, 8.024644]
# [10.0, 8.186264]
正则化缓和过拟合
正则化在loss函数中引入模型复杂度指标,利用给W加权值,弱化了训练数据的噪声(一般不正则化b)
loss = loss(y,y_)+REGULARIZER*loss(w)
- loss:模型中所有参数的损失函数
- REGULARIZER:用超参数给出参数w在总loss中的比例,即正则化的权重
- w:需要正则化的参数
loss(w) = tf.contrib.layers.l1_regularizer(REGULARIZER)(w)
loss(w) = tf.contrib.layers.l2_regularizer(REGULARIZER)(w)
把计算好的所有w正则化加载losses集合中:
tf.add_to_collection('losses',tf.contrib.layers.l2_regularizer(regularizer)(w))
tf.add_n 把所有值相加 再加上交叉熵得到loss:
loss = cem + tf.add_n(tf.get_collection('losses'))
——————————————————————————————————————
设计数据集
- 数据X[x0,x1]为正态分布随机点
- 标注Y_:当 x0²+x1²&lt;2 时y_=1(红),其余y_=0
区分红色与蓝色
使用matplotlib画图
plt.scatter(x=1,y=1,c="r") # x坐标,y坐标,颜色
plt.show()
np.mgrid[]给坐标赋值 xx.ravel()将数据拉直成一行n列 np.c_将x与y对应组合成为矩阵 得到所有网格坐标点
xx,yy = np.mgrid[start:end:1,start:end:1]
grid = np.c_[xx.ravel(),yy.ravel()]
将网格坐标点喂入神经网络,计算得到结果y 赋值给用来判断结果的量化值 使形状与xx一样
probs = sess.run(y,feed_dict={x:grid})
probs = probs.reshape(xx.shape)
生成300行2列的矩阵,作为输入数据集,并得出Y_为结果,并根据Y_给Y_c赋值颜色用于区分。对数据集XY进行shape整理,并画图
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

BATCH_SIZE = 30
seed = 2

# Fixed-seed generator so the data set is reproducible across runs.
rdm = np.random.RandomState(seed)
X = rdm.randn(300, 2)
# Label 1 ("red") for points inside the circle x0^2 + x1^2 < 2, else 0 ("blue").
Y_ = [1 if u * u + v * v < 2 else 0 for u, v in X]
Y_c = [[('red' if flag else 'blue')] for flag in Y_]
# Normalize shapes: X -> (n, 2), Y_ -> (n, 1).
X = np.vstack(X).reshape(-1, 2)
Y_ = np.vstack(Y_).reshape(-1, 1)
plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.show()
定义神经网络 前向传播
def get_weight(shape,regularizer):
    """Create a normally-initialized weight and register its L2 penalty in 'losses'."""
    weight = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection(
        'losses', tf.contrib.layers.l2_regularizer(regularizer)(weight))
    return weight
def get_bias(shape):
    """Create a bias variable initialized to the constant 0.01."""
    return tf.Variable(tf.constant(0.01, shape=shape))
# Placeholders: x is a batch of 2-D points, y_ the matching 0/1 labels.
x = tf.placeholder(tf.float32,shape=(None, 2))
y_= tf.placeholder(tf.float32,shape=(None,1))
# Hidden layer: 2 -> 11 units, ReLU, L2 regularization weight 0.01.
w1 = get_weight([2,11],0.01)
b1 = get_bias([11])
y1 = tf.nn.relu(tf.matmul(x,w1)+b1)
# Output layer: 11 -> 1.
w2 = get_weight([11,1],0.01)
b2 = get_bias([1])
y = tf.matmul(y1,w2)+b2# output layer: no activation
使用不包含正则化的loss函数
# Mean-squared error on the data term.
loss_mse = tf.reduce_mean(tf.square(y-y_))
# Data loss plus accumulated L2 penalties (defined here, used by the regularized run below).
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
# Train with the loss WITHOUT regularization.
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_mse)
# Restored the indentation lost in the paste; structure follows the TF1 session idiom.
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 40000
    for i in range(STEPS):
        # Cycle through the 300-sample set in BATCH_SIZE chunks.
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
        if i % 2000 == 0:
            loss_mse_v = sess.run(loss_mse, feed_dict={x: X, y_: Y_})
            print("After %d steps ,loss is: %f " % (i, loss_mse_v))
    # Dense grid over [-3,3]^2; evaluate the network on every grid point.
    xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
    grid = np.c_[xx.ravel(), yy.ravel()]
    print(xx.shape)
    probs = sess.run(y, feed_dict={x: grid})
    # Reshape predictions to the grid so they can be contoured.
    probs = probs.reshape(xx.shape)
    print('w1', sess.run(w1), 'b1', sess.run(b1), 'w2', sess.run(w2), 'b2', sess.run(b2))

# Decision boundary: the 0.5 level set of the network output.
plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[.5])
plt.show()
# After 0 steps ,loss is: 17.304857
# After 2000 steps ,loss is: 3.676223
# After 4000 steps ,loss is: 1.029984
# After 6000 steps ,loss is: 0.380914
# After 8000 steps ,loss is: 0.194118
# After 10000 steps ,loss is: 0.139421
# After 12000 steps ,loss is: 0.107350
# After 14000 steps ,loss is: 0.082768
# After 16000 steps ,loss is: 0.076988
# After 18000 steps ,loss is: 0.074694
# After 20000 steps ,loss is: 0.073808
# After 22000 steps ,loss is: 0.073279
# After 24000 steps ,loss is: 0.072903
# After 26000 steps ,loss is: 0.072554
# After 28000 steps ,loss is: 0.072066
# After 30000 steps ,loss is: 0.071615
# After 32000 steps ,loss is: 0.071293
# After 34000 steps ,loss is: 0.071010
# After 36000 steps ,loss is: 0.070667
# After 38000 steps ,loss is: 0.070384
改为包含正则化
# Mean-squared error on the data term.
loss_mse = tf.reduce_mean(tf.square(y-y_))
# Data loss plus the accumulated L2 penalties from the 'losses' collection.
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
# Train with the loss INCLUDING regularization.
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_total)
# Everything below is identical to the unregularized run.
# Restored the indentation lost in the paste; structure follows the TF1 session idiom.
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 40000
    for i in range(STEPS):
        # Cycle through the 300-sample set in BATCH_SIZE chunks.
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
        if i % 2000 == 0:
            loss_mse_v = sess.run(loss_mse, feed_dict={x: X, y_: Y_})
            print("After %d steps ,loss is: %f " % (i, loss_mse_v))
    # Dense grid over [-3,3]^2; evaluate the network on every grid point.
    xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
    grid = np.c_[xx.ravel(), yy.ravel()]
    print(xx.shape)
    probs = sess.run(y, feed_dict={x: grid})
    # Reshape predictions to the grid so they can be contoured.
    probs = probs.reshape(xx.shape)
    print('w1', sess.run(w1), 'b1', sess.run(b1), 'w2', sess.run(w2), 'b2', sess.run(b2))

# Decision boundary: the 0.5 level set of the network output.
plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[.5])
plt.show()
# After 0 steps ,loss is: 6.762687
# After 2000 steps ,loss is: 1.454304
# After 4000 steps ,loss is: 0.311554
# After 6000 steps ,loss is: 0.133483
# After 8000 steps ,loss is: 0.103376
# After 10000 steps ,loss is: 0.088640
# After 12000 steps ,loss is: 0.081629
# After 14000 steps ,loss is: 0.077943
# After 16000 steps ,loss is: 0.075808
# After 18000 steps ,loss is: 0.073653
# After 20000 steps ,loss is: 0.072593
# After 22000 steps ,loss is: 0.071901
# After 24000 steps ,loss is: 0.070255
# After 26000 steps ,loss is: 0.069900
# After 28000 steps ,loss is: 0.069471
# After 30000 steps ,loss is: 0.069070
# After 32000 steps ,loss is: 0.068832
# After 34000 steps ,loss is: 0.068625
# After 36000 steps ,loss is: 0.068546
# After 38000 steps ,loss is: 0.068471