Forward propagation builds the network: it defines the network structure (conventionally placed in its own file, forward.py).
#Forward propagation: design the network structure and assemble the complete graph
def forward(x, regularizer):  # x is the input; regularizer is the regularization weight; returns the prediction/classification result y
    w =
    b =
    y =
    return y
#Defining the weights w
def get_weight(shape, regularizer):  # shape is the shape of w; regularizer is the regularization weight
    w = tf.Variable(<initializer>)
    tf.add_to_collection("losses", tf.contrib.layers.l2_regularizer(regularizer)(w))  # add each w's regularization loss to the total-loss collection "losses"
    return w
#Defining the bias b
def get_bias(shape):  # shape is the shape of b, i.e. the number of biases in the layer
    b = tf.Variable(<initializer>)
    return b
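For concreteness, here is a minimal sketch of the skeleton filled in for a single fully connected layer. The layer sizes and the random-normal initializer are illustrative choices, not prescribed by the template; the full two-layer version appears in the modular example below.

import tensorflow as tf

def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection("losses", tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b

def forward(x, regularizer):
    w = get_weight([2, 1], regularizer)  # illustrative sizes: 2 inputs, 1 output
    b = get_bias([1])
    y = tf.matmul(x, w) + b
    return y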
Backward propagation trains the network: it optimizes the network parameters to improve model accuracy (backward.py).
#Backward propagation
def backward():
    x = tf.placeholder(...)
    y_ = tf.placeholder(...)
    y = forward.forward(x, REGULARIZER)  # build the forward-propagation graph
    global_step = tf.Variable(0, trainable=False)  # training-step counter, marked non-trainable
    loss =
    # Regularization, an exponentially decaying learning rate, and moving averages are commonly used to optimize the model during training.
I. loss = the loss between the predictions and the labels, plus the regularization term
There are three ways to compute the loss between y and y_:
1. Mean squared error: loss(y, y_) = tf.reduce_mean(tf.square(y - y_))
2. Cross entropy: ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
   loss(y, y_) = tf.reduce_mean(ce)
3. A custom loss(y, y_), as in the sketch after this list
This gives: loss = loss(y, y_) + tf.add_n(tf.get_collection('losses'))
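As an example of option 3, a custom loss can penalize over- and under-prediction differently. A minimal sketch, where COST and PROFIT are illustrative constants not taken from the original:

COST = 1.0    # illustrative: penalty per unit of over-prediction
PROFIT = 9.0  # illustrative: penalty per unit of under-prediction
loss_custom = tf.reduce_sum(
    tf.where(tf.greater(y, y_), COST * (y - y_), PROFIT * (y_ - y)))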
II. Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        total_samples / BATCH_SIZE,  # decay_steps: the number of batches per epoch
        LEARNING_RATE_DECAY,
        staircase=True
    )
Training step: train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
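Concretely, tf.train.exponential_decay computes
learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ^ (global_step / decay_steps)
where decay_steps = total_samples / BATCH_SIZE is the number of batches per epoch; with staircase=True the exponent is floored to an integer, so the learning rate drops once per epoch instead of continuously.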
III. Moving average
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())  # maintain a shadow average for every trainable variable
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')  # running train_op runs both the training step and the average update
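At evaluation time the shadow (averaged) values can be loaded in place of the raw weights. A minimal sketch using the same decay constant; ckpt_path is a hypothetical checkpoint saved during training:

ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
ema_restore = ema.variables_to_restore()  # maps each variable to its shadow name
saver = tf.train.Saver(ema_restore)
# saver.restore(sess, ckpt_path)  # each variable now holds its moving average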
    # Use a with block to initialize all variables, run the training loop, and print the loss
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(STEPS):
            sess.run(train_op, feed_dict={x: ..., y_: ...})  # run train_op so the EMA update executes along with the training step
            if i % <interval> == 0:
                print(...)  # e.g. the step count and current loss
#If this file is run as the main module, execute backward()
if __name__ == '__main__':
    backward()
Learning rate: the step size of each parameter update. If the learning rate is too large, the parameters oscillate around the minimum and fail to converge;
if it is too small, the parameters converge very slowly.
Moving average: tracks a running average of every weight w and bias b in the model over a window of recent training steps. Using these averages strengthens the model's generalization ability.
Regularization: adds a penalty on each weight w to the loss function, introducing a measure of model complexity that suppresses noise and reduces overfitting. There are two variants, L1 and L2, sketched below.
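A minimal sketch of the two variants, assuming the tf.contrib.layers API of TensorFlow 1.x used throughout this section:

import tensorflow as tf

REGULARIZER = 0.01
w = tf.Variable(tf.random_normal([2, 3]))
loss_l1 = tf.contrib.layers.l1_regularizer(REGULARIZER)(w)  # scaled sum of |w|; tends to drive weights to exactly zero (sparsity)
loss_l2 = tf.contrib.layers.l2_regularizer(REGULARIZER)(w)  # scaled sum of squares; tends to keep all weights small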
Modular construction
The model is split across three files:
generateds.py  generates the dataset
forward.py     builds the network
backward.py    trains the network
generateds.py
#coding:utf-8
#0. Import modules and generate a simulated dataset
import numpy as np
import matplotlib.pyplot as plt
seed = 2
def generateds():
    # Seed the random number generator so the dataset is reproducible
    rdm = np.random.RandomState(seed)
    # Draw a 300x2 matrix: 300 coordinate pairs (x0, x1) as the input dataset
    X = rdm.randn(300, 2)
    # For each row of X, assign label 1 if x0^2 + x1^2 < 2, else 0
    # These labels are the ground truth for the input dataset
    Y_ = [int(x0 * x0 + x1 * x1 < 2) for (x0, x1) in X]
    # Map each label to a color, 1 -> 'red' and 0 -> 'blue', so the classes are easy to tell apart in plots
    Y_c = [['red' if y else 'blue'] for y in Y_]
    # Reshape X and Y_: -1 in the first position means "infer from the second"; X has two columns, Y_ has one
    X = np.vstack(X).reshape(-1, 2)
    Y_ = np.vstack(Y_).reshape(-1, 1)
    return X, Y_, Y_c
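A quick usage sketch, assuming the file is saved as generateds.py so that it matches the import in backward.py:

import generateds
X, Y_, Y_c = generateds.generateds()
print(X.shape, Y_.shape, len(Y_c))  # (300, 2) (300, 1) 300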
forward.py
import tensorflow as tf
#Define the network's inputs, parameters, and outputs, and the forward-propagation pass
def get_weight(shape, regularizer):  # shape is the shape of w; regularizer is the regularization weight
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection("losses", tf.contrib.layers.l2_regularizer(regularizer)(w))  # add each w's regularization loss to the total-loss collection "losses"
    return w
#Defining the bias b
def get_bias(shape):  # shape is the shape of b, i.e. the number of biases in the layer
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b
def forward(x, regularizer):  # x is the input; regularizer is the regularization weight
    w1 = get_weight([2, 11], regularizer)
    b1 = get_bias([11])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = get_weight([11, 1], regularizer)
    b2 = get_bias([1])
    y = tf.matmul(y1, w2) + b2  # no activation on the output layer
    return y
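A shape sanity check for the forward pass, as a hedged sketch; the batch size of 5 and the regularizer value are arbitrary:

import numpy as np
import tensorflow as tf
import forward

x = tf.placeholder(tf.float32, shape=(None, 2))
y = forward.forward(x, 0.01)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(y, feed_dict={x: np.random.randn(5, 2)}).shape)  # (5, 1)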
backward.py
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import generateds
import forward
#Hyperparameters
STEPS = 40000
BATCH_SIZE = 300
LEARNING_RATE_BASE = 0.001
LEARNING_RATE_DECAY = 0.999
REGULARIZER = 0.01
#Backward propagation
def backward():
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))
    X, Y_, Y_c = generateds.generateds()
    y = forward.forward(x, REGULARIZER)  # build the forward-propagation graph and get the output y
    global_step = tf.Variable(0, trainable=False)  # training-step counter, marked non-trainable
    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        300 / BATCH_SIZE,  # decay_steps: batches per epoch
        LEARNING_RATE_DECAY,
        staircase=True
    )
    # Loss function including the regularization term
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection("losses"))
    # Training step: minimize the regularized loss; pass global_step so the learning rate actually decays
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_total, global_step=global_step)
    # Use a with block to initialize all variables, run the training loop, and print the loss
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(STEPS):
            start = (i * BATCH_SIZE) % 300
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:  # print the current loss every 2000 steps
                loss_v = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print("After %d steps,loss is :%f" % (i, loss_v))
        # Generate grid points over [-3, 3] x [-3, 3] with 0.01 spacing, collected into the coordinate set grid
        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})  # evaluate the network on every grid point
        probs = probs.reshape(xx.shape)
    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()
#If this file is run as the main module, execute backward()
if __name__ == '__main__':
    backward()
Results of a sample run:
After 0 steps,loss is :11.990105
After 2000 steps,loss is :0.248668
After 4000 steps,loss is :0.184870
After 6000 steps,loss is :0.148596
After 8000 steps,loss is :0.111184
After 10000 steps,loss is :0.093076
After 12000 steps,loss is :0.090954
After 14000 steps,loss is :0.090920
After 16000 steps,loss is :0.090845
After 18000 steps,loss is :0.090826
After 20000 steps,loss is :0.090824
After 22000 steps,loss is :0.090824
After 24000 steps,loss is :0.090571
After 26000 steps,loss is :0.090571
After 28000 steps,loss is :0.090571
After 30000 steps,loss is :0.090571
After 32000 steps,loss is :0.090571
After 34000 steps,loss is :0.090571
After 36000 steps,loss is :0.090571
After 38000 steps,loss is :0.090570