搭建神经网络(构造数据集、前向传播、反向传播)
前向传播
前向传播搭建神经网络,设计整体网络结构,创建forward.py
:
//定义神经网络结构
def forward(x,regularizer): //传入参数:x:输入,regularizer:正则化权重
w= //定义权重参数
b= //定义偏置参数
y= //定义输出
return y
//初始化权重参数
def get_weight(shape,regularizer):
w = tf.Variable()
tf.add_to_collection('losses',tf.contrib.layers.l2_regularizer(regularizer)(w)) //将权重参数的正则化损失加入总损失
return w
//初始化偏置参数
def get_bias(shape):
b = tf.Variable()
return b
反向传播
反向传播训练网络,优化网络参数,创建backward.py
:
def backward():
x = tf.placeholder() //输入占位符
y_ = tf.placeholder() //标签占位符
y = forward.forward(x,REGULARIZER) //调用前向传播设计的网络结构求输出
global_step = tf.Variable(0,trainable=False) //定义训练轮数计数器
loss = //定义损失函数
//均方误差作损失函数
loss = tf.reduce_mean(tf.square(y-y_))
//交叉熵作损失函数
ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y,labels=tf.argmax(y_,1))
loss = tf.reduce_mean(ce)
//加入正则化
loss = loss + tf.add_n(tf.get_collection('losses'))
//动态计算学习率,指数衰减学习率
learning_rate = tf.train.exponential_decay(
LEARNING_RATE_BASE,
global_step,
数据集样本总数 / BATCH_SIZE,
LEARNING_RATE_DECAY,
staircase=True)
//定义反向传播梯度下降方式
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
//定义滑动平均
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
ema_op = ema.apply(tf.trainable_variables())
with tf.control_dependencies([train_step, ema_op]):
train_op = tf.no_op(name='train')
//神经网络训练
with tf.Session() as sess:
init_op = tf.global_variables_initializer()
sess.run(init_op)
for i in range(STEPS):
sess.run(train_step,feed_dict={x: ,y_: })
if i % 轮数 == 0:
print
if __name__=='__main__':
backward()
实践
train_data
构造神经网络训练数据集
import numpy as np
import matplotlib.pyplot as plt
seed = 2  # fixed RNG seed so the generated dataset is reproducible


def generateds():
    """Build the toy two-class dataset used to train the network.

    Draws 300 points from a 2-D standard normal distribution using a
    ``RandomState`` seeded with the module-level ``seed``. A point is
    labelled 1 when ``x0*x0 + x1*x1 < 2`` and 0 otherwise.

    Returns:
        X: float array of shape (300, 2) holding the sample coordinates.
        Y_: integer array of shape (300, 1) holding the 0/1 labels.
        Y_c: list of 300 one-element lists, ``['red']`` for label 1 and
            ``['blue']`` for label 0 (used as scatter-plot colours).
    """
    rdm = np.random.RandomState(seed)
    X = rdm.randn(300, 2)

    # Same test as the per-row comparison, evaluated on whole columns.
    inside = X[:, 0] * X[:, 0] + X[:, 1] * X[:, 1] < 2

    # Column vector of labels so it can be fed to a (None, 1) placeholder.
    Y_ = inside.astype(int).reshape(-1, 1)
    Y_c = [['red' if flag else 'blue'] for flag in inside]
    return X, Y_, Y_c


if __name__ == '__main__':
    # Smoke run only; guarded so importing this module does no work.
    generateds()
forward
定义神经网络的权重参数、偏置参数和前向传播过程
import tensorflow as tf
def get_weight(shape, regularizer):
    """Create a weight variable and register its L2 regularization loss.

    Args:
        shape: shape of the weight tensor, e.g. ``[2, 11]``.
        regularizer: scale passed to ``tf.contrib.layers.l2_regularizer``.

    Returns:
        A float32 ``tf.Variable`` initialised from ``tf.random_normal(shape)``.
    """
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # Store the penalty in the 'losses' collection; the training code sums
    # that collection into the total loss.
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w
def get_bias(shape):
    """Create a bias variable filled with the constant 0.01.

    Args:
        shape: shape of the bias tensor, e.g. ``[11]``.

    Returns:
        A ``tf.Variable`` of the given shape, every element set to 0.01.
    """
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b
def forward(x, regularizer):
    """Build the forward pass: a 2 -> 11 -> 1 fully connected network.

    Args:
        x: input tensor whose last dimension is 2 (it is multiplied by a
            ``[2, 11]`` weight matrix).
        regularizer: L2 regularization scale forwarded to ``get_weight``.

    Returns:
        The output tensor of the second layer; no activation is applied to it.
    """
    # Hidden layer: 11 units with ReLU activation.
    w1 = get_weight([2, 11], regularizer)
    b1 = get_bias([11])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)

    # Output layer: a single linear unit.
    w2 = get_weight([11, 1], regularizer)
    b2 = get_bias([1])
    y = tf.matmul(y1, w2) + b2
    return y
backward
定义损失函数、优化及神经网络训练过程
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import train_data
import forward
STEPS = 40000  # number of training iterations run by backward()
BATCH_SIZE = 30  # number of samples fed to each training step
LEARNING_RATE_BASE = 0.001  # initial learning rate of the exponential-decay schedule
LEARNING_RATE_DECAY = 0.999  # decay rate passed to tf.train.exponential_decay
REGULARIZER = 0.01  # regularization scale passed to forward.forward
def backward():
    """Train the network on the generated dataset and plot the decision curve.

    Builds the graph (placeholders, forward pass, exponentially decayed
    learning rate, MSE loss plus the regularization terms stored in the
    'losses' collection, Adam optimizer), runs ``STEPS`` mini-batch updates
    while printing the full-dataset loss every 2000 steps, then evaluates the
    network on a grid over [-3, 3) x [-3, 3) and draws the 0.5 contour on top
    of a scatter plot of the training points.
    """
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    # Training data: coordinates, 0/1 labels and per-point plot colours.
    X, Y_, Y_c = train_data.generateds()
    num_samples = len(X)  # derived from the data instead of a hard-coded 300

    y = forward.forward(x, REGULARIZER)

    # Step counter; not trainable, it is only advanced by the optimizer.
    global_step = tf.Variable(0, trainable=False)

    # Learning rate decays once per pass over the dataset (staircase mode).
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        num_samples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Mean squared error plus the regularization losses collected by forward.
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

    # global_step must be handed to minimize() so that it is incremented on
    # every update; otherwise it stays at 0 and the decay schedule above
    # never changes the learning rate.
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(
        loss_total, global_step=global_step)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(STEPS):
            # Cycle through the dataset in consecutive mini-batches.
            start = (i * BATCH_SIZE) % num_samples
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_v = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print('After %d steps, loss is: %f' % (i, loss_v))

        # Evaluate the trained network on a dense grid while the session is
        # still open; the result is used for the contour plot below.
        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)

    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    # The 0.5 level of the network output is drawn as the separating curve.
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()


if __name__ == '__main__':
    backward()
output
由上图看到,神经网络通过训练拟合出了一条曲线,将到坐标原点距离小于 $\sqrt{2}$ 的点圈在曲线内,通过定义不同的损失函数及优化方法可以得到不一样的拟合曲线。