第一种实现:
import tensorflow as tf
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)
from tensorflow.examples.tutorials.mnist import input_data
tf.logging.set_verbosity(old_v)
#mnist = input_data.read_data_sets("/home/lg/Desktop/learn/MNIST_data/",one_hot=True)
#print('Training data size: ', mnist.train.num_examples)
#print('Validation data size: ', mnist.validation.num_examples)
#print('Test data size: ', mnist.test.num_examples)
# --- MNIST dataset constants -------------------------------------------
# Size of the input layer: one node per pixel of a 28x28 image.
INPUT_NODE = 28 * 28
# Size of the output layer: one node per class.
OUTPUT_NODE = 10

# --- Network / training configuration ----------------------------------
# Number of nodes in the hidden layer; this example uses a network with a
# single hidden layer.
LAYER1_NODE = 500
# Number of training examples per mini-batch. Smaller batches bring training
# closer to stochastic gradient descent, larger batches closer to
# (full-batch) gradient descent.
BATCH_SIZE = 100
# Base learning rate.
LEARNING_RATE_BASE = 0.8
# Decay rate applied to the learning rate.
LEARNING_RATE_DECAY = 0.99
# Coefficient of the regularization term in the loss function.
REGULARIZATION_RATE = 0.0001
# Number of training steps.
TRAINING_STEPS = 30000
# Decay rate of the moving average.
MOVING_AVERAGE_DECAY = 0.99
# 辅助函数,用于计算前向
def inference(input_tensor, avg_class, weight1, bias1, weight2, bias2):
    """Compute the forward pass of the one-hidden-layer network.

    Args:
        input_tensor: Input batch; it is matrix-multiplied with ``weight1``.
        avg_class: ``None`` to use the given variables directly, or an object
            whose ``average(var)`` method is called for every parameter (the
            callers in this file pass a ``tf.train.ExponentialMovingAverage``).
        weight1: Hidden-layer weights.
        bias1: Hidden-layer bias.
        weight2: Output-layer weights.
        bias2: Output-layer bias.

    Returns:
        The output-layer result without softmax applied. Softmax is left out
        because the loss applies it itself, and prediction only depends on
        the relative size of the per-class outputs.
    """
    # Identity comparison is the correct test for None; `== None` goes
    # through __eq__, which arbitrary objects are free to override.
    if avg_class is None:
        # Hidden layer with ReLU activation, using the raw variables.
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weight1) + bias1)
        return tf.matmul(layer1, weight2) + bias2

    # Same computation, but every parameter is replaced by the value that
    # avg_class.average() returns for it.
    layer1 = tf.nn.relu(
        tf.matmul(input_tensor, avg_class.average(weight1))
        + avg_class.average(bias1))
    return tf.matmul(layer1, avg_class.average(weight2)) + avg_class.average(bias2)
# 训练模型的过程
def train(mnist):
    """Build the training graph, run the training loop and print accuracies.

    Args:
        mnist: Dataset object providing ``train``, ``validation`` and ``test``
            splits. The code reads ``train.num_examples``, calls
            ``train.next_batch(BATCH_SIZE)`` and feeds the ``images`` /
            ``labels`` of the validation and test splits.

    The validation accuracy is printed every 1000 steps and the test accuracy
    once after the last step; both use the moving-average parameters.
    """
    x = tf.placeholder(tf.float32, shape=(None, INPUT_NODE), name='x-input')
    y_gt = tf.placeholder(tf.float32, shape=(None, OUTPUT_NODE), name='y_gt-input')

    # Hidden-layer parameters.
    hidden_w = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    hidden_b = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Output-layer parameters.
    out_w = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    out_b = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Forward pass on the current parameter values (no moving average).
    logits = inference(x, None, hidden_w, hidden_b, out_w, out_b)

    # Step counter. It is created with trainable=False so that it is not
    # among the variables returned by tf.trainable_variables() below.
    global_step = tf.Variable(0, trainable=False)

    # Moving-average helper, applied to every trainable variable.
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_update_op = ema.apply(tf.trainable_variables())

    # Forward pass on the averaged parameters. The averages are kept apart
    # from the variables themselves, so inference() has to ask for them
    # explicitly through ema.average().
    averaged_logits = inference(x, ema, hidden_w, hidden_b, out_w, out_b)

    # The sparse cross-entropy op takes logits without softmax plus the index
    # of the correct class, so the one-hot labels are converted with argmax.
    true_class = tf.argmax(y_gt, 1)
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=true_class, logits=logits)
    # Mean cross entropy over the examples of the current batch.
    mean_cross_entropy = tf.reduce_mean(per_example_loss)

    # L2 regularization on the weights only; the biases are left out.
    l2 = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    weight_penalty = l2(hidden_w) + l2(out_w)

    # Total loss = cross entropy + regularization.
    loss = mean_cross_entropy + weight_penalty

    # Exponentially decayed learning rate; the decay period is the number of
    # iterations needed to go through the whole training set once.
    steps_per_epoch = mnist.train.num_examples / BATCH_SIZE
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step, steps_per_epoch, LEARNING_RATE_DECAY)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss, global_step)

    # One op that triggers both the parameter update and the moving-average
    # update (tf.group([train_step, ema_update_op]) would be the alternative).
    with tf.control_dependencies([train_step, ema_update_op]):
        train_op = tf.no_op(name='train')

    # Accuracy of the averaged model against the ground-truth labels.
    is_correct = tf.equal(tf.argmax(averaged_logits, 1), tf.argmax(y_gt, 1))
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        validate_feed = {x: mnist.validation.images, y_gt: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_gt: mnist.test.labels}

        for step in range(TRAINING_STEPS):
            # Report the validation accuracy every 1000 steps.
            if step % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print('validation accuracy: ', validate_acc)

            batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: batch_images, y_gt: batch_labels})

        # Final evaluation on the test split once training has finished.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print('test accuracy using average: ', test_acc)
def main(argv=None):
    """Load the MNIST data from the local directory and start training.

    Args:
        argv: Accepted for the tf.app.run() calling convention; not used.
    """
    train(input_data.read_data_sets("/home/lg/Desktop/learn/MNIST_data/",
                                    one_hot=True))


if __name__ == '__main__':
    # Hand control to TensorFlow's application runner instead of calling
    # main() directly.
    tf.app.run()
第二种实现:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Training configuration.
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99

# Load the dataset with one-hot labels and print the size of each split,
# followed by the label of the first training example.
mnist = input_data.read_data_sets(
    '/home/lg/Desktop/learn/MNIST_data/',
    one_hot=True,
)
print(mnist.train.num_examples)
print(mnist.validation.num_examples)
print(mnist.test.num_examples)
print(mnist.train.labels[0])

# Graph inputs: image batches with 28*28 values per example and label
# batches with 10 values per example.
x = tf.placeholder(dtype=tf.float32, shape=(None, 28 * 28), name='x')
y_gt = tf.placeholder(dtype=tf.float32, shape=(None, 10), name='y_gt')
def inference(input_tensor, avg_class, weight1, bias1, weight2, bias2):
    """Return the output-layer result of the one-hidden-layer network.

    Args:
        input_tensor: Input batch; it is matrix-multiplied with ``weight1``.
        avg_class: ``None`` to compute with the variables as given, or an
            object whose ``average(var)`` result is used in place of each
            parameter.
        weight1: Hidden-layer weights.
        bias1: Hidden-layer bias.
        weight2: Output-layer weights.
        bias2: Output-layer bias.

    Returns:
        ``relu(input_tensor @ w1 + b1) @ w2 + b2``; no softmax is applied.
    """
    # `is None` instead of `== None`: identity is the reliable None test and
    # does not depend on how avg_class implements __eq__.
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weight1) + bias1)
        return tf.matmul(layer1, weight2) + bias2

    # Use the averaged counterpart of every parameter.
    layer1 = tf.nn.relu(
        tf.matmul(input_tensor, avg_class.average(weight1))
        + avg_class.average(bias1))
    return tf.matmul(layer1, avg_class.average(weight2)) + avg_class.average(bias2)
def get_weight(shape, lambda1, stddev=1.0):
    """Create a weight variable and register its L2 penalty as a loss.

    Args:
        shape: Shape of the variable to create.
        lambda1: Scale passed to ``tf.contrib.layers.l2_regularizer``.
        stddev: Standard deviation of the truncated-normal initializer. The
            default of 1.0 is the value that was previously used implicitly.
            NOTE(review): the first implementation in this file initializes
            its weights with stddev=0.1 -- confirm which one is intended.

    Returns:
        The newly created variable. As a side effect its L2 regularization
        term is added to the ``tf.GraphKeys.LOSSES`` collection.
    """
    # dtype has to be passed by keyword: the second positional parameter of
    # tf.Variable is `trainable`, not the dtype.
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev), dtype=tf.float32)
    penalty = tf.contrib.layers.l2_regularizer(lambda1)(var)
    tf.add_to_collection(tf.GraphKeys.LOSSES, penalty)
    return var
def train(x, y_gt, mnist):
    """Build the training graph, run the training loop and print accuracies.

    Args:
        x: Placeholder that is fed with image batches.
        y_gt: Placeholder that is fed with one-hot label batches.
        mnist: Dataset object providing ``train``, ``validation`` and ``test``
            splits. The code reads ``train.num_examples``, calls
            ``train.next_batch(BATCH_SIZE)`` and feeds the ``images`` /
            ``labels`` of the validation and test splits.

    The current global step is printed before every training step, the
    validation accuracy every 1000 steps and the test accuracy once at the
    end. Accuracy is measured with the moving-average parameters.
    """
    # get_weight() also registers each weight's L2 penalty in the LOSSES
    # collection; the biases are not regularized.
    weight1 = get_weight(shape=(28 * 28, 500), lambda1=0.0001)
    bias1 = tf.Variable(tf.constant(0.1, shape=[500]))
    weight2 = get_weight(shape=(500, 10), lambda1=0.0001)
    bias2 = tf.Variable(tf.constant(0.1, shape=[10]))

    # Forward pass on the raw variables; this is what the loss is built on.
    y = inference(x, None, weight1, bias1, weight2, bias2)

    # Step counter, excluded from the trainable variables.
    global_step = tf.Variable(0, trainable=False)

    # Moving averages of all trainable variables, and a second forward pass
    # that uses those averages.
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = ema.apply(tf.trainable_variables())
    average_y = inference(x, ema, weight1, bias1, weight2, bias2)

    # Mean cross entropy of the batch; the one-hot labels are turned into
    # class indices with argmax.
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_gt, 1)))

    # Total loss = cross entropy + every penalty collected in LOSSES.
    tf.add_to_collection(tf.GraphKeys.LOSSES, cross_entropy)
    total_loss = tf.add_n(tf.get_collection(tf.GraphKeys.LOSSES))

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        total_loss, global_step)

    # A single op that runs the parameter update and the average update.
    train_op = tf.group(train_step, variables_averages_op)

    # Evaluate the averaged model: average_y is the forward pass the moving
    # averages are maintained for, and it is what the first implementation
    # in this file evaluates as well.
    correct_pred = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_gt, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        validate_feed = {x: mnist.validation.images, y_gt: mnist.validation.labels}
        # The test accuracy must be measured on the test split, not on the
        # validation split.
        test_feed = {x: mnist.test.images, y_gt: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            print(global_step.eval())
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("validation accuracy : ", validate_acc)
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_gt: ys})

        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("test accuracy : ", test_acc)
def main(argv=None):
    """Start training with the module-level placeholders and dataset.

    Args:
        argv: Accepted for the tf.app.run() calling convention; not used.
    """
    train(x=x, y_gt=y_gt, mnist=mnist)


if __name__ == '__main__':
    # Let TensorFlow's application runner invoke main().
    tf.app.run()
Reference
- 郑泽宇等.TensorFlow:实战Google深度学习框架(第2版),电子工业出版社,2018.
相关文章
tf.train.ExponentialMovingAverage()
tf.train.ExponentialMovingAverage()的错误与正确实践