下面给出使用 TensorFlow 实现 MNIST 手写数字识别的完整代码:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Constants tied to the MNIST dataset.
INPUT_NODE = 784   # width of the input placeholder / first weight matrix
OUTPUT_NODE = 10   # width of the label placeholder / output layer

# Neural-network hyper-parameters.
LAYER1_NODE = 500              # number of units in the hidden layer
BATCH_SIZE = 100               # examples drawn per training step
LEARNING_RATE_BASE = 0.8       # initial rate fed to exponential_decay
LEARNING_RATE_DECAY = 0.99     # decay rate fed to exponential_decay
REGULARIZATION_RATE = 0.0001   # scale passed to the L2 regularizer
TRAINING_STEPS = 30000         # number of training iterations
MOVING_AVERAGE_DECAY = 0.99    # decay of the exponential moving average
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass of the fully connected network.

    The network is a single ReLU hidden layer followed by a linear output
    layer. No softmax is applied here; the result is used as logits by the
    caller.

    Args:
        input_tensor: Input batch, multiplied on the right by ``weights1``.
        avg_class: ``None`` to use the parameters as given, or an object
            exposing ``average(variable)`` (for example a
            ``tf.train.ExponentialMovingAverage``) whose averaged copy of
            each parameter is used instead.
        weights1: Weights of the hidden layer.
        biases1: Biases of the hidden layer.
        weights2: Weights of the output layer.
        biases2: Biases of the output layer.

    Returns:
        The output-layer tensor ``layer1 @ weights2 + biases2``.
    """
    # Identity test: ``== None`` would go through any overloaded ``__eq__``.
    if avg_class is not None:
        # Swap every parameter for its moving-average counterpart.
        weights1 = avg_class.average(weights1)
        biases1 = avg_class.average(biases1)
        weights2 = avg_class.average(weights2)
        biases2 = avg_class.average(biases2)

    layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
    return tf.matmul(layer1, weights2) + biases2
def train(mnist):
    """Build the graph, train the network and print its accuracy.

    Validation accuracy (measured with the moving-average model) is printed
    every 1000 steps, and test accuracy is printed once after the last step.

    Args:
        mnist: Dataset object. ``mnist.train`` must provide ``num_examples``
            and ``next_batch(batch_size)``; ``mnist.validation`` and
            ``mnist.test`` must provide ``images`` and ``labels``. Labels are
            fed to a ``[None, OUTPUT_NODE]`` placeholder and reduced with
            ``argmax`` along axis 1, so they are expected to be one-hot.
    """
    # Placeholders for the input images and the ground-truth labels.
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Weights and biases of the hidden and output layers.
    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Plain forward pass, without moving averages; used for the loss.
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Step counter; marked non-trainable so that neither the optimizer nor
    # the moving average below treats it as a model parameter.
    global_step = tf.Variable(0, trainable=False)

    # Maintain an exponential moving average of every trainable variable.
    # global_step is supplied as the update count of the averager.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # Forward pass through the averaged parameters; used for evaluation only.
    average_y = inference(
        x, variable_averages, weights1, biases1, weights2, biases2)

    # Cross entropy is computed on the non-averaged output y. `logits` and
    # `labels` must be passed by keyword: logits is the network output,
    # labels is the index of the correct class.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization on the weights only (biases are not regularized).
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization

    # Exponentially decaying learning rate; the decay period is the number
    # of batches needed to go through the training set once.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)

    # Gradient descent on the loss; minimize() also increments global_step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # One op that triggers both the parameter update and the moving-average
    # update. A no_op under control_dependencies([a, b]) is equivalent to
    # tf.group(a, b): the two run without a defined relative order.
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    # Accuracy of the moving-average model. tf.argmax replaces the
    # deprecated alias tf.arg_max.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Run the session: prepare the feeds, then iterate.
    with tf.Session() as sess:
        # tf.global_variables_initializer replaces the deprecated
        # tf.initialize_all_variables.
        tf.global_variables_initializer().run()

        # Validation data.
        validated_feed = {x: mnist.validation.images,
                          y_: mnist.validation.labels}
        # Test data.
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            # Report validation accuracy every 1000 steps (including step 0,
            # i.e. before any training has happened).
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validated_feed)
                print("After %d training step(s), validation accuracy using average model is %g" % (i, validate_acc))

            # Fetch the next training batch and run one training step.
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # Final accuracy on the test set once training has finished.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))
def main(argv=None):
    """Load the MNIST data with one-hot labels and start training.

    Args:
        argv: Accepted for the ``tf.app.run()`` calling convention; unused.
    """
    # NOTE(review): the original comment says read_data_sets downloads the
    # data automatically - confirm for the TensorFlow version in use.
    dataset = input_data.read_data_sets("/path/to/MNIST_data", one_hot=True)
    # Kick off training on the loaded dataset.
    train(dataset)
# Main program entry point provided by TensorFlow: per the original note,
# tf.app.run() calls the main() function defined in this file.
if __name__ == "__main__":
    tf.app.run()
运行结果(训练过程中每 1000 轮输出一次验证集准确率,以下为前几次输出):
After 0 training step(s), validation accuracy using average model is 0.0902
After 1000 training step(s), validation accuracy using average model is 0.9748
After 2000 training step(s), validation accuracy using average model is 0.9802
After 3000 training step(s), validation accuracy using average model is 0.9798