《TensorFlow实战Google深度学习框架(第2版)》 Chapter 5: The MNIST Digit Recognition Problem

Processing the MNIST Data

# Load the MNIST dataset
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/path/to/MNIST_data/", one_hot=True)
Extracting /path/to/MNIST_data/train-images-idx3-ubyte.gz
Extracting /path/to/MNIST_data/train-labels-idx1-ubyte.gz
Extracting /path/to/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting /path/to/MNIST_data/t10k-labels-idx1-ubyte.gz
print("Training data size: ", mnist.train.num_examples)
print("Validating data size: ", mnist.validation.num_examples)
print("Testing data size: ", mnist.test.num_examples)
# print("Example training data: ", mnist.train.images[0])
print("Example training data label: ", mnist.train.labels[0])
Training data size:  55000
Validating data size:  5000
Testing data size:  10000
Example training data label:  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
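
The dataset objects also provide a next_batch function for reading a small batch of training data at a time; for example:

xs, ys = mnist.train.next_batch(100)
print(xs.shape)  # (100, 784): each image is flattened into a 784-dimensional vector
print(ys.shape)  # (100, 10): one-hot labels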

Training Neural Network Models and Comparing Results Across Models

Training a Neural Network with TensorFlow
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

INPUT_NODE = 784 # number of input-layer nodes
OUTPUT_NODE = 10 # number of output-layer nodes
LAYER1_NODE = 500 # number of hidden-layer nodes

BATCH_SIZE = 100 # number of training examples in one batch
LEARNING_RATE_BASE = 0.8 # base learning rate
LEARNING_RATE_DECAY = 0.99 # decay rate of the learning rate
REGULARIZATION_RATE = 0.0001 # coefficient of the regularization term (model complexity) in the loss
TRAINING_STEPS = 10000 # number of training steps
MOVING_AVERAGE_DECAY = 0.99 # decay rate of the moving average

def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """辅助函数用来计算前向传播结果,使用ReLU做为激活函数,实现去线性化"""
    if avg_class == None:
        # 不使用滑动平均
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)
    
def train(mnist):
    """训练模型生成"""
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")
    # Create the weight and bias parameters
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1  = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2  = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    # Compute the forward-propagation result of the network under the current parameters
    y = inference(x, None, weights1, biases1, weights2, biases2)
    
    # Define a variable to store the number of training steps. It is not trainable, so the moving average is not applied to it
    global_step = tf.Variable(0, trainable=False)
    # Initialize the moving-average class, given the decay rate and the training-step variable
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    # Apply the moving average to all variables representing the network parameters
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # Compute the forward-propagation result using the moving averages
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    # Cross entropy as the loss function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
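    # tf.argmax(y_, 1) converts each one-hot label into a class index, which the sparse version of this op expects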
    # Average cross entropy over the current batch
    cross_entropy_mean = tf.reduce_mean(cross_entropy) 
    # Compute the L2 regularization loss
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    # The total loss is the sum of the cross-entropy loss and the regularization loss.
    loss = cross_entropy_mean + regularization
    
    # Set an exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               mnist.train.num_examples / BATCH_SIZE,
                                               LEARNING_RATE_DECAY,
                                               staircase=True)
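    # With staircase=True, the decayed rate is
    # LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step // decay_steps),
    # where decay_steps = mnist.train.num_examples / BATCH_SIZE, so the learning
    # rate drops once per pass through the training data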
    
    # Optimize the loss with gradient descent
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
    # Each pass over the training data must both update the network parameters through
    # backpropagation and update the moving average of every parameter. To run several
    # operations in one step, TensorFlow provides two mechanisms: tf.control_dependencies and tf.group.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name="train")
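    # tf.group would achieve the same effect:
    # train_op = tf.group(train_step, variables_averages_op)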
    
    # Check whether each prediction matches the label
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    # Accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    ## Set up the session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        
        # Prepare the validation and test data
        validate_feed = {x:mnist.validation.images, y_:mnist.validation.labels}
        test_feed = {x:mnist.test.images, y_:mnist.test.labels}
        
        # Training loop
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g" % (i, validate_acc))
            
            # Produce the batch of data used in this step
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x:xs, y_:ys})
        
        # Final accuracy
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))

def main(argv=None):
    mnist = input_data.read_data_sets("/path/to/MNIST_data", one_hot=True)
    train(mnist)

if __name__=='__main__':
    main()
Extracting /path/to/MNIST_data\train-images-idx3-ubyte.gz
Extracting /path/to/MNIST_data\train-labels-idx1-ubyte.gz
Extracting /path/to/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting /path/to/MNIST_data\t10k-labels-idx1-ubyte.gz
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
After 0 training step(s), validation accuracy using average model is 0.0478
After 1000 training step(s), validation accuracy using average model is 0.9774
After 2000 training step(s), validation accuracy using average model is 0.9802
After 3000 training step(s), validation accuracy using average model is 0.9822
After 4000 training step(s), validation accuracy using average model is 0.9832
After 5000 training step(s), validation accuracy using average model is 0.9832
After 6000 training step(s), validation accuracy using average model is 0.9836
After 7000 training step(s), validation accuracy using average model is 0.9834
After 8000 training step(s), validation accuracy using average model is 0.9842
After 9000 training step(s), validation accuracy using average model is 0.984
After 10000 training step(s), test accuracy using average model is 0.9836
Using the Validation Set to Judge Model Performance

How the validation data is chosen matters a great deal. In general, the closer the distribution of the validation data is to that of the test data, the better the model's performance on the validation set reflects its performance on the test set. Cross-validation is another option.
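
When labeled data is scarce, k-fold cross-validation averages the validation results over several train/validation splits. A minimal sketch using NumPy (the helper k_fold_indices below is illustrative, not part of the book's code):

import numpy as np

def k_fold_indices(n_examples, k=5, seed=0):
    # Shuffle the example indices, split them into k folds, and
    # yield (train_indices, validation_indices) pairs, one per fold
    rng = np.random.RandomState(seed)
    folds = np.array_split(rng.permutation(n_examples), k)
    for i in range(k):
        train_idx = np.concatenate([f for j, f in enumerate(folds) if j != i])
        yield train_idx, folds[i]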
Adding code to the training loop of the program above yields, after every 1000 iterations, the accuracy of the moving-average model on both the validation data and the test data.

Except for the training loop, this program is identical to the previous listing, so only the modified loop is shown here. Every 1000 steps it now evaluates the moving-average model on both the validation data and the test data:

        # Training loop
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                test_acc = sess.run(accuracy, feed_dict=test_feed)
                print("After %d training step(s), validation accuracy using average model is %g, "
                      "test accuracy using average model is %g" % (i, validate_acc, test_acc))
            # Produce the batch of data used in this step
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # Final accuracy
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))
Extracting /path/to/MNIST_data\train-images-idx3-ubyte.gz
Extracting /path/to/MNIST_data\train-labels-idx1-ubyte.gz
Extracting /path/to/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting /path/to/MNIST_data\t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy using average model is 0.0774, test accuracy using average model is 0.0753
After 1000 training step(s), validation accuracy using average model is 0.9778, test accuracy using average model is 0.9758
After 2000 training step(s), validation accuracy using average model is 0.9816, test accuracy using average model is 0.9799
After 3000 training step(s), validation accuracy using average model is 0.9834, test accuracy using average model is 0.9815
After 4000 training step(s), validation accuracy using average model is 0.9812, test accuracy using average model is 0.9828
After 5000 training step(s), validation accuracy using average model is 0.9838, test accuracy using average model is 0.9827
After 6000 training step(s), validation accuracy using average model is 0.983, test accuracy using average model is 0.9832
After 7000 training step(s), validation accuracy using average model is 0.9842, test accuracy using average model is 0.9836
After 8000 training step(s), validation accuracy using average model is 0.9836, test accuracy using average model is 0.9838
After 9000 training step(s), validation accuracy using average model is 0.9832, test accuracy using average model is 0.9834
After 10000 training step(s), test accuracy using average model is 0.9834
Comparing the Performance of Different Models

The structure of the neural network has a fundamental impact on the final model's performance.
The moving-average model, the exponentially decaying learning rate, and the regularization loss bring only a modest improvement to the final accuracy on MNIST. Closer analysis of the experimental results shows, however, that these optimization methods do address the neural-network optimization problems raised in Chapter 4. When the problem being solved and the network model are more complex, these methods are more likely to have a larger effect on training.

5.3 Variable Management

When a neural network's structure becomes more complex and it has more parameters, a better way of passing around and managing those parameters is needed. TensorFlow provides a mechanism for creating or fetching a variable through its name. With this mechanism, different functions can use a variable directly by name, without passing the variable around as an argument. The mechanism is implemented mainly through the tf.get_variable and tf.variable_scope functions.

# The following two definitions are equivalent; the biggest difference is how the variable name is specified.
# tf.get_variable takes the shape information and an initializer when called:
# v = tf.get_variable("v", shape=[1], initializer=tf.constant_initializer(1.0))
# v = tf.Variable(tf.constant(1.0, shape=[1]), name="v")

tf.variable_scope controls whether tf.get_variable fetches a variable that has already been created.

import tensorflow as tf

# Create a variable named v inside the namespace foo
with tf.variable_scope("foo"):
    v = tf.get_variable("v", [1], initializer=tf.constant_initializer(1.0))

# The variable already exists, so the following code would raise an error
# with tf.variable_scope("foo"):
#     v = tf.get_variable("v", [1])

# When the context manager is created with reuse=True, tf.get_variable fetches the variable that has already been declared.
with tf.variable_scope("foo", reuse=True):
    v1 = tf.get_variable("v", [1])
print(v == v1)

# When reuse is set to True, tf.variable_scope can only fetch variables that have already been created.
# Since no variable v has been created in the namespace bar, the following code would raise an error:
# with tf.variable_scope("bar", reuse=True):
#     v = tf.get_variable("v", [1])
True

tf.variable_scope calls can be nested in TensorFlow. The following program shows how the value of the reuse parameter is determined when tf.variable_scope calls are nested.

with tf.variable_scope("root"):
    print(tf.get_variable_scope().reuse) # Get the reuse value in the current context manager; False here
    
    with tf.variable_scope("foo", reuse=True):
        print(tf.get_variable_scope().reuse) # True
        
        with tf.variable_scope("bar"):
            print(tf.get_variable_scope().reuse) # Not set here, so it inherits True from the enclosing scope
            
    print(tf.get_variable_scope().reuse) # After exiting that scope, reuse reverts to False
False
True
True
False

Managing variable names with tf.variable_scope.

import tensorflow as tf
v = tf.get_variable("v", [1])
print(v.name) # Prints v:0. "v" is the variable name; ":0" means this tensor is the first output of the operation that generates the variable

with tf.variable_scope("foo"):
    v2 = tf.get_variable("v", [1])
print(v2.name) # Prints foo/v:0; "foo" is the name of the enclosing namespace

with tf.variable_scope("foo"):
    with tf.variable_scope("bar"):
        v3 = tf.get_variable("v", [1])
        print(v3.name)
        
v4 = tf.get_variable("v1", [1])
print(v4.name)

# Create a scope with an empty name and set reuse=True
with tf.variable_scope("", reuse=True):
    v5 = tf.get_variable("foo/bar/v", [1])
    print(v5 == v3)
    v6 = tf.get_variable("v1", [1])     
    print(v6 == v4)
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
v:0
foo/v:0
foo/bar/v:0
v1:0
True
True

TensorFlow Model Persistence

Implementing Persistence

TensorFlow provides a very simple API for saving and restoring a neural-network model: the tf.train.Saver class.

# Save the model
import tensorflow as tf

v1 = tf.Variable(tf.constant(1.0, shape=[1]), name="v1")
v2 = tf.Variable(tf.constant(2.0, shape=[1]), name="v2")
result = v1 + v2

init_op = tf.global_variables_initializer()
# Declare the Saver class used to save the model
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init_op)
    # Save the model to disk
    saver.save(sess, r"/path/to/model/model.ckpt")
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.

The first file, model.ckpt.meta, stores the structure of the TensorFlow computation graph.
The second file, model.ckpt, stores the value of every variable in the TensorFlow program.
The last file, checkpoint, stores the list of all model files in a directory.
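The contents of a checkpoint can be inspected with tf.train.NewCheckpointReader; a small sketch, assuming the save path used above:

import tensorflow as tf

# List every variable stored in the checkpoint together with its value
reader = tf.train.NewCheckpointReader("/path/to/model/model.ckpt")
for name in reader.get_variable_to_shape_map():
    print(name, reader.get_tensor(name))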

# Restore the model
# Requires restarting the kernel first to clear the previously defined variables
import tensorflow as tf

v1 = tf.Variable(tf.constant(1.0, shape=[1]), name="v1")
v2 = tf.Variable(tf.constant(2.0, shape=[1]), name="v2")
result = v1 + v2

saver = tf.train.Saver()

with tf.Session() as sess:
    # Load the saved model
    saver.restore(sess, r"/path/to/model/model.ckpt")
    print(sess.run(result))
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /path/to/model/model.ckpt
[3.]
# This also requires a restart to clear previously defined variables
import tensorflow as tf
# Load the persisted graph directly
saver = tf.train.import_meta_graph(r"/path/to/model/model.ckpt.meta")
with tf.Session() as sess:
    saver.restore(sess, r"/path/to/model/model.ckpt")
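    # "add:0" is the first output of the operation named "add", i.e. result = v1 + v2 in the saved graph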
    print(sess.run(tf.get_default_graph().get_tensor_by_name("add:0")))

INFO:tensorflow:Restoring parameters from /path/to/model/model.ckpt
[3.]

Variables can also be renamed when they are saved or loaded.

import tensorflow as tf
# The variables declared here have names different from those in the saved model.
v1 = tf.Variable(tf.constant(1.0, shape=[1]), name="other-v1")
v2 = tf.Variable(tf.constant(2.0, shape=[1]), name="other-v2")
# Loading the model directly with tf.train.Saver() would raise a variable-not-found error.

# Using a dictionary to rename variables makes it possible to load the original model.
# The dictionary specifies that the variable originally named v1 is loaded into the variable v1 here (named other-v1).
saver = tf.train.Saver({"v1": v1, "v2": v2})

This also makes the moving-average model convenient to use.

import tensorflow as tf

v = tf.Variable(0, dtype=tf.float32, name="v")
# Before the moving-average model is declared, there is only one variable, v
for variables in tf.global_variables():
    print(variables.name)

ema = tf.train.ExponentialMovingAverage(0.99)
maintain_averages_op = ema.apply(tf.global_variables())
# After the moving average is declared, TensorFlow automatically generates a shadow variable
for variables in tf.global_variables():
    print(variables.name)

saver = tf.train.Saver()
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    sess.run(tf.assign(v, 10))
    sess.run(maintain_averages_op)
    # When saving, TensorFlow stores both variables, v:0 and v/ExponentialMovingAverage:0
    saver.save(sess, r"/path/to/model/model.ckpt")
    print(sess.run([v, ema.average(v)]))
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
v:0
v:0
v/ExponentialMovingAverage:0
[10.0, 0.099999905]

Reading a variable's moving average directly through variable renaming

v = tf.Variable(0, dtype=tf.float32, name="v")
# Rename so that the moving average of the original variable v is loaded directly into v
saver = tf.train.Saver({"v/ExponentialMovingAverage": v})
with tf.Session() as sess:
    saver.restore(sess, r"/path/to/model/model.ckpt")
    print(sess.run(v))
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /path/to/model/model.ckpt
0.099999905
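
The ExponentialMovingAverage class can generate such a renaming dictionary automatically through its variables_to_restore function; a minimal sketch:

import tensorflow as tf

v = tf.Variable(0, dtype=tf.float32, name="v")
ema = tf.train.ExponentialMovingAverage(0.99)
# variables_to_restore() maps "v/ExponentialMovingAverage" to v
print(ema.variables_to_restore())
saver = tf.train.Saver(ema.variables_to_restore())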

5.5 TensorFlow Best-Practice Sample Program

Splitting training and testing into two independent programs makes each component more flexible.
(Worth coming back to once the individual pieces are familiar.)

mnist_inference.py

import tensorflow as tf

# Parameters describing the network structure
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

# Variables are obtained through tf.get_variable: they are created when the network is trained,
# and their values are loaded from the saved model at test time. Even more conveniently, because
# moving-average variables can be renamed at load time, the same name can refer to the variable
# itself during training and to its moving average during testing. This function also adds the
# variable's regularization loss to a loss collection.
def get_weight_variable(shape, regularizer):
    weights = tf.get_variable("weights", shape, initializer=tf.truncated_normal_initializer(stddev=0.1))
    # When a regularizer is given, add the current variable's regularization loss to the
    # collection named 'losses'. add_to_collection adds a tensor to a custom collection,
    # one that is not on the list of collections TensorFlow manages automatically.
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights

# Define the forward-propagation process of the network
def inference(input_tensor, regularizer):
    # Declare the first layer's variables and complete its forward propagation
    with tf.variable_scope('layer1'):
        weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        biases = tf.get_variable("biases", [LAYER1_NODE], initializer=tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)
    
    # Declare the second layer's variables and complete its forward propagation
    with tf.variable_scope('layer2'):
        weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        biases = tf.get_variable("biases", [OUTPUT_NODE], initializer=tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weights) + biases

    return layer2

mnist_train.py

import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import mnist_inference

# Configure the network's parameters
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99
# Path and file name for saving the model
MODEL_SAVE_PATH = "E:/path/to/MNIST_model/"
MODEL_NAME = "mnist_model"


def train(mnist):
    # Define the input and output placeholders
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Loss function, learning rate, moving-average operation, and training step
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
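    # tf.add_n sums the regularization losses that get_weight_variable added to the 'losses' collection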
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY,
        staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Initialize the TensorFlow persistence class
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # The model is no longer evaluated on validation data during training; validation and testing are handled by a separate program
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)


def main(argv=None):
    mnist = input_data.read_data_sets("E:/path/to/MNIST_data", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    tf.app.run(main)

mnist_eval.py

import time

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import mnist_inference
import mnist_train

# Interval in seconds between two consecutive loads of the latest model.
EVAL_INTERVAL_SECS = 1

def evaluate(mnist):
    with tf.Graph().as_default() as g:
        # Define the format of the inputs and outputs
        x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}

        # Compute the forward-propagation result by calling the shared function. Testing does not care about the regularization loss, so the regularizer is set to None.
        y = mnist_inference.inference(x, None)
        # Use the forward-propagation result to compute accuracy. To classify unseen examples, tf.argmax(y, 1) gives the predicted class of each input example.
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Load the model through variable renaming, so the forward pass does not need to call
        # the moving-average function to obtain the averaged values. This fully reuses the
        # forward-propagation process defined in mnist_inference.py.
        variable_averages = tf.train.ExponentialMovingAverage(mnist_train.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        while True:
            with tf.Session() as sess:
                # Automatically find the newest model file in the directory
                ckpt = tf.train.get_checkpoint_state(mnist_train.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    # Load the model
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Obtain the number of training iterations at save time from the file name
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
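                    # e.g. a checkpoint path ending in "mnist_model-29001" yields global_step "29001"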
                    accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                    print("After %s training step(s), validation accuracy = %g" % (global_step, accuracy_score))
                else:
                    print('No checkpoint file found')
                    return
            time.sleep(EVAL_INTERVAL_SECS)

def main(argv=None):
    mnist = input_data.read_data_sets("E:/path/to/MNIST_data", one_hot=True)
    evaluate(mnist)

if __name__ == '__main__':
    main()
