Forward propagation & parameter definitions
# -*- coding:utf-8 -*-
# Define the forward-propagation process and the parameters of the network
import tensorflow as tf
# Network structure parameters
INPUT_NODE = 784    # 28*28 input pixels
OUTPUT_NODE = 10    # digits 0-9
LAYER1_NODE = 500   # number of hidden-layer nodes

# Create (or reuse) a weight variable
def get_weight_variable(shape, regularizer):
    # Initialize the weights from a truncated normal distribution with stddev 0.1
    weights = tf.get_variable("weights", shape,
                              initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Add the regularization loss of this variable to the 'losses' collection
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights
# Forward-propagation process of the network
def inference(input_tensor, regularizer):
    # Layer 1: declare the variables and compute the hidden layer with ReLU activation
    with tf.variable_scope('layer1'):
        weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        biases = tf.get_variable("biases", [LAYER1_NODE],
                                 initializer=tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)
    # Layer 2: output layer (logits; softmax is applied later inside the loss)
    with tf.variable_scope('layer2'):
        weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        biases = tf.get_variable("biases", [OUTPUT_NODE],
                                 initializer=tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weights) + biases
    # Return the output of the last layer
    return layer2
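A minimal sketch (not part of the original code) that exercises inference on a dummy batch to confirm the 784 → 500 → 10 shapes; it assumes the file above is saved as mnist_inference_1.py, the module name the training script below imports:

# -*- coding:utf-8 -*-
# Hypothetical smoke test for mnist_inference_1.inference
import numpy as np
import tensorflow as tf
import mnist_inference_1

x = tf.placeholder(tf.float32, [None, mnist_inference_1.INPUT_NODE], name='x-input')
y = mnist_inference_1.inference(x, None)   # no regularizer needed for a shape check

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logits = sess.run(y, feed_dict={x: np.zeros((5, 784), dtype=np.float32)})
    print(logits.shape)   # expected: (5, 10)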
Training process and model saving:
# -*- coding:utf-8 -*-
# Train the neural network
import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the constants and forward-propagation function defined in mnist_inference_1.py
import mnist_inference_1

# Network configuration parameters
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99
# Where to save the model
MODEL_SAVE_PATH = "F:\\AI_code\\Tensorflow_Book\\model"
MODEL_NAME = "MNIST_model.ckpt"
def train(mnist):
    # Define the input and output placeholders
    x = tf.placeholder(tf.float32, [None, mnist_inference_1.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, mnist_inference_1.OUTPUT_NODE], name='y-input')
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # Reuse the forward-propagation process defined in mnist_inference_1.py
    y = mnist_inference_1.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Define the loss function, learning rate, moving-average op, and training op
    # 1. Moving-average op
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
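    # Each shadow variable is updated as
    #   shadow = decay * shadow + (1 - decay) * variable,
    # where decay = min(MOVING_AVERAGE_DECAY, (1 + global_step) / (10 + global_step))
    # because global_step is passed as num_updates.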
    # 2. Cross-entropy loss (softmax applied internally; labels are class indices)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # 3. Total loss = cross entropy + the L2 regularization terms collected in 'losses'
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
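    # Each entry in 'losses' was added by get_weight_variable and equals
    # REGULARIZATION_RATE * sum(w ** 2) / 2, since tf.contrib.layers.l2_regularizer
    # wraps tf.nn.l2_loss.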
    # 4. Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               mnist.train.num_examples / BATCH_SIZE,
                                               LEARNING_RATE_DECAY)
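    # exponential_decay computes
    #   learning_rate = LEARNING_RATE_BASE *
    #                   LEARNING_RATE_DECAY ** (global_step / decay_steps),
    # so the rate shrinks by a factor of 0.99 roughly once per epoch
    # (decay_steps = num_examples / BATCH_SIZE batches per epoch).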
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variables_averages_op]):
        # The op below only runs after train_step and variables_averages_op have run
        train_op = tf.no_op(name='train')
    ''' Note on tf.no_op:
    As the documentation says, tf.no_op() does nothing.
    However, when you create a tf.no_op() inside a with tf.control_dependencies([x, y, z]): block,
    the op gains control dependencies on ops x, y, and z.
    It can therefore be used to group a set of side-effecting ops,
    giving you a single op to pass to sess.run() in order to run all of them in a single step.
    '''
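    # Equivalent alternative (an assumption, not in the original code):
    #   train_op = tf.group(train_step, variables_averages_op)
    # tf.group bundles the two ops into one without a control_dependencies block.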
    # Create a Saver to persist the model
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={x: xs, y_: ys})
            # Save the model every 1000 steps
            if i % 1000 == 0:
                # Report the loss on the current batch to track training progress
                print("\ni = %d , step = %d" % (i, step))
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                # Save a checkpoint
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
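                # Passing global_step makes the checkpoint file name MNIST_model.ckpt-<step>
                # (plus the 'checkpoint' index file), which is what the evaluation script
                # below parses with split('-')[-1] to recover the step count.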
def main(argv=None):
    mnist = input_data.read_data_sets("../DataSet/", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    tf.app.run()
Training results:
After 28001 training step(s), loss on training batch is 0.0323391.
After 29001 training step(s), loss on training batch is 0.0378978.
[Done] exited with code=0 in 433.749 seconds
Loading the model and evaluating accuracy on the validation set:
# -*- coding:utf-8 -*-
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the constants and functions defined in mnist_inference_1.py & mnist_train_2.py
import mnist_inference_1
import mnist_train_2

# Load the latest model every 10 s and evaluate its accuracy on the validation data
EVAL_INTERVAL_SECS = 10
def evaluate(mnist):
    with tf.Graph().as_default() as g:
        # Define the input and output placeholders
        x = tf.placeholder(tf.float32, [None, mnist_inference_1.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, mnist_inference_1.OUTPUT_NODE], name='y-input')
        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}
        # Compute the forward pass with the function from mnist_inference_1.
        # Evaluation does not need the regularization loss (it only guides parameter
        # optimization during training), so the regularizer argument is None.
        y = mnist_inference_1.inference(x, None)
        # Compute the accuracy from the forward-pass results
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
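        # tf.argmax(y, 1) picks the predicted class for each example; the boolean matches
        # are cast to float32 and averaged, so `accuracy` is the fraction of correct predictions.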
        # Restore variables via renaming, so that the moving-average (shadow) values
        # computed during training are loaded into the corresponding weights
        variable_averages = tf.train.ExponentialMovingAverage(mnist_train_2.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
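        # variables_to_restore() returns a name-to-variable dict along the lines of
        #   {'layer1/weights/ExponentialMovingAverage': <Variable 'layer1/weights'>, ...}
        # (illustrative), so the Saver below loads the averaged values into the original variables.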
        saver = tf.train.Saver(variables_to_restore)
        # Periodically load the latest checkpoint and compute the accuracy,
        # to monitor how the accuracy evolves during training
        while True:
            with tf.Session() as sess:
                # tf.train.get_checkpoint_state reads the 'checkpoint' file and finds
                # the newest model file in the directory
                ckpt = tf.train.get_checkpoint_state(mnist_train_2.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    # Restore the model
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Parse the training step count from the checkpoint file name
                    print("\n ckpt.model_checkpoint_path = %s" % ckpt.model_checkpoint_path)
                    # Alternative: ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    print("ckpt.model_checkpoint_path.split('-')[-1] = %s" % ckpt.model_checkpoint_path.split('-')[-1])
                    global_step = ckpt.model_checkpoint_path.split('-')[-1]
                    accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                    print("After %s training step(s), validation accuracy = %g" % (global_step, accuracy_score))
                else:
                    print("No checkpoint file found")
                    return
            time.sleep(EVAL_INTERVAL_SECS)
def main(argv=None):
    mnist = input_data.read_data_sets("../DataSet/", one_hot=True)
    evaluate(mnist)

if __name__ == '__main__':
    tf.app.run()