MNIST Digit Recognition with TensorFlow
To keep myself on track, I've committed to publishing one blog post every week, mainly around two areas: so-called artificial intelligence, and Android development. Without further ado, let's get straight to the topic: handwritten digit recognition with TensorFlow.
First, the complete code. It was developed with TensorFlow 1.0 + Anaconda + macOS + PyCharm.
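Before running it, it's worth confirming the TensorFlow version, since the 1.x API used below differs considerably from 2.x. A minimal sanity-check sketch (nothing project-specific here):

import tensorflow as tf
print(tf.__version__)  # expect a 1.x version, e.g. 1.0.0

With that confirmed, here is the full program: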
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Constants describing the MNIST data set
INPUT_NODE = 784    # each image is 28x28 = 784 pixels
OUTPUT_NODE = 10    # ten digit classes, 0-9

# Neural network configuration
LAYER1_NODE = 500             # number of nodes in the hidden layer
BATCH_SIZE = 100              # number of examples per training batch
LEARNING_RATE_BASE = 0.8      # initial learning rate
LEARNING_RATE_DECAY = 0.99    # decay rate of the learning rate
REGULARIZATION_RATE = 0.0001  # weight of the L2 regularization term
TRAINING_STEPS = 3000000      # number of training steps
MOVING_AVERAGE_DECAY = 0.99   # decay rate for the parameter moving averages

# A helper function: given the input and all the parameters of the network,
# compute the forward propagation result. When avg_class is None the current
# parameter values are used; otherwise the moving averages maintained by
# avg_class are used instead.
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        layer1 = tf.nn.relu(
            tf.matmul(input_tensor, avg_class.average(weights1)) +
            avg_class.average(biases1))
        return (tf.matmul(layer1, avg_class.average(weights2)) +
                avg_class.average(biases2))

# The training process
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    # Hidden-layer parameters
    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Output-layer parameters
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    # Forward pass that does not use the parameter moving averages
    y = inference(x, None, weights1, biases1, weights2, biases2)
    # The variable counting training steps is conventionally not trainable
    global_step = tf.Variable(0, trainable=False)
    # Create the moving-average class and apply it to all trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    # Forward pass that uses the moving averages; average() must be called
    # explicitly, which inference() does internally
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    # Cross entropy measures the gap between predictions and true labels
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    # Average cross entropy over all examples in the current batch
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # L2 regularization loss on the weights (biases are usually not regularized)
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    # Total loss = cross-entropy loss + regularization loss
    loss = cross_entropy_mean + regularization
    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    # Update the parameters and their moving averages in a single step
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        # Train the network iteratively
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g" % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))

# 4. Main entry point
def main(argv=None):
    mnist = input_data.read_data_sets("/tmp/data", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()
Running it prints the validation accuracy every 1,000 steps and, once training finishes, the final test accuracy. Now let's walk through the program piece by piece.
The program begins by importing two modules: the loader for the handwritten training data, and TensorFlow itself.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
Next come the basic parameter settings:
INPUT_NODE = 784    # each image is 28x28 = 784 pixels
OUTPUT_NODE = 10    # ten digit classes, 0-9

# Neural network configuration
LAYER1_NODE = 500             # number of nodes in the hidden layer
BATCH_SIZE = 100              # number of examples per training batch
LEARNING_RATE_BASE = 0.8      # initial learning rate
LEARNING_RATE_DECAY = 0.99    # decay rate of the learning rate
REGULARIZATION_RATE = 0.0001  # weight of the L2 regularization term
TRAINING_STEPS = 3000000      # number of training steps
MOVING_AVERAGE_DECAY = 0.99   # decay rate for the parameter moving averages
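Of these, LEARNING_RATE_BASE and LEARNING_RATE_DECAY drive tf.train.exponential_decay, which (with the default staircase=False) computes learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step / decay_steps). A pure-Python sketch to build intuition (decayed_learning_rate is an illustrative helper, not part of the program; 550 assumes MNIST's standard 55,000 training examples divided by BATCH_SIZE = 100):

# Illustrative sketch of the exponential learning-rate decay used later.
# Mirrors tf.train.exponential_decay with staircase=False.
def decayed_learning_rate(step, base=0.8, decay=0.99, decay_steps=550):
    # decay_steps = num_examples / BATCH_SIZE, i.e. roughly one epoch
    return base * decay ** (step / decay_steps)

print(decayed_learning_rate(0))     # 0.8 at the start of training
print(decayed_learning_rate(5500))  # about 0.8 * 0.99**10, roughly 0.72, after ~10 epochs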
Next we define a helper function which, given the input and all the parameters of the network, computes the forward propagation result. Here it defines a fully connected three-layer network that uses the ReLU activation function: adding the hidden layer gives the network a multi-layer architecture, and the ReLU activation introduces non-linearity. The function also accepts the class used to compute the moving averages of the parameters, which makes it convenient to use the moving-average model at test time.
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    if avg_class is None:
        # Use the current parameter values
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        # Use the moving averages of the parameters
        layer1 = tf.nn.relu(
            tf.matmul(input_tensor, avg_class.average(weights1)) +
            avg_class.average(biases1))
        return (tf.matmul(layer1, avg_class.average(weights2)) +
                avg_class.average(biases2))
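To make the shapes concrete, here is a NumPy-only sketch of the same forward pass with random parameters (illustrative only; the real code uses the TensorFlow variables defined below):

import numpy as np

batch = np.random.rand(100, 784)         # a batch of 100 flattened 28x28 images
w1 = np.random.randn(784, 500) * 0.1     # hidden-layer weights
b1 = np.full(500, 0.1)                   # hidden-layer biases
w2 = np.random.randn(500, 10) * 0.1      # output-layer weights
b2 = np.full(10, 0.1)                    # output-layer biases

hidden = np.maximum(batch @ w1 + b1, 0)  # ReLU is just max(x, 0)
logits = hidden @ w2 + b2
print(hidden.shape, logits.shape)        # (100, 500) (100, 10)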
Next is the process of training the model:
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    # Hidden-layer parameters
    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Output-layer parameters
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    # Forward pass that does not use the parameter moving averages
    y = inference(x, None, weights1, biases1, weights2, biases2)
    # The variable counting training steps is conventionally not trainable
    global_step = tf.Variable(0, trainable=False)
    # Create the moving-average class and apply it to all trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    # Forward pass that uses the moving averages; average() must be called
    # explicitly, which inference() does internally
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    # Cross entropy measures the gap between predictions and true labels
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    # Average cross entropy over all examples in the current batch
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # L2 regularization loss on the weights (biases are usually not regularized)
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    # Total loss = cross-entropy loss + regularization loss
    loss = cross_entropy_mean + regularization
    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    # Update the parameters and their moving averages in a single step
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        # Train the network iteratively
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g" % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))
Finally, the main entry point of the program:
def main(argv=None):
    mnist = input_data.read_data_sets("/tmp/data", one_hot=True)
    train(mnist)
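read_data_sets downloads MNIST into /tmp/data on first run and exposes the train/validation/test splits. A quick sketch to inspect what it returns (the shapes are the standard MNIST splits):

from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data", one_hot=True)
print(mnist.train.images.shape)       # (55000, 784)
print(mnist.validation.images.shape)  # (5000, 784)
print(mnist.test.images.shape)        # (10000, 784)
print(mnist.train.labels.shape)       # (55000, 10), one-hot because one_hot=True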
That is the complete program, and it has been verified to run. If you run into any difficulties, feel free to discuss them with me.