Tensorflow浅层学习实现mnist手写数字识别
环境测试
# Environment check: report the interpreter and library versions in use.
import platform

import numpy as np
import tensorflow as tf

# platform.python_version() is the interpreter version; the original printed
# pytest's package version under the "python version" label.
print('python version:', platform.python_version())
print('numpy version:', np.__version__)
# tf.__version__ works on every release, whereas tf.VERSION was removed in
# TensorFlow 2.x.
print('tensorflow version:', tf.__version__)
获取mnist
# Load MNIST (downloaded into MNIST_data/ on first use) with one-hot labels,
# so every label is a length-10 vector that fits the [None, 10] label
# placeholder. load_dataset('mnist') yields integer class ids instead, which
# cannot be fed to that placeholder.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
无隐藏层
启动会话(session)
启动计算图,定义一个交互session
# Open an interactive session; the later .run() / .eval() calls in this
# walkthrough do not pass a session explicitly and rely on this one.
sess = tf.InteractiveSession()
输入数据(包括训练数据和测试数据)
# Graph inputs: flattened 28x28 images and their 10-element label vectors.
# The leading None leaves the batch dimension unspecified.
x = tf.placeholder(dtype=tf.float32, shape=[None, 28 * 28])
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])
定义一个最简单的单层全连接网络,计算公式为:y=softmax(xW+b),即先计算线性部分xW+b,再利用softmax来计算预测概率,预测概率最大的类别即为预测的分类。定义两个变量W和b来保存网络参数的状态
# Trainable parameters, both initialised to zero: a 784x10 weight matrix and
# a length-10 bias vector.
W = tf.Variable(tf.zeros(shape=[784, 10]))
b = tf.Variable(tf.zeros(shape=[10]))
# Linear scores x.W + b, turned into per-class probabilities by softmax.
y = tf.nn.softmax(tf.add(tf.matmul(x, W), b))
采用cross-entropy作为损失函数
# Mean cross-entropy over the batch. The probabilities are clipped away from
# zero before the log so that a saturated softmax output cannot produce
# log(0) = -inf and turn the loss into NaN. `axis` replaces the deprecated
# `reduction_indices` alias.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis=1))
采用SGD(Stochastic Gradient Descent)进行网络的优化训练。tensorflow会自动根据前面定义的计算图进行forward和backward计算并更新参数
# Gradient-descent optimiser with learning rate 0.5; train_step is the op
# that minimises cross_entropy.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train_step = optimizer.minimize(cross_entropy)
对所有的变量进行全局初始化
# Build the op that initialises every variable defined so far, then run it.
init_op = tf.global_variables_initializer()
init_op.run()
开始迭代进行训练和评估
# 2000 training steps, each on a fresh mini-batch of 100 examples.
# The loop body must be indented; the loop index itself is not used.
for _ in range(2000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
对模型进行准确率评测
# A prediction is correct when the highest-scoring class of the model output
# equals the highest-scoring class of the label vector.
predicted_class = tf.argmax(y, 1)
true_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, true_class)
# Cast the booleans to floats so that their mean is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
print('test accuracy %g' % accuracy.eval(test_feed))
测试结果
无隐藏层准确率为91.96%
完整代码:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Complete script: softmax regression on MNIST with no hidden layer.

# Load MNIST with one-hot labels (downloaded into MNIST_data/ on first use).
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# The .run() / .eval() calls below rely on this session.
sess = tf.InteractiveSession()

# Inputs: flattened 28x28 images and 10-element one-hot labels.
x = tf.placeholder(tf.float32, [None, 28 * 28])
y_ = tf.placeholder(tf.float32, [None, 10])

# Single fully connected layer followed by softmax.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Mean cross-entropy over the batch. Probabilities are clipped away from zero
# so that log(0) cannot turn the loss into NaN; `axis` replaces the
# deprecated `reduction_indices` alias.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis=1))

# Gradient descent with learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

tf.global_variables_initializer().run()

# 2000 training steps on mini-batches of 100 examples.
for _ in range(2000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run({x: batch_xs, y_: batch_ys})

# Accuracy: fraction of test images whose predicted class matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('test accuracy %g' % accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}))
一个隐藏层
给隐含层的参数设置Variable并进行初始化,这里in_units是输入节点数,hl_units即隐含层的输出节点数,设为400。隐含层的权重使用截断的正态分布(标准差为0.1)进行初始化,给参数加一点噪声,来打破完全对称(如果全部初始化为0,隐含层的各个节点将始终学到相同的参数)。注意:下文代码中隐含层使用的激活函数是sigmoid,而不是ReLU
# Layer sizes: 784 input features and 400 hidden units.
in_units = 784
hl_units = 400
# Hidden layer: weights drawn from a truncated normal (stddev 0.1), biases
# initialised to zero.
wl = tf.Variable(tf.truncated_normal(shape=[in_units, hl_units], stddev=0.1))
bl = tf.Variable(tf.zeros(shape=[hl_units]))
# Output layer: weights and biases both initialised to zero.
w2 = tf.Variable(tf.zeros(shape=[hl_units, 10]))
b2 = tf.Variable(tf.zeros(shape=[10]))
注:
tf.truncated_normal(shape, mean, stddev):shape表示生成张量的维度,mean是均值,stddev是标准差。这个函数产生截断的正态分布随机数,均值和标准差可以自己设定:如果生成的值与均值的差值大于两倍的标准差,就丢弃并重新生成。因此,和一般的产生正态分布随机数的函数相比,这个函数产生的随机数与均值的差距不会超过两倍的标准差,而一般的函数则有可能超过
定义输入x的占位符
# Input placeholder: one row of in_units features per example; the leading
# None leaves the batch dimension unspecified.
x = tf.placeholder(dtype=tf.float32, shape=[None, in_units])
接下来定义模型结构,首先定义一个命名为hidden1的实现一个激活函数为sigmoid的隐含层,接下来是softmax输出层
# Hidden layer: affine transform followed by the sigmoid non-linearity.
hidden_pre_activation = tf.matmul(x, wl) + bl
hiddenl = tf.nn.sigmoid(hidden_pre_activation)
# Output layer: affine transform followed by softmax for class probabilities.
output_scores = tf.matmul(hiddenl, w2) + b2
y = tf.nn.softmax(output_scores)
接下来定义损失函数和选择优化器来优化loss,这里的损失函数继续使用交叉信息熵,采用SGD(Stochastic Gradient Descent)进行网络的优化训练。
# Label placeholder: one 10-element vector per example.
y_ = tf.placeholder(tf.float32, [None, 10])
# Mean cross-entropy over the batch. Probabilities are clipped away from zero
# so that a saturated softmax output cannot produce log(0) = -inf and turn
# the loss into NaN; `axis` replaces the deprecated `reduction_indices` alias.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis=1))
# Gradient descent with learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
对所有的变量进行全局初始化
# Build the op that initialises every variable defined so far, then run it.
init_op = tf.global_variables_initializer()
init_op.run()
开始迭代进行训练和评估
# 2000 training steps, each on a fresh mini-batch of 100 examples.
# The loop body must be indented; the loop index itself is not used.
for _ in range(2000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
对模型进行准确率评测
# A prediction is correct when the highest-scoring class of the model output
# equals the highest-scoring class of the label vector.
predicted_class = tf.argmax(y, 1)
true_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, true_class)
# Cast the booleans to floats so that their mean is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
print('test accuracy %g' % accuracy.eval(test_feed))
测试结果
一个隐藏层准确率为94.48%
完整代码:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Complete script: MNIST classifier with one sigmoid hidden layer.

# Load MNIST with one-hot labels (downloaded into MNIST_data/ on first use).
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# The .run() / .eval() calls below rely on this session.
sess = tf.InteractiveSession()

# Layer sizes: 784 input features and 400 hidden units.
in_units = 784
hl_units = 400

# Hidden layer: truncated-normal weights (stddev 0.1) and zero biases.
wl = tf.Variable(tf.truncated_normal([in_units, hl_units], stddev=0.1))
bl = tf.Variable(tf.zeros([hl_units]))
# Output layer: weights and biases both initialised to zero.
w2 = tf.Variable(tf.zeros([hl_units, 10]))
b2 = tf.Variable(tf.zeros([10]))

# Forward pass: sigmoid hidden layer, then softmax output layer.
x = tf.placeholder(tf.float32, [None, in_units])
hiddenl = tf.nn.sigmoid(tf.matmul(x, wl) + bl)
y = tf.nn.softmax(tf.matmul(hiddenl, w2) + b2)

# Mean cross-entropy over the batch. Probabilities are clipped away from zero
# so that log(0) cannot turn the loss into NaN; `axis` replaces the
# deprecated `reduction_indices` alias.
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis=1))

# Gradient descent with learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

tf.global_variables_initializer().run()

# 2000 training steps on mini-batches of 100 examples.
for _ in range(2000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run({x: batch_xs, y_: batch_ys})

# Accuracy: fraction of test images whose predicted class matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('test accuracy %g' % accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}))