TensorFlow学习笔记(三)：手写数字识别之卷积神经网络

最新推荐文章于 2024-05-27 11:08:38 发布

zchang81

最新推荐文章于 2024-05-27 11:08:38 发布

阅读量3.9k

点赞数 4

分类专栏： Tensorflow 文章标签： Tensorflow mnist数据集 cnn 卷积神经网络

本文链接：https://blog.csdn.net/zchang81/article/details/67637023

版权

Tensorflow 专栏收录该内容

20 篇文章 0 订阅

订阅专栏

在【TensorFlow学习笔记(一)：手写数字识别之softmax回归】中，我实现了softmax回归，在我的机器上的mnist测试集的结果是 92.9%；
在【 TensorFlow学习笔记(二)：手写数字识别之多层感知机】中，我使用含一个隐层的多层感知机，在我机器上的mnist测试集的结果是 98.14% 。

这两种方法有一个共同的特点，就是把图片像素的二维向量结构当成一维向量结构使用，丢弃了图片的空间结构信息。

在本节中，我们保留图片的二维空间结构信息，使用cnn（卷积神经网络）识别mnist数据集的手写数字。

然后看看在mnist测试集上的准确率会是多少？

为了方便，我们先定义几个常用的函数：
#使用截断的正态分布，标准差为 0.1 的初始值来初始化权值
def weight_variable(shape):
    """Return a tf.Variable of the given shape holding weights.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

#使用常量 0.1 来初始化偏置 B
def bias_variable(shape):
    """Return a tf.Variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

#使用步长 1，填充为 'SAME' 的方式来定义卷积层，填充方式见我的博文【TensorFlow学习笔记(1)——conv2d 函数的 padding 参数详解】
def conv2d(x, w):
    """Convolve x with the filter w using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')

#使用步长为2，下采样核为2 * 2 的方式来定义池化层，填充方式见我的博文【TensorFlow学习笔记(1)—— conv2d 函数的 padding 参数详解】
def max_pool(x):
    """Max-pool x with a 2 x 2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

#导入Tensorflow及MNIST数据集：
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from the MNIST_data/ directory. one_hot=True requests labels
# as 10-element indicator vectors, matching the [None, 10] label placeholder.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

#定义符号变量 x （数据）和 y_（标签）
# Symbolic inputs. The leading None leaves the batch dimension open, so any
# number of examples can be fed at once.
# Fixed: the original wrote `tf.float32 [ None,784 ]` (missing comma), which
# subscripts the dtype instead of passing a shape and fails while the graph
# is being built.
x = tf.placeholder(tf.float32, [None, 784])   # flattened 28 * 28 images
y_ = tf.placeholder(tf.float32, [None, 10])   # one-hot labels

#先将mnist数据集图片还原为二维向量结构，28 * 28 = 784
# -1 lets TensorFlow infer the batch size; the trailing 1 is the single
# input channel expected by the first filter ([5, 5, 1, 32]).
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

#第一个卷积层, 总共 32 个 5 * 5 的卷积核，由于使用 'SAME' 填充，因此卷积后的图片尺寸依然是 28 * 28
# 32 filters of size 5 * 5 over the single input channel.
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Pre-activation output, built exactly once. The original constructed the
# same conv + bias expression twice (once for b_conv1_1, which was never
# used, and again inside relu), leaving a redundant node in the graph.
# The name b_conv1_1 is kept so any later reference still resolves.
b_conv1_1 = conv2d(x_image, w_conv1) + b_conv1
h_conv1 = tf.nn.relu(b_conv1_1)

#第一个池化层，28 * 28的图片尺寸池化后，变为 14 * 14
# Downsample the first convolution's activations with the 2 * 2 max-pool helper.
h_pool1 = max_pool(h_conv1)

#第二个卷积层, 总共 64 个 5 * 5 的卷积核，由于使用 'SAME' 填充，因此卷积后的图片尺寸依然是 14 * 14
# 64 filters of size 5 * 5, each spanning the 32 channels produced by layer 1.
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
conv2_pre_activation = conv2d(h_pool1, w_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)

#第二个池化层，14 * 14 的图片尺寸池化后，变为 7 * 7
# Downsample the second convolution's activations with the 2 * 2 max-pool helper.
h_pool2 = max_pool(h_conv2)

#使用上面定义的方式初始化接下来的两个全连接层的参数 W 和 B
# Parameters of the two fully connected layers, created in the same order as
# before: hidden layer (7 * 7 * 64 -> 1024) first, then output layer (1024 -> 10).
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

#将二维图片结构转化为一维图片结构

# Flatten each example's 7 * 7 * 64 feature maps into one row so they can be
# multiplied by w_fc1; -1 keeps the batch dimension inferred.
h_pool2_flat = tf.reshape(h_pool2, shape=[-1, 7 * 7 * 64])

#第一个全连接层，图片尺寸从 7 * 7 * 64 维变换为1024 维
# Hidden fully connected layer: affine transform followed by ReLU.
fc1_pre_activation = tf.matmul(h_pool2_flat, w_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)

#第二个全连接层，将 1024 维特征变换为 10 维，再经 softmax 得到各类别的概率（与 one-hot 标签的维度相同）
# Output layer: 10 scores per example, normalised by softmax.
output_scores = tf.matmul(h_fc1, w_fc2) + b_fc2
y = tf.nn.softmax(output_scores)

#计算交叉熵loss，并使用 Adam（自适应矩估计）优化算法最小化loss
# Cross-entropy summed over the batch and the 10 classes.
# y is clipped away from 0 before taking the log: a softmax output can
# underflow to exactly 0, and 0 * log(0) would turn the whole loss (and every
# gradient) into NaN. The original applied tf.log to the raw softmax output.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

#计算准确率
# A prediction is correct when the highest-scoring class index equals the
# index of the 1 in the one-hot label.
predicted_class = tf.argmax(y, 1)
labelled_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)
# Casting the booleans to floats makes their mean the fraction that is correct.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

#定义一个交互式的session，并初始化所有变量
# The later .eval() / .run() calls pass no session argument, so they rely on
# this interactive session being the one in effect.
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

#开始训练，测试准确率
# Train for 10000 mini-batches of 50 examples, reporting progress every 200 steps.
for i in range(10000):
    batch = mnist.train.next_batch(50)
    train_feed = {x: batch[0], y_: batch[1]}
    if i % 200 == 0:
        # Both accuracies are measured before this step's parameter update.
        train_acc = accuracy.eval(feed_dict=train_feed)
        # Fixed: the original computed train_acc (an extra forward pass) and
        # then discarded it; report it next to the test accuracy.
        print("step %d, train accuracy %g" % (i, train_acc))
        print("test accuracy", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    train_step.run(feed_dict=train_feed)

在我的电脑上，准确率是 99.17% ，结果如下：
==================== RESTART: D:\Python_code\ML\mnist.py ====================
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
accuracy: 0.9917
>>>

完整代码如下（注意：其中的学习率 0.0001、迭代次数 100000、batch 大小 100 以及每 100 步评估一次，与上文分步讲解中的 0.001、10000、50、200 不同）：
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

def weight_variable(shape):
    """Return a tf.Variable of the given shape holding weights.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, w):
    """Convolve x with the filter w using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')

def max_pool(x):
    """Max-pool x with a 2 x 2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# Load MNIST with one-hot labels, then declare the symbolic inputs.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# The leading None leaves the batch dimension open.
x = tf.placeholder(tf.float32, shape=[None, 784])   # flattened 28 * 28 images
y_ = tf.placeholder(tf.float32, shape=[None, 10])   # one-hot labels

# Restore the 2-D layout: -1 infers the batch size, the trailing 1 is the
# single input channel expected by the first filter.
x_image = tf.reshape(x, [-1, 28, 28, 1])
# First convolution: 32 filters of size 5 * 5.
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Pre-activation output, built exactly once. The original constructed the
# same conv + bias expression twice (once for b_conv1_1, which was never
# used, and again inside relu), leaving a redundant node in the graph.
# The name b_conv1_1 is kept so any later reference still resolves.
b_conv1_1 = conv2d(x_image, w_conv1) + b_conv1
h_conv1 = tf.nn.relu(b_conv1_1)
h_pool1 = max_pool(h_conv1)

# Second convolution: 64 filters of size 5 * 5 across the 32 channels from
# layer 1, followed by ReLU and 2 * 2 max pooling.
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
conv2_pre_activation = conv2d(h_pool1, w_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)
h_pool2 = max_pool(h_conv2)

# Hidden fully connected layer: 7 * 7 * 64 inputs -> 1024 units.
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
# Flatten each example's feature maps into one row; -1 infers the batch size.
h_pool2_flat = tf.reshape(h_pool2, shape=[-1, 7 * 7 * 64])
fc1_pre_activation = tf.matmul(h_pool2_flat, w_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)

# Output layer: 1024 units -> 10 class scores, normalised by softmax.
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
output_scores = tf.matmul(h_fc1, w_fc2) + b_fc2
y = tf.nn.softmax(output_scores)

# Cross-entropy summed over the batch and the 10 classes.
# y is clipped away from 0 before taking the log: a softmax output can
# underflow to exactly 0, and 0 * log(0) would turn the whole loss (and every
# gradient) into NaN. The original applied tf.log to the raw softmax output.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

# A prediction is correct when the highest-scoring class index equals the
# index of the 1 in the one-hot label.
predicted_class = tf.argmax(y, 1)
labelled_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)
# Casting the booleans to floats makes their mean the fraction that is correct.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# The later .eval() / .run() calls pass no session argument, so they rely on
# this interactive session being the one in effect.
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Train for 100000 mini-batches of 100 examples, reporting progress every 100 steps.
for i in range(100000):
    batch = mnist.train.next_batch(100)
    train_feed = {x: batch[0], y_: batch[1]}
    if i % 100 == 0:
        # Both accuracies are measured before this step's parameter update.
        train_acc = accuracy.eval(feed_dict=train_feed)
        # Fixed: the original computed train_acc (an extra forward pass) and
        # then discarded it; report it next to the test accuracy.
        print("step %d, train accuracy %g" % (i, train_acc))
        print("test accuracy", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    train_step.run(feed_dict=train_feed)