在【TensorFlow学习笔记(一):手写数字识别之softmax回归】中,我实现了softmax回归,在我的机器上的mnist测试集的结果是 92.9%;
在【 TensorFlow学习笔记(二):手写数字识别之多层感知机】中,我使用含一个隐层的多层感知机,在我机器上的mnist测试集的结果是 98.14% 。
为了方便,我们先定义几个常用的函数:
# Weights are initialised from a truncated normal distribution with a standard deviation of 0.1.
def weight_variable(shape):
    """Return a new tf.Variable of the given shape filled with truncated-normal noise (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
# Biases are initialised to the constant 0.1.
def bias_variable(shape):
    """Return a new tf.Variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
# Convolution layer with stride 1 and 'SAME' padding. For how the padding modes work, see the
# author's post 【TensorFlow学习笔记(1)——conv2d 函数的 padding 参数详解】.
def conv2d(x, w):
    """Convolve x with the filter w using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')
# Pooling layer with a 2 * 2 window and stride 2. For how the padding modes work, see the
# author's post 【TensorFlow学习笔记(1)—— conv2d 函数的 padding 参数详解】.
def max_pool(x):
    """Max-pool x with a 2 * 2 window and stride 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
#导入Tensorflow及MNIST数据集:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Graph inputs: x takes flattened 784-value images, y_ takes the 10-way one-hot labels.
# The leading None leaves the batch dimension open.
# The dtype and the shape are two separate arguments of tf.placeholder.
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
# Restore the 2-D image layout (28 * 28 = 784) with a single channel;
# -1 lets the batch dimension be inferred.
x_image = tf.reshape(x, [-1, 28, 28, 1])
# First convolutional layer: 32 kernels of size 5 * 5. Because of 'SAME' padding the
# feature maps keep the 28 * 28 input size.
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Pre-activation of the first layer, built once and fed to the ReLU below so the
# convolution is not added to the graph twice.
b_conv1_1 = conv2d(x_image, w_conv1) + b_conv1
h_conv1 = tf.nn.relu(b_conv1_1)
# First pooling layer: 28 * 28 feature maps become 14 * 14.
h_pool1 = max_pool(h_conv1)
# Second convolutional layer: 64 kernels of size 5 * 5 over the 32 input channels.
# Because of 'SAME' padding the feature maps stay 14 * 14.
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
# Second pooling layer: 14 * 14 feature maps become 7 * 7.
h_pool2 = max_pool(h_conv2)
# Parameters W and B of the two fully connected layers, created with the helpers defined above.
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
# Flatten the pooled 7 * 7 * 64 feature maps into one vector per image; the matmul
# below needs this 2-D tensor.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
# Second fully connected layer: maps the 1024 features to 10 class scores, which
# softmax turns into class probabilities.
y = tf.nn.softmax(tf.matmul(h_fc1, w_fc2) + b_fc2)
# Cross-entropy loss summed over the batch. The probabilities are clipped away from 0
# so tf.log never yields -inf, which would otherwise turn the loss (0 * -inf) into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
# Minimise the loss with the Adam optimizer, learning rate 0.001.
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
# Accuracy: fraction of samples whose highest-probability class equals the labelled class.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Open an interactive session and initialise all variables. The .eval() / .run() calls
# below pass no session explicitly, so they rely on this one being the default session.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Train for 10000 steps on mini-batches of 50; every 200 steps report the accuracy on the
# current batch and on the full test set (both measured before that step's update).
for i in range(10000):
    batch = mnist.train.next_batch(50)
    feed = {x: batch[0], y_: batch[1]}
    if i % 200 == 0:
        train_acc = accuracy.eval(feed_dict=feed)
        print("step", i, "train accuracy", train_acc)
        print("test accuracy", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    train_step.run(feed_dict=feed)
在我的电脑上,准确率是 99.17% ,结果如下:
==================== RESTART: D:\Python_code\ML\mnist.py ====================
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
accuracy: 0.9917
>>>
完整代码如下:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
def weight_variable(shape):
    """Return a new tf.Variable of the given shape filled with truncated-normal noise (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a new tf.Variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, w):
    """Convolve x with the filter w using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')
def max_pool(x):
    """Max-pool x with a 2 * 2 window and stride 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# Load MNIST with one-hot labels and declare the graph inputs (batch dimension left open).
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
# Restore the 28 * 28 single-channel image layout; -1 lets the batch dimension be inferred.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First conv layer (32 kernels, 5 * 5) followed by 2 * 2 max pooling.
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Pre-activation of the first layer, built once and fed to the ReLU below so the
# convolution is not added to the graph twice.
b_conv1_1 = conv2d(x_image, w_conv1) + b_conv1
h_conv1 = tf.nn.relu(b_conv1_1)
h_pool1 = max_pool(h_conv1)

# Second conv layer (64 kernels, 5 * 5, 32 input channels) followed by 2 * 2 max pooling.
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pool(h_conv2)

# First fully connected layer: flatten the pooled feature maps to 7 * 7 * 64 values
# per image and map them to 1024 features.
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

# Output layer: 10 class scores turned into class probabilities by softmax.
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y = tf.nn.softmax(tf.matmul(h_fc1, w_fc2) + b_fc2)
# Cross-entropy loss summed over the batch. The probabilities are clipped away from 0
# so tf.log never yields -inf, which would otherwise turn the loss (0 * -inf) into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
# Minimise the loss with the Adam optimizer, learning rate 0.0001.
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)
# Accuracy: fraction of samples whose highest-probability class equals the labelled class.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Open an interactive session and initialise all variables. The .eval() / .run() calls
# below pass no session explicitly, so they rely on this one being the default session.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Train for 100000 steps on mini-batches of 100; every 100 steps report the accuracy on
# the current batch and on the full test set (both measured before that step's update).
for i in range(100000):
    batch = mnist.train.next_batch(100)
    feed = {x: batch[0], y_: batch[1]}
    if i % 100 == 0:
        train_acc = accuracy.eval(feed_dict=feed)
        print("step", i, "train accuracy", train_acc)
        print("test accuracy", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    train_step.run(feed_dict=feed)
在【 TensorFlow学习笔记(二):手写数字识别之多层感知机】中,我使用含一个隐层的多层感知机,在我机器上的mnist测试集的结果是 98.14% 。
这两种方法有一个共同的特点,就是把图片像素的二维向量结构当成一维向量结构使用,丢弃了图片的空间结构信息。
在本节中,我们保留图片的二维空间结构信息,使用 CNN(卷积神经网络)识别 MNIST 数据集的手写数字。
然后看看在mnist测试集上的准确率会是多少?为了方便,我们先定义几个常用的函数:
# Weights are initialised from a truncated normal distribution with a standard deviation of 0.1.
def weight_variable(shape):
    """Return a new tf.Variable of the given shape filled with truncated-normal noise (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
# Biases are initialised to the constant 0.1.
def bias_variable(shape):
    """Return a new tf.Variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
# Convolution layer with stride 1 and 'SAME' padding. For how the padding modes work, see the
# author's post 【TensorFlow学习笔记(1)——conv2d 函数的 padding 参数详解】.
def conv2d(x, w):
    """Convolve x with the filter w using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')
# Pooling layer with a 2 * 2 window and stride 2. For how the padding modes work, see the
# author's post 【TensorFlow学习笔记(1)—— conv2d 函数的 padding 参数详解】.
def max_pool(x):
    """Max-pool x with a 2 * 2 window and stride 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
#导入Tensorflow及MNIST数据集:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Graph inputs: x takes flattened 784-value images, y_ takes the 10-way one-hot labels.
# The leading None leaves the batch dimension open.
# The dtype and the shape are two separate arguments of tf.placeholder.
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
# Restore the 2-D image layout (28 * 28 = 784) with a single channel;
# -1 lets the batch dimension be inferred.
x_image = tf.reshape(x, [-1, 28, 28, 1])
# First convolutional layer: 32 kernels of size 5 * 5. Because of 'SAME' padding the
# feature maps keep the 28 * 28 input size.
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Pre-activation of the first layer, built once and fed to the ReLU below so the
# convolution is not added to the graph twice.
b_conv1_1 = conv2d(x_image, w_conv1) + b_conv1
h_conv1 = tf.nn.relu(b_conv1_1)
# First pooling layer: 28 * 28 feature maps become 14 * 14.
h_pool1 = max_pool(h_conv1)
# Second convolutional layer: 64 kernels of size 5 * 5 over the 32 input channels.
# Because of 'SAME' padding the feature maps stay 14 * 14.
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
# Second pooling layer: 14 * 14 feature maps become 7 * 7.
h_pool2 = max_pool(h_conv2)
# Parameters W and B of the two fully connected layers, created with the helpers defined above.
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
# Flatten the 2-D feature maps into one vector of 7 * 7 * 64 values per image.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
fc1_pre = tf.matmul(h_pool2_flat, w_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre)
# Second fully connected layer: maps the 1024 features to 10 class scores, which
# softmax turns into class probabilities.
logits = tf.matmul(h_fc1, w_fc2) + b_fc2
y = tf.nn.softmax(logits)
# Cross-entropy loss summed over the batch. The probabilities are clipped away from 0
# so tf.log never yields -inf, which would otherwise turn the loss (0 * -inf) into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
# Minimise the loss with the Adam optimizer, learning rate 0.001.
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
# Accuracy: fraction of samples whose highest-probability class equals the labelled class.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Open an interactive session and initialise all variables. The .eval() / .run() calls
# below pass no session explicitly, so they rely on this one being the default session.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Train for 10000 steps on mini-batches of 50; every 200 steps report the accuracy on the
# current batch and on the full test set (both measured before that step's update).
for i in range(10000):
    batch = mnist.train.next_batch(50)
    feed = {x: batch[0], y_: batch[1]}
    if i % 200 == 0:
        train_acc = accuracy.eval(feed_dict=feed)
        print("step", i, "train accuracy", train_acc)
        print("test accuracy", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    train_step.run(feed_dict=feed)
在我的电脑上,准确率是 99.17% ,结果如下:
==================== RESTART: D:\Python_code\ML\mnist.py ====================
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
accuracy: 0.9917
>>>
完整代码如下:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
def weight_variable(shape):
    """Return a new tf.Variable of the given shape filled with truncated-normal noise (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a new tf.Variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, w):
    """Convolve x with the filter w using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')
def max_pool(x):
    """Max-pool x with a 2 * 2 window and stride 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# Load MNIST with one-hot labels and declare the graph inputs (batch dimension left open).
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
# Restore the 28 * 28 single-channel image layout; -1 lets the batch dimension be inferred.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First conv layer (32 kernels, 5 * 5) followed by 2 * 2 max pooling.
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Pre-activation of the first layer, built once and fed to the ReLU below so the
# convolution is not added to the graph twice.
b_conv1_1 = conv2d(x_image, w_conv1) + b_conv1
h_conv1 = tf.nn.relu(b_conv1_1)
h_pool1 = max_pool(h_conv1)

# Second conv layer (64 kernels, 5 * 5, 32 input channels) followed by 2 * 2 max pooling.
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pool(h_conv2)

# First fully connected layer: flatten the pooled feature maps to 7 * 7 * 64 values
# per image and map them to 1024 features.
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

# Output layer: 10 class scores turned into class probabilities by softmax.
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y = tf.nn.softmax(tf.matmul(h_fc1, w_fc2) + b_fc2)
# Cross-entropy loss summed over the batch. The probabilities are clipped away from 0
# so tf.log never yields -inf, which would otherwise turn the loss (0 * -inf) into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
# Minimise the loss with the Adam optimizer, learning rate 0.0001.
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)
# Accuracy: fraction of samples whose highest-probability class equals the labelled class.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Open an interactive session and initialise all variables. The .eval() / .run() calls
# below pass no session explicitly, so they rely on this one being the default session.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Train for 100000 steps on mini-batches of 100; every 100 steps report the accuracy on
# the current batch and on the full test set (both measured before that step's update).
for i in range(100000):
    batch = mnist.train.next_batch(100)
    feed = {x: batch[0], y_: batch[1]}
    if i % 100 == 0:
        train_acc = accuracy.eval(feed_dict=feed)
        print("step", i, "train accuracy", train_acc)
        print("test accuracy", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    train_step.run(feed_dict=feed)