The LeNet-5 model was proposed by Professor Yann LeCun in the 1998 paper Gradient-Based Learning Applied to Document Recognition; it was the first convolutional neural network successfully applied to digit recognition. LeNet-5 has seven layers in total.
Key characteristics of LeNet-5 (a minimal sketch of the classic architecture follows this list):
1) Each convolutional stage consists of three parts: convolution, pooling, and a nonlinear activation function;
2) convolution is used to extract spatial features;
3) subsampling is performed with average pooling;
4) the activation function is the hyperbolic tangent (tanh) or the sigmoid;
5) an MLP serves as the final classifier;
6) sparse connections between layers reduce computational complexity.
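For reference, here is a minimal sketch of the classic seven-layer LeNet-5 in TensorFlow 1.x, assuming 32x32 grayscale inputs as in the paper. The helper names conv_tanh and avg_pool_2x2 and the weight initialization are my own illustration, and the paper's RBF output units are replaced here by a plain 10-way linear layer:

import tensorflow as tf

# input: 32x32 grayscale images, as in the original paper
inputs = tf.placeholder(tf.float32, [None, 32, 32, 1])

def conv_tanh(inp, shape):
    # 5x5 convolution (VALID padding, as in the paper) followed by tanh
    w = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    return tf.tanh(tf.nn.conv2d(inp, w, strides=[1, 1, 1, 1], padding='VALID') + b)

def avg_pool_2x2(inp):
    # 2x2 average-pooling subsampling
    return tf.nn.avg_pool(inp, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

c1 = conv_tanh(inputs, [5, 5, 1, 6])   # C1: 6 feature maps, 28x28
s2 = avg_pool_2x2(c1)                  # S2: 6 feature maps, 14x14
c3 = conv_tanh(s2, [5, 5, 6, 16])      # C3: 16 feature maps, 10x10
s4 = avg_pool_2x2(c3)                  # S4: 16 feature maps, 5x5
c5 = conv_tanh(s4, [5, 5, 16, 120])    # C5: 120 feature maps, 1x1
flat = tf.reshape(c5, [-1, 120])
w_f6 = tf.Variable(tf.truncated_normal([120, 84], stddev=0.1))
b_f6 = tf.Variable(tf.constant(0.1, shape=[84]))
f6 = tf.tanh(tf.matmul(flat, w_f6) + b_f6)       # F6: 84 units
w_out = tf.Variable(tf.truncated_normal([84, 10], stddev=0.1))
b_out = tf.Variable(tf.constant(0.1, shape=[10]))
logits = tf.matmul(f6, w_out) + b_out            # output layer: 10 classes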
The following TensorFlow program implements MNIST handwritten-digit recognition (it substitutes ReLU activations, max pooling, and dropout for LeNet-5's original tanh and average pooling). It trains for 10,000 iterations with a batch size of 50, prints the training accuracy every 200 iterations, and reaches a final test accuracy of 99.05%.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
#load data sets
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
#create session
sess = tf.InteractiveSession()
#define weight function
def weight_variable(shape):
    """Initialize a weight variable.

    Args:
        shape: filter shape, e.g. [5, 5, 1, 32] (5x5 kernel, 1 input channel, 32 filters).
    Returns:
        A tf.Variable drawn from a truncated normal distribution with stddev 0.1.
    """
    init = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(init)
#define biases function
def biases_variable(shape):
    """Initialize a bias variable.

    Args:
        shape: bias shape, e.g. [32] (one bias per output channel).
    Returns:
        A tf.Variable initialized to the constant 0.1.
    """
    init = tf.constant(0.1, shape=shape)
    return tf.Variable(init)
#define conv layer: stride-1 convolution with SAME padding keeps the spatial size unchanged
def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
#define max pooling layer: 2x2 pooling with stride 2 halves each spatial dimension
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
#define parameters
n_input = 784   # 28*28 pixels per flattened MNIST image
batch_size = 50
#define input placeholders
x = tf.placeholder(tf.float32, [None, n_input])
y_true = tf.placeholder(tf.float32, [None, 10])
# reshape the flat 784-vector into a 28x28 single-channel image (NHWC; -1 infers the batch size)
x_image = tf.reshape(x, [-1, 28, 28, 1])
#define conv layer 1: 5x5 conv, 1 -> 32 channels, ReLU, then 2x2 max pool (28x28 -> 14x14)
w_conv_1 = weight_variable([5, 5, 1, 32])
b_conv_1 = biases_variable([32])
h_conv_1 = tf.nn.relu(conv2d(x_image, w_conv_1) + b_conv_1)
h_pool_1 = max_pool_2x2(h_conv_1)
#define conv layer 2: 5x5 conv, 32 -> 64 channels, ReLU, then 2x2 max pool (14x14 -> 7x7)
w_conv_2 = weight_variable([5, 5, 32, 64])
b_conv_2 = biases_variable([64])
h_conv_2 = tf.nn.relu(conv2d(h_pool_1, w_conv_2) + b_conv_2)
h_pool_2 = max_pool_2x2(h_conv_2)
#define fully connected layer 1: flatten the 7*7*64 feature maps into 1024 hidden units
w_fc_1 = weight_variable([7*7*64, 1024])
b_fc_1 = biases_variable([1024])
flatten_1 = tf.reshape(h_pool_2, [-1, 7*7*64])
h_fc_1 = tf.nn.relu(tf.add(tf.matmul(flatten_1, w_fc_1), b_fc_1))
#define dropout layer: keep_prob is 0.5 during training and 1.0 during evaluation
keep_prob = tf.placeholder(tf.float32)
fc_1_dropout = tf.nn.dropout(h_fc_1, keep_prob)
#define fully connected layer 2: map 1024 hidden units to 10 class probabilities
w_fc_2 = weight_variable([1024, 10])
b_fc_2 = biases_variable([10])
y_pred = tf.nn.softmax(tf.matmul(fc_1_dropout, w_fc_2) + b_fc_2)
#define cost function: cross-entropy between the true labels and the softmax output
cost = tf.reduce_mean(-tf.reduce_sum(y_true * tf.log(y_pred), axis=[1]))
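# Note: tf.log of a softmax output can underflow to -inf when a probability
# reaches 0, producing NaN losses. A numerically safer alternative (my suggestion,
# not part of the original post) is to work from the raw logits:
#   logits = tf.matmul(fc_1_dropout, w_fc_2) + b_fc_2
#   cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=logits))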
#define optimizer: Adam with a learning rate of 1e-4
optimizer = tf.train.AdamOptimizer(1e-4).minimize(cost)
#calculate accuracy: compare predicted and true class indices
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#initialize all variables in the session
tf.global_variables_initializer().run()
#train: 10000 iterations, batch size 50, report training accuracy every 200 steps
for i in range(10000):
    batch_xs, batch_ys = mnist.train.next_batch(batch_size)
    if i % 200 == 0:
        # evaluate on the current batch with dropout disabled (keep_prob = 1.0)
        train_accuracy = accuracy.eval(feed_dict={x: batch_xs, y_true: batch_ys, keep_prob: 1.0})
        print("step : %d" % i, " accuracy = ", "{:.9f}".format(train_accuracy))
    # one optimization step with dropout enabled
    optimizer.run(feed_dict={x: batch_xs, y_true: batch_ys, keep_prob: 0.5})
#test model: evaluate on the full test set with dropout disabled
print("test accuracy : %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_true: mnist.test.labels, keep_prob: 1.0}))
Practice makes perfect!