1 import
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST with one-hot encoded labels (one_hot=True).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
trainimg = mnist.train.images
trainlabel = mnist.train.labels
testimg = mnist.test.images
testlabel = mnist.test.labels

# The placeholder shapes below depend on these sizes, so they must be
# defined before first use (otherwise the script fails with a NameError).
n_input = 784  # length of one flattened 28 x 28 image
n_output = 10  # width of one one-hot label vector

# Placeholders through which data is fed into the graph.
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_output])
# Keep probability handed to the dropout layers.
keepratio = tf.placeholder(tf.float32)
2 define weights and bias
If w and b are defined as follows, the result is shown in the next picture.
We could define f(x) as:
Like a fully connected network, a CNN also needs weights w and biases b.
# Weights and biases of every layer, kept in dictionaries for easy lookup.
n_input = 784
n_output = 10


def _normal_variable(shape):
    """Return a tf.Variable initialised with random normal values (stddev=0.1)."""
    return tf.Variable(tf.random_normal(shape, stddev=0.1))


# Ordered (key, shape) tables; variables are created in exactly this order.
_weight_shapes = [
    # Conv layer 1: 3x3 kernel, input depth 1, output depth 64.
    ('wc1', [3, 3, 1, 64]),
    # Conv layer 2: 3x3 kernel, input depth 64, output depth 128.
    ('wc2', [3, 3, 64, 128]),
    # Each pooling layer halves height and width: 28 / 2 / 2 = 7.
    # Fully connected layer 1.
    ('wd1', [7*7*128, 1024]),
    # Fully connected layer 2.
    ('wd2', [1024, n_output]),
]
# One bias vector for each of the four layers above.
_bias_shapes = [
    ('bc1', [64]),
    ('bc2', [128]),
    ('bd1', [1024]),
    ('bd2', [n_output]),
]

weights = {key: _normal_variable(shape) for key, shape in _weight_shapes}
biases = {key: _normal_variable(shape) for key, shape in _bias_shapes}
3 the whole network
A pooling layer reduces the height and width of its input.
Max pooling takes the maximum value within a given window. Here is an example:
The filter is 2x2, so we take the maximum over each 2x2 area. The stride is 2, so the filter moves 2 positions each time.
# Network structure.
def conv_basic(_input, _w, _b, _keepratio):
    """Build the conv -> pool -> conv -> pool -> fc -> fc graph.

    Args:
        _input: Batch of flattened images; reshaped here to [-1, 28, 28, 1].
        _w: Dict of weight tensors with keys 'wc1', 'wc2', 'wd1', 'wd2'.
        _b: Dict of bias tensors with keys 'bc1', 'bc2', 'bd1', 'bd2'.
        _keepratio: Keep probability passed to every tf.nn.dropout call.

    Returns:
        Dict holding every intermediate tensor plus the final layer output
        under 'out' (no softmax is applied inside this function).
    """
    unit_strides = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    # Images arrive as flat vectors, but conv2d needs a 4-D tensor laid out
    # as [batch_size, height, width, channels].
    images = tf.reshape(_input, shape=[-1, 28, 28, 1])

    # Layer 1: convolution; stride 1 with 'SAME' padding keeps height/width.
    conv1_linear = tf.nn.conv2d(images, _w['wc1'], strides=unit_strides, padding='SAME')
    # Non-linearity: ReLU applied after adding the bias.
    conv1 = tf.nn.relu(tf.nn.bias_add(conv1_linear, _b['bc1']))
    # Layer 2: 2x2 max pooling with stride 2, followed by dropout.
    pool1 = tf.nn.max_pool(conv1, ksize=pool_window, strides=pool_window, padding='SAME')
    pool1_dropped = tf.nn.dropout(pool1, _keepratio)

    # Layer 3: second convolution, built the same way as the first.
    conv2_linear = tf.nn.conv2d(pool1_dropped, _w['wc2'], strides=unit_strides, padding='SAME')
    conv2 = tf.nn.relu(tf.nn.bias_add(conv2_linear, _b['bc2']))
    # Layer 4: second max pooling, followed by dropout.
    pool2 = tf.nn.max_pool(conv2, ksize=pool_window, strides=pool_window, padding='SAME')
    pool2_dropped = tf.nn.dropout(pool2, _keepratio)

    # Flatten: the fully connected layers operate on vectors whose length is
    # the first dimension of the 'wd1' weight matrix.
    flat_size = _w['wd1'].get_shape().as_list()[0]
    dense1 = tf.reshape(pool2_dropped, [-1, flat_size])

    # Layer 5: fully connected + ReLU, followed by dropout.
    fc1 = tf.nn.relu(tf.add(tf.matmul(dense1, _w['wd1']), _b['bd1']))
    fc1_dropped = tf.nn.dropout(fc1, _keepratio)

    # Layer 6: fully connected output layer.
    logits = tf.add(tf.matmul(fc1_dropped, _w['wd2']), _b['bd2'])

    # Return every intermediate result in one dictionary.
    return {
        'input_r': images,
        'conv1': conv1,
        'pool1': pool1,
        'pool1_dr1': pool1_dropped,
        'conv2': conv2,
        'pool2': pool2,
        'pool_dr2': pool2_dropped,
        'dense1': dense1,
        'fc1': fc1,
        'fc1_dr1': fc1_dropped,
        'out': logits,
    }
4 loss
# Loss: mean softmax cross-entropy between the network output and the labels.
# Optimizer: Adam with a learning rate of 0.001.
_pred = conv_basic(x, weights, biases, keepratio)['out']
_xent = tf.nn.softmax_cross_entropy_with_logits(logits=_pred, labels=y)
cost = tf.reduce_mean(_xent)
optm = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

# Accuracy: fraction of examples whose arg-max prediction matches the
# arg-max of the label vector.
_corr = tf.equal(tf.argmax(_pred, 1), tf.argmax(y, 1))
accr = tf.reduce_mean(tf.cast(_corr, tf.float32))

# Created after the optimizer so that its variables are initialised too.
init_op = tf.global_variables_initializer()
5 run
training_epoches = 10  # number of training epochs
batch_size = 16  # examples per mini-batch
display_step = 1  # print intermediate results every `display_step` epochs
# One epoch should cover the whole training set. A hard-coded 100 batches
# would visit only 100 * batch_size examples per "epoch".
total_batch = mnist.train.num_examples // batch_size

# Using the session as a context manager closes it even if training fails.
with tf.Session() as sess:
    sess.run(init_op)
    for epoch in range(training_epoches):
        avg_cost = 0
        for _ in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Train with dropout enabled (keep probability 0.7).
            sess.run(optm, feed_dict={x: batch_xs, y: batch_ys, keepratio: 0.7})
            # Measure the loss with dropout disabled and average it over the epoch.
            avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keepratio: 1.}) / total_batch
        if epoch % display_step == 0:
            print('Epoch: %03d/%03d cost: %.9f'%(epoch, training_epoches, avg_cost))
            # Training accuracy on the last mini-batch only, so it is a
            # coarse estimate that moves in steps of 1 / batch_size.
            train_acc = sess.run(accr, feed_dict={x: batch_xs, y: batch_ys, keepratio: 1.})
            # Accuracy on the full test set.
            test_acc = sess.run(accr, feed_dict={x: testimg, y: testlabel, keepratio: 1.})
            print("Training accuracy: %.03f" % train_acc)
            print("Test accuracy: %.03f" % test_acc)
6 result
Epoch: 007/010 cost: 0.108828657
Training accuracy: 1.000
Test accuracy: 0.968
Epoch: 008/010 cost: 0.115439507
Training accuracy: 1.000
Test accuracy: 0.972
Epoch: 009/010 cost: 0.107176731
Training accuracy: 0.938
Test accuracy: 0.972
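It looks abnormal that the training accuracy is 1.000 in Epoch 008/010 but only 0.938 in Epoch 009/010, and that in Epoch 009/010 the training accuracy (0.938) is lower than the test accuracy (0.972).
Both situations arise because I chose total_batch as 100. In fact, total_batch should be chosen as int(mnist.train.num_examples/batch_size). Note also that the training accuracy is measured only on the last mini-batch of 16 samples, so a single misclassified image already lowers it to 15/16 ≈ 0.938.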