卷积神经网络(CNN)的设计灵感来自人类大脑视觉皮层识别物体的工作原理,在图像识别任务中表现突出。下面用 TensorFlow 构建一个 CNN 来解决 MNIST 手写数字识别问题,并观察它识别手写数字的能力。
import sys
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Load the data
def load_mnist(path, kind='train'):
    """Load MNIST images and labels stored in IDX (ubyte) files under `path`.

    The files are expected at
    ``<path>/<kind>-labels-idx1-ubyte/<kind>-labels.idx1-ubyte`` and
    ``<path>/<kind>-images-idx3-ubyte/<kind>-images.idx3-ubyte``.

    Parameters
    ----------
    path : str
        Directory that contains the two per-file sub-directories.
    kind : str
        File-name prefix, e.g. ``'train'`` or ``'t10k'``.

    Returns
    -------
    images : numpy.ndarray
        Float array of shape ``(n_samples, rows * cols)``; the raw bytes are
        rescaled from [0, 255] to [-1, 1].
    labels : numpy.ndarray
        ``uint8`` array of shape ``(n_samples,)``.

    Raises
    ------
    ValueError
        If the amount of pixel data does not match the number of labels
        times the image size announced in the image-file header.
    """
    # Join every component separately so the platform's own separator is
    # used; a hard-coded backslash only resolves on Windows.
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte' % kind,
                               '%s-labels.idx1-ubyte' % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte' % kind,
                               '%s-images.idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # Header: two big-endian ('>') unsigned 32-bit ints ('I'):
        # a magic number and the item count.  struct.unpack always
        # returns a tuple, even for a single value.
        _magic, _n_labels = struct.unpack('>II', lbpath.read(8))
        # Everything after the header is one unsigned byte per label.
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # Header: magic number, image count, rows and columns per image.
        _magic, _n_images, rows, cols = struct.unpack('>IIII',
                                                      imgpath.read(16))
        raw_pixels = np.fromfile(imgpath, dtype=np.uint8)

    # Use the image size from the header rather than a hard-coded 784.
    n_features = rows * cols
    if raw_pixels.size != len(labels) * n_features:
        raise ValueError(
            'image data holds %d bytes but %d labels x %d pixels were '
            'expected' % (raw_pixels.size, len(labels), n_features))
    images = raw_pixels.reshape(len(labels), n_features)

    # Rescale every pixel from [0, 255] to [-1, 1].
    images = ((images / 255.) - .5) * 2
    return images, labels
# Read the 'train' and 't10k' (test) files and report their dimensions.
# NOTE(review): 'xxx' looks like a placeholder for the dataset directory --
# replace it with the real location before running.
X_data, y_data = load_mnist('xxx', kind='train')
print('Rows: %d, columns: %d' % X_data.shape[:2])
X_test, y_test = load_mnist('xxx', kind='t10k')
print('Rows: %d, columns: %d' % X_test.shape[:2])

# The first 50000 rows are used for training, the remainder for validation.
X_train, X_valid = X_data[:50000, :], X_data[50000:, :]
y_train, y_valid = y_data[:50000], y_data[50000:]

print('Training: ', X_train.shape, y_train.shape)
print('Validation: ', X_valid.shape, y_valid.shape)
print('Test Set: ', X_test.shape, y_test.shape)
# Iterate over the data in mini-batches
def batch_generator(X, y, batch_size=64,
                    shuffle=False, random_seed=None):
    """Yield successive ``(X_batch, y_batch)`` mini-batches.

    Parameters
    ----------
    X : numpy.ndarray
        Samples, indexed along the first axis (any number of dimensions).
    y : numpy.ndarray
        Targets with the same first-axis length as `X`.
    batch_size : int
        Maximum number of samples per batch; the last batch may be smaller.
    shuffle : bool
        If True, apply one random permutation to `X` and `y` together
        before batching.  The caller's arrays are not modified.
    random_seed : int or None
        Seed for the ``numpy.random.RandomState`` used for shuffling.

    Yields
    ------
    tuple of (numpy.ndarray, numpy.ndarray)
        Matching slices of `X` and `y`.

    Raises
    ------
    ValueError
        When iteration starts, if `X` and `y` differ in first-axis length
        (batching them would silently mis-pair samples and targets).
    """
    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
        raise ValueError(
            'X and y must have the same number of samples, got %d and %d'
            % (n_samples, y.shape[0]))

    if shuffle:
        order = np.random.RandomState(random_seed).permutation(n_samples)
        # Fancy indexing copies, so the inputs stay untouched.
        X = X[order]
        y = y[order]

    # Slice on the first axis only so X may have any rank (1-D included).
    for start in range(0, n_samples, batch_size):
        stop = start + batch_size
        yield (X[start:stop], y[start:stop])
# Standardize the inputs for better training performance and convergence.
# The mean is taken per feature (per pixel position) over the training set.
mean_vals = X_train.mean(axis=0)
# The standard deviation is a single scalar over all features.  A
# per-feature std is avoided because, in image data such as MNIST, some
# pixel positions hold the same value in every image; their std would be
# zero and the division below would fail.
std_val = X_train.std()
print("std_val:", std_val)
# All three splits are normalized with the training-set statistics.
X_train_centered, X_valid_centered, X_test_centered = (
    (split - mean_vals) / std_val
    for split in (X_train, X_valid, X_test)
)
# Drop the unnormalized arrays; only the *_centered copies are used below.
del X_data, y_data, X_train, X_valid, X_test
# Convolutional neural network
class ConvNN(object):
    """CNN that classifies flattened 28x28 images into 10 classes.

    The network is built inside its own ``tf.Graph`` (TensorFlow 1.x graph
    mode) and the instance owns a ``tf.Session`` bound to that graph.
    Tensors and ops are looked up by name (``'tf_x:0'``, ``'train_op'``,
    ...), so the names assigned in :meth:`build` are relied upon by
    :meth:`train` and :meth:`predict`.

    Parameters
    ----------
    batchsize : int
        Mini-batch size used by :meth:`train`.
    epochs : int
        Number of passes over the training set in :meth:`train`.
    learning_rate : float
        Learning rate handed to the Adam optimizer.
    dropout_rate : float
        Probability of dropping a unit in the dropout layer.
    shuffle : bool
        Whether :meth:`train` shuffles the training data each epoch.
    random_seed : int or None
        Seed applied to NumPy's global RNG and to the TensorFlow graph.

    Attributes
    ----------
    train_cost_ : list of float
        Mean mini-batch loss of every epoch of the latest :meth:`train` call.
    """

    def __init__(self, batchsize=64,
                 epochs=20, learning_rate=1e-4,
                 dropout_rate=0.5,
                 shuffle=True, random_seed=None):
        np.random.seed(random_seed)
        self.batchsize = batchsize
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate
        self.shuffle = shuffle

        g = tf.Graph()
        with g.as_default():
            # Set the graph-level random seed.
            tf.set_random_seed(random_seed)
            # Build the network inside this graph.
            self.build()
            # Op that initializes every variable of the graph.
            self.init_op = tf.global_variables_initializer()
            # Saver used by save() and load().
            self.saver = tf.train.Saver()

        # Session bound to the graph built above.
        self.sess = tf.Session(graph=g)

    # Build the model
    def build(self):
        """Add placeholders, layers, loss, optimizer and metrics to the
        current default graph.

        Nothing is returned; every node needed later is given an explicit
        name and fetched by that name.
        """
        # Placeholders for X and y.
        tf_x = tf.placeholder(tf.float32,
                              shape=[None, 784],
                              name='tf_x')
        tf_y = tf.placeholder(tf.int32,
                              shape=[None],
                              name='tf_y')
        # Switches dropout on (training) or off (inference).
        is_train = tf.placeholder(tf.bool,
                                  shape=(),
                                  name='is_train')

        # Reshape x to a 4-D tensor: [batchsize, width, height, 1].
        tf_x_image = tf.reshape(tf_x, shape=[-1, 28, 28, 1],
                                name='input_x_2dimages')
        # One-hot encoding of the integer labels.
        tf_y_onehot = tf.one_hot(indices=tf_y, depth=10,
                                 dtype=tf.float32,
                                 name='input_y_onehot')

        # tf.layers.conv2d / tf.layers.dense build the convolutional and
        # fully connected layers.
        # 1st layer: Conv_1
        h1 = tf.layers.conv2d(tf_x_image,
                              kernel_size=(5, 5),
                              filters=32,
                              activation=tf.nn.relu)
        # MaxPooling
        h1_pool = tf.layers.max_pooling2d(h1,
                                          pool_size=(2, 2),
                                          strides=(2, 2))
        # 2nd layer: Conv_2
        h2 = tf.layers.conv2d(h1_pool, kernel_size=(5, 5),
                              filters=64,
                              activation=tf.nn.relu)
        # MaxPooling
        h2_pool = tf.layers.max_pooling2d(h2,
                                          pool_size=(2, 2),
                                          strides=(2, 2))

        # 3rd layer: Fully Connected.  Flatten everything except the
        # batch axis first.
        input_shape = h2_pool.get_shape().as_list()
        n_input_units = np.prod(input_shape[1:])
        h2_pool_flat = tf.reshape(h2_pool,
                                  shape=[-1, n_input_units])
        h3 = tf.layers.dense(h2_pool_flat, 1024,
                             activation=tf.nn.relu)

        # Dropout.  The rate is fixed when the layer is created and cannot
        # be changed between training and inference; instead a boolean
        # placeholder tells the layer whether to drop units at all: feed
        # True while training and False for inference/prediction.
        h3_drop = tf.layers.dropout(h3,
                                    rate=self.dropout_rate,  # drop probability
                                    training=is_train)

        # 4th layer: Fully Connected (linear activation) -> logits.
        h4 = tf.layers.dense(h3_drop, 10,
                             activation=None)

        # Predictions.
        predictions = {
            'probabilities': tf.nn.softmax(h4,
                                           name='probabilities'),
            'labels': tf.cast(tf.argmax(h4, axis=1),
                              tf.int32, name='labels')}

        # Loss function.
        cross_entropy_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=h4, labels=tf_y_onehot),
            name='cross_entropy_loss')

        # Optimizer; the op is fetched later by its name 'train_op'.
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cross_entropy_loss,
                                      name='train_op')

        # Accuracy; fetched later as 'accuracy:0'.
        correct_predictions = tf.equal(
            predictions['labels'],
            tf_y, name='correct_preds')
        accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, tf.float32),
            name='accuracy')

    # Store the trained model
    def save(self, epoch, path='./tflayers-model/'):
        """Write a checkpoint named ``model.ckpt-<epoch>`` into `path`.

        The directory is created if it does not exist yet.
        """
        os.makedirs(path, exist_ok=True)
        print('Saving model in %s' % path)
        self.saver.save(self.sess,
                        os.path.join(path, 'model.ckpt'),
                        global_step=epoch)

    # Restore a stored model
    def load(self, epoch, path):
        """Restore the checkpoint ``model.ckpt-<epoch>`` found in `path`."""
        print('Loading model from %s' % path)
        self.saver.restore(self.sess,
                           os.path.join(path, 'model.ckpt-%d' % epoch))

    # Train the model
    def train(self, training_set,
              validation_set=None,
              initialize=True):
        """Train for ``self.epochs`` epochs and print per-epoch progress.

        Parameters
        ----------
        training_set : tuple
            ``(X, y)``; both are converted with ``np.array``.
        validation_set : tuple or None
            Optional ``(X, y)`` evaluated in one pass after every epoch.
        initialize : bool
            If True, (re)initialize all variables before training.
        """
        # Initialize variables.
        if initialize:
            self.sess.run(self.init_op)

        self.train_cost_ = []
        X_data = np.array(training_set[0])
        y_data = np.array(training_set[1])

        for epoch in range(1, self.epochs + 1):
            # Honor the configured batch size instead of silently falling
            # back to batch_generator's default.
            batch_gen = batch_generator(X_data, y_data,
                                        batch_size=self.batchsize,
                                        shuffle=self.shuffle)
            total_loss = 0.0
            n_batches = 0
            for batch_x, batch_y in batch_gen:
                feed = {'tf_x:0': batch_x,
                        'tf_y:0': batch_y,
                        'is_train:0': True}  # enable dropout
                loss, _ = self.sess.run(
                    ['cross_entropy_loss:0', 'train_op'],
                    feed_dict=feed)
                total_loss += loss
                n_batches += 1

            # Report the mean per-batch loss: the label says "Avg.", and an
            # undivided sum would scale with the number of batches.
            # max(..., 1) keeps an empty training set from dividing by zero.
            avg_loss = total_loss / max(n_batches, 1)
            self.train_cost_.append(avg_loss)
            print('Epoch %02d: Training Avg. Loss: '
                  '%7.3f' % (epoch, avg_loss), end=' ')

            if validation_set is not None:
                feed = {'tf_x:0': validation_set[0],
                        'tf_y:0': validation_set[1],
                        'is_train:0': False}  # disable dropout
                valid_acc = self.sess.run('accuracy:0',
                                          feed_dict=feed)
                print('Validation Acc: %7.3f' % valid_acc)
            else:
                print()

    # Predict on the test set
    def predict(self, X_test, return_proba=False):
        """Return predicted labels, or class probabilities if
        `return_proba` is True, for the samples in `X_test`."""
        feed = {'tf_x:0': X_test,
                'is_train:0': False}  # disable dropout
        if return_proba:
            return self.sess.run('probabilities:0',
                                 feed_dict=feed)
        else:
            return self.sess.run('labels:0',
                                 feed_dict=feed)
# Create a ConvNN, train it (the constructor default is 20 epochs) and
# store the trained model.
cnn = ConvNN(random_seed=123)
cnn.train(
    training_set=(X_train_centered, y_train),
    validation_set=(X_valid_centered, y_valid),
)
cnn.save(epoch=20)
del cnn

# Once training is done, a fresh instance restores the stored model and
# predicts on the test set; the first ten predictions are shown.
cnn2 = ConvNN(random_seed=123)
cnn2.load(epoch=20, path='./tflayers-model/')
print(cnn2.predict(X_test_centered[:10, :]))

# Measure the accuracy on the whole test set.
preds = cnn2.predict(X_test_centered)
print('Test Accuracy: %.2f%%'
      % (100 * (y_test == preds).sum() / y_test.shape[0]))
运行结果(注意:这份输出中的 Training Avg. Loss 是每轮各小批量损失的累加和,而不是平均值):
Rows: 60000, columns: 784
Rows: 10000, columns: 784
Training: (50000, 784) (50000,)
Validation: (10000, 784) (10000,)
Test Set: (10000, 784) (10000,)
std_val: 0.6169897821276509
Epoch 01: Training Avg. Loss: 265.208 Validation Acc: 0.976
Epoch 02: Training Avg. Loss: 70.976 Validation Acc: 0.982
Epoch 03: Training Avg. Loss: 49.074 Validation Acc: 0.987
Epoch 04: Training Avg. Loss: 37.456 Validation Acc: 0.986
Epoch 05: Training Avg. Loss: 31.255 Validation Acc: 0.988
Epoch 06: Training Avg. Loss: 25.722 Validation Acc: 0.989
Epoch 07: Training Avg. Loss: 21.549 Validation Acc: 0.991
Epoch 08: Training Avg. Loss: 19.527 Validation Acc: 0.991
Epoch 09: Training Avg. Loss: 16.550 Validation Acc: 0.990
Epoch 10: Training Avg. Loss: 14.398 Validation Acc: 0.992
Epoch 11: Training Avg. Loss: 12.616 Validation Acc: 0.992
Epoch 12: Training Avg. Loss: 10.580 Validation Acc: 0.991
Epoch 13: Training Avg. Loss: 10.050 Validation Acc: 0.991
Epoch 14: Training Avg. Loss: 9.168 Validation Acc: 0.992
Epoch 15: Training Avg. Loss: 7.263 Validation Acc: 0.992
Epoch 16: Training Avg. Loss: 6.391 Validation Acc: 0.992
Epoch 17: Training Avg. Loss: 6.416 Validation Acc: 0.992
Epoch 18: Training Avg. Loss: 5.201 Validation Acc: 0.992
Epoch 19: Training Avg. Loss: 4.363 Validation Acc: 0.992
Epoch 20: Training Avg. Loss: 4.959 Validation Acc: 0.991
Saving model in ./tflayers-model/
Loading model from ./tflayers-model/
[7 2 1 0 4 1 4 9 5 9]
Test Accuracy: 99.36%
可以看到,测试精度达到了 99.36%,非常高:这意味着在 10000 个测试样本中,只有 64 个被错误分类!