MNIST(Modified National Institute of Standards and Technology)是一个入门级的计算机视觉数据集,数据集中的数字由美国人口普查局的员工和美国中学生手写而成。它的训练集包含 6 万张图片,测试集包含 1 万张图片,并且数字已经进行过预处理和格式化,做了大小调整并居中,图片尺寸也固定为 28×28。这个数据集很小,训练速度很快,而且收敛效果也很好,非常适合作为实战的例子去学习。
9.1 MNIST 数据集简介
MNIST 数据集是 NIST 数据集的子集,包含以下 4 个文件。
● train-labels-idx1-ubyte.gz:训练集标记文件(28 881 字节)。
● train-images-idx3-ubyte.gz:训练集图片文件(9 912 422 字节)。
● t10k-labels-idx1-ubyte.gz:测试集标记文件(4 542 字节)。
● t10k-images-idx3-ubyte.gz:测试集图片文件(1 648 877 字节)。
MNIST 数据集包括训练集的图片和标记数据,以及测试集的图片和标记数据,在测试集包含的 10 000 个样例中,前 5 000 个样例取自原始的 NIST 训练集,后 5 000 个取自原始的 NIST测试集,因此前 5 000 个预测起来更容易些。
下面具体讲解它们的格式,详见 http://yann.lecun.com/exdb/mnist 。
9.2 MNIST的分类问题
Softmax 回归可以解决两种以上类别的分类问题,该模型是 Logistic 回归模型在多分类问题上的推广。
# -*- coding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets(
    "/tmp/tensorflow/mnist/input_data/",
    one_hot=True,
)

# Regression model: a single affine map from the 784 input values of an
# image to 10 unnormalized class scores.
x = tf.placeholder(tf.float32, shape=[None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b  # predicted scores (used as logits below)

# Loss: softmax cross-entropy between the scores and the one-hot labels,
# averaged over the batch.
y_ = tf.placeholder(tf.float32, shape=[None, 10])
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)
cross_entropy = tf.reduce_mean(per_example_loss)

# Optimizer: plain gradient descent (SGD over mini-batches).
sgd = tf.train.GradientDescentOptimizer(0.5)
train_step = sgd.minimize(cross_entropy)
# Train the model.
# InteractiveSession installs itself as the default session, which is what
# lets `tf.global_variables_initializer().run()` work without an explicit
# session argument. It must be closed explicitly to release its resources.
sess = tf.InteractiveSession()
try:
    tf.global_variables_initializer().run()
    for _ in range(1000):
        # One gradient-descent step on a mini-batch of 100 examples.
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    # Evaluate the trained model.
    # A prediction is correct when the highest-scoring class matches the
    # class marked in the one-hot label.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    # Cast booleans to floats and average them to obtain the accuracy.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # Accuracy of the model on the test set.
    print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
finally:
    sess.close()
0.918
9.3 训练过程的可视化
9.4 MNIST 的卷积神经网络
https://github.com/nlintz/TensorFlow-Tutorials/blob/master/05_convolutional_net.py
# -*- coding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# Load MNIST with one-hot labels and pull out the train/test arrays.
mnist = input_data.read_data_sets("", one_hot=True)
trX = mnist.train.images
trY = mnist.train.labels
teX = mnist.test.images
teY = mnist.test.labels

# Give every image an explicit 28x28x1 layout for the conv layers.
trX = trX.reshape((-1, 28, 28, 1))
teX = teX.reshape((-1, 28, 28, 1))

# Graph inputs: a batch of 28x28x1 images and their one-hot labels.
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])
def init_weights(shape):
    """Return a new tf.Variable of the given shape.

    The initial values come from `tf.random_normal` with stddev=0.01.
    """
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)
# Create the five weight variables in layer order.
w, w2, w3, w4, w_o = (
    init_weights(layer_shape)
    for layer_shape in (
        [3, 3, 1, 32],       # conv 1: 3x3 patch, 1 input channel, 32 output channels
        [3, 3, 32, 64],      # conv 2: 3x3 patch, 32 input channels, 64 output channels
        [3, 3, 64, 128],     # conv 3: 3x3 patch, 64 input channels, 128 output channels
        [128 * 4 * 4, 625],  # fully connected: previous 3-D output flattened to 128*4*4, 625 outputs
        [625, 10],           # output layer: 625 inputs, 10 outputs (one per label)
    )
)
def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
    """Build the convolutional network and return its output scores.

    Args:
        X: input image tensor.
        w, w2, w3: filters of the three convolution layers.
        w4: weights of the fully connected layer; its first dimension is
            the size the conv output is flattened to.
        w_o: weights of the output layer.
        p_keep_conv: dropout keep probability applied after each conv group.
        p_keep_hidden: dropout keep probability applied after the fully
            connected layer.

    Returns:
        The result of the final matmul (no softmax is applied here).
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    def conv_relu_pool(inputs, kernel):
        # Convolution -> ReLU -> 2x2 max pooling with stride 2, all 'SAME' padded.
        activated = tf.nn.relu(
            tf.nn.conv2d(inputs, kernel, strides=unit_stride, padding='SAME'))
        return tf.nn.max_pool(
            activated, ksize=pool_window, strides=pool_window, padding='SAME')

    # First and second conv/pool groups, each followed by dropout.
    l1 = tf.nn.dropout(conv_relu_pool(X, w), p_keep_conv)
    l2 = tf.nn.dropout(conv_relu_pool(l1, w2), p_keep_conv)

    # Third conv/pool group; for 28x28 inputs this yields (?, 4, 4, 128).
    l3 = conv_relu_pool(l2, w3)
    # Flatten to (?, 128 * 4 * 4 = 2048), taking the width from w4 itself.
    flat_width = w4.get_shape().as_list()[0]
    l3 = tf.reshape(l3, [-1, flat_width])
    l3 = tf.nn.dropout(l3, p_keep_conv)

    # Fully connected layer followed by dropout.
    l4 = tf.nn.dropout(tf.nn.relu(tf.matmul(l3, w4)), p_keep_hidden)

    # Output layer.
    return tf.matmul(l4, w_o)
# Dropout keep probabilities are placeholders so that different values can
# be fed at run time.
p_keep_conv = tf.placeholder(tf.float32)
p_keep_hidden = tf.placeholder(tf.float32)
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)

# Loss: softmax cross-entropy against the labels, averaged over the batch.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y)
cost = tf.reduce_mean(per_example_loss)

# RMSProp training step and the predicted class index.
rmsprop = tf.train.RMSPropOptimizer(0.001, 0.9)
train_op = rmsprop.minimize(cost)
predict_op = tf.argmax(py_x, 1)
# Train the model and evaluate it after every epoch.
# NOTE: this block needs numpy as `np`; the script's import section must
# contain `import numpy as np`.
batch_size = 128
test_size = 256
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for i in range(100):
        # (start, end) index pairs of consecutive full mini-batches. The
        # second range stops at the last multiple of batch_size, so a
        # trailing partial batch is not used.
        training_batch = zip(range(0, len(trX), batch_size),
                             range(batch_size, len(trX) + 1, batch_size))
        for start, end in training_batch:
            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end],
                                          p_keep_conv: 0.8, p_keep_hidden: 0.5})

        # Score a random sample of test_size test images; dropout is
        # disabled by feeding keep probabilities of 1.0.
        test_indices = np.random.permutation(len(teX))[:test_size]
        print(i, np.mean(np.argmax(teY[test_indices], axis=1) ==
                         sess.run(predict_op, feed_dict={X: teX[test_indices],
                                                         p_keep_conv: 1.0,
                                                         p_keep_hidden: 1.0})))
9.5 MNIST 的循环神经网络
# -*- coding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# Load MNIST with one-hot labels and pull out the train/test arrays.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX = mnist.train.images
trY = mnist.train.labels
teX = mnist.test.images
teY = mnist.test.labels
# NOTE(review): the reshaped arrays are not referenced by the rest of this
# script as shown; training draws batches from `mnist.train` instead.
trX = trX.reshape((-1, 28, 28, 1))  # 28x28x1 input img
teX = teX.reshape((-1, 28, 28, 1))  # 28x28x1 input img

# Training hyperparameters.
lr = 0.001
training_iters = 100000
batch_size = 128

# Network dimensions.
n_inputs = 28          # size of the input at each step
n_steps = 28           # sequence length
n_hidden_units = 128   # number of units in the hidden layer
n_classes = 10         # number of output classes: the digits 0-9

# Placeholders for the input sequences and their one-hot labels.
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# Weights of the input projection ('in') and of the output layer ('out').
weights = {}
weights['in'] = tf.Variable(tf.random_normal([n_inputs, n_hidden_units]))
weights['out'] = tf.Variable(tf.random_normal([n_hidden_units, n_classes]))

# Matching biases, all initialised to the constant 0.1.
biases = {}
biases['in'] = tf.Variable(tf.constant(0.1, shape=[n_hidden_units]))
biases['out'] = tf.Variable(tf.constant(0.1, shape=[n_classes]))
# Define the RNN model.
def RNN(X, weights, biases):
    """Build the LSTM classifier and return its output scores.

    Args:
        X: input tensor; it is reshaped here assuming a
            (batch, n_steps, n_inputs) layout.
        weights: dict with the 'in' and 'out' weight variables.
        biases: dict with the 'in' and 'out' bias variables.

    Returns:
        Tensor of shape (batch, n_classes) computed from the final hidden
        state of the LSTM.
    """
    # Flatten to (batch * n_steps, n_inputs) so that the input projection
    # is a single matmul.
    X = tf.reshape(X, [-1, n_inputs])
    # Input (hidden) layer: (batch * n_steps, n_hidden_units).
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # Back to a sequence layout: (batch, n_steps, n_hidden_units).
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    # Basic LSTM cell; its state is the tuple (c_state, h_state).
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)

    # No explicit initial state: given `dtype`, dynamic_rnn starts from a
    # zero state sized from the actual input batch. Building the state with
    # the global `batch_size` would tie the graph to batches of exactly that
    # size and make it unusable for, e.g., evaluating a differently sized set.
    # dynamic_rnn accepts (batch, steps, inputs) when time_major=False.
    _, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, dtype=tf.float32, time_major=False)

    # final_state[1] is h_state, the hidden state after the last step.
    results = tf.matmul(final_state[1], weights['out']) + biases['out']
    return results
# Loss and optimizer: batch-averaged softmax cross-entropy, minimised with Adam.
pred = RNN(x, weights, biases)
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
cost = tf.reduce_mean(per_example_loss)
adam = tf.train.AdamOptimizer(lr)
train_op = adam.minimize(cost)

# Accuracy: fraction of examples whose highest-scoring class matches the label.
predicted_class = tf.argmax(pred, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Train the model, printing the accuracy on the current batch every 20 steps.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    step = 0
    # Keep going until about `training_iters` examples have been consumed.
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Reshape the batch to (batch_size, n_steps, n_inputs) for the RNN.
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        feed = {x: batch_xs, y: batch_ys}
        sess.run([train_op], feed_dict=feed)
        if step % 20 == 0:
            print(sess.run(accuracy, feed_dict=feed))
        step += 1
9.6 MNIST 的无监督学习
# -*- coding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# Load MNIST with one-hot labels and pull out the train/test arrays.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX = mnist.train.images
trY = mnist.train.labels
teX = mnist.test.images
teY = mnist.test.labels
# NOTE(review): the reshaped arrays are not referenced by the rest of this
# script as shown; batches are drawn from `mnist` directly.
trX = trX.reshape((-1, 28, 28, 1))  # 28x28x1 input img
teX = teX.reshape((-1, 28, 28, 1))  # 28x28x1 input img

# Training settings.
learning_rate = 0.01    # learning rate
training_epochs = 20    # number of training epochs
batch_size = 256
display_step = 1        # print the cost every `display_step` epochs
examples_to_show = 10   # number of test images to reconstruct and plot

# Layer sizes.
n_hidden_1 = 256
n_hidden_2 = 128
n_input = 784

# Graph input: a batch of images, each as a vector of n_input values.
X = tf.placeholder(tf.float32, [None, n_input])
# Weight matrices of the two encoder and two decoder layers, initialised
# from tf.random_normal. The decoder sizes mirror the encoder sizes.
weights = {
    layer_name: tf.Variable(tf.random_normal(layer_shape))
    for layer_name, layer_shape in (
        ('encoder_h1', [n_input, n_hidden_1]),
        ('encoder_h2', [n_hidden_1, n_hidden_2]),
        ('decoder_h1', [n_hidden_2, n_hidden_1]),
        ('decoder_h2', [n_hidden_1, n_input]),
    )
}

# One bias vector per layer, sized to that layer's output.
biases = {
    layer_name: tf.Variable(tf.random_normal(layer_shape))
    for layer_name, layer_shape in (
        ('encoder_b1', [n_hidden_1]),
        ('encoder_b2', [n_hidden_2]),
        ('decoder_b1', [n_hidden_1]),
        ('decoder_b2', [n_input]),
    )
}
# Compression (encoding) function.
def encoder(x):
    """Encode `x` with two sigmoid layers and return the second layer's output.

    Uses the 'encoder_h1'/'encoder_h2' weights and the
    'encoder_b1'/'encoder_b2' biases from the module-level dicts.
    """
    hidden = tf.matmul(x, weights['encoder_h1'])
    hidden = tf.nn.sigmoid(tf.add(hidden, biases['encoder_b1']))
    code = tf.matmul(hidden, weights['encoder_h2'])
    code = tf.nn.sigmoid(tf.add(code, biases['encoder_b2']))
    return code
# Decompression (decoding) function.
def decoder(x):
    """Decode `x` with two sigmoid layers and return the second layer's output.

    Uses the 'decoder_h1'/'decoder_h2' weights and the
    'decoder_b1'/'decoder_b2' biases from the module-level dicts.
    """
    hidden = tf.matmul(x, weights['decoder_h1'])
    hidden = tf.nn.sigmoid(tf.add(hidden, biases['decoder_b1']))
    reconstruction = tf.matmul(hidden, weights['decoder_h2'])
    reconstruction = tf.nn.sigmoid(tf.add(reconstruction, biases['decoder_b2']))
    return reconstruction
# Build the model: the decoder consumes the encoder's output.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

y_pred = decoder_op  # prediction: the reconstructed input
y_true = X           # target: the input itself

# Loss: mean squared difference between target and prediction, minimised
# with RMSProp.
squared_error = tf.pow(y_true - y_pred, 2)
cost = tf.reduce_mean(squared_error)
rmsprop = tf.train.RMSPropOptimizer(learning_rate)
optimizer = rmsprop.minimize(cost)

init = tf.global_variables_initializer()
# Train the autoencoder, then plot reconstructions of test images.
with tf.Session() as sess:
    sess.run(init)
    # Number of full mini-batches per epoch.
    total_batch = mnist.train.num_examples // batch_size

    # Training.
    for epoch in range(training_epochs):
        for _ in range(total_batch):
            # Labels are not needed: the network learns to reproduce its input.
            batch_xs, _ = mnist.train.next_batch(batch_size)
            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
        # Report the cost of the epoch's last batch.
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c))
    print("Optimization Finished!")

    # Run the trained autoencoder on the first test images.
    encode_decode = sess.run(y_pred, feed_dict={X: mnist.test.images[:examples_to_show]})

    # Compare the original test images (top row) with their reconstructions
    # (bottom row). The grid is sized from examples_to_show rather than a
    # literal 10, and squeeze=False keeps `a` two-dimensional even when only
    # one example is shown.
    f, a = plt.subplots(2, examples_to_show, figsize=(examples_to_show, 2), squeeze=False)
    for i in range(examples_to_show):
        a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
        a[1][i].imshow(np.reshape(encode_decode[i], (28, 28)))  # reconstruction
    f.show()
    plt.draw()
    plt.waitforbuttonpress()