1. 利用 TensorFlow 实现手写体识别
1.1 背景
手写数字数据集(MNIST)来自美国国家标准与技术研究所(National Institute of Standards and Technology,NIST),发布于1998年。样本是250个不同的人手写的数字,其中50%是高中学生,50%是人口普查局的工作人员。数字为0~9,图片大小是28×28像素,黑底白字。训练数据集包含60000个样本,测试数据集包含10000个样本。数据集的标签是长度为10的一维数组,数组中每个元素表示其索引号所对应数字出现的概率。数据集下载地址:http://yann.lecun.com/exdb/mnist/
# 手写体识别
import cv2
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import pylab
import numpy as np
# Preprocess a user-supplied image for prediction.
def pre_pic(picName):
    """Load an image file and convert it into one flattened 784-value input row.

    The image is read as single-channel grayscale, scaled to 28x28 when it has
    any other size, binarised with an inverted threshold at 127 (pixels above
    127 become 0, all others become 255, so dark strokes on a light background
    come out as light strokes on a dark background), and finally rescaled from
    the 0-255 range to floats in the 0-1 range.

    Args:
        picName: Path of the image file to read.

    Returns:
        A float32 numpy array of shape (1, 784).

    Raises:
        OSError: If OpenCV cannot read the file (missing file or a format it
            cannot decode).
    """
    im = cv2.imread(picName, 0)  # flag 0: load as grayscale
    if im is None:
        # cv2.imread signals failure by returning None instead of raising, which
        # would otherwise surface later as an opaque error inside cv2.threshold.
        raise OSError("cannot read image file: %s" % picName)
    if im.shape != (28, 28):
        # The reshape below needs exactly 784 pixels. Images that are already
        # 28x28 are left untouched so their pixel values are not resampled.
        im = cv2.resize(im, (28, 28), interpolation=cv2.INTER_AREA)
    _, im_arr = cv2.threshold(im, 127, 255, cv2.THRESH_BINARY_INV)
    nm_arr = im_arr.reshape([1, 784]).astype(np.float32)  # 1 row, 784 columns
    # Map the 0-255 pixel values onto 0-1 floats.
    return np.multiply(nm_arr, 1.0 / 255.0)
# Load the dataset with one-hot encoded labels (per the original note it is
# downloaded when not present locally). If the download fails, fetch the files
# manually from http://yann.lecun.com/exdb/mnist/ and copy them into the data
# directory passed below.
mnist = input_data.read_data_sets("mnist/", one_hot=True)

x = tf.placeholder(tf.float32, [None, 784])  # placeholder: input rows of 784 values
y = tf.placeholder(tf.float32, [None, 10])  # placeholder: one-hot target labels
W = tf.Variable(tf.random_normal([784, 10]))  # weights
b = tf.Variable(tf.zeros([10]))  # biases

# Model: one linear layer followed by softmax classification.
logits = tf.matmul(x, W) + b
pred_y = tf.nn.softmax(logits)
print("pred_y.shape:", pred_y.shape)

# Loss: per-sample cross-entropy, averaged over the batch.
# The log-probabilities come from log_softmax on the logits rather than from
# tf.log(pred_y): when a softmax output underflows to 0, tf.log yields -inf and
# the product with a 0 label becomes NaN, which poisons the whole cost.
cross_entropy = -tf.reduce_sum(y * tf.nn.log_softmax(logits),
                               reduction_indices=1)
cost = tf.reduce_mean(cross_entropy)

# Hyper-parameters.
lr = 0.005
# Plain gradient-descent optimizer.
optimizer = tf.train.GradientDescentOptimizer(lr).minimize(cost)
training_epochs = 500
batch_size = 100

saver = tf.train.Saver()
model_path = "./model/mnist/mnist_model.ckpt"  # checkpoint location

# Start a session and train.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Number of whole batches per epoch; constant, so computed once.
    total_batch = mnist.train.num_examples // batch_size

    for epoch in range(training_epochs):
        avg_cost = 0.0
        # One pass over the training set, one batch at a time.
        for _ in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            params = {x: batch_xs, y: batch_ys}
            # Run one optimisation step; only the cost value is needed back.
            _, c = sess.run([optimizer, cost], feed_dict=params)
            avg_cost += (c / total_batch)  # running mean of the batch costs
        print("epoch: %d, cost=%.9f" % (epoch + 1, avg_cost))
    print("Finished!")

    # Evaluation: a prediction is correct when the arg-max of the predicted
    # distribution equals the arg-max of the one-hot label.
    correct_pred = tf.equal(tf.argmax(pred_y, 1), tf.argmax(y, 1))
    # Accuracy is the mean of the 0/1 correctness indicators.
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    params = {x: mnist.test.images, y: mnist.test.labels}
    acc = sess.run(accuracy, feed_dict=params)
    print('测试集成功率:', acc)

    # Save the trained variables to a checkpoint file.
    save_path = saver.save(sess, model_path)
    print("Model saved:", save_path)
# Step 4: load the saved model and predict.
if __name__ == '__main__':
    with tf.Session() as sess:
        # saver.restore assigns every saved variable, so running the variable
        # initializer first would only produce values that are overwritten.
        saver.restore(sess, model_path)

        # Alternative input source: instead of a custom picture, draw samples
        # with mnist.test.next_batch(2), feed the images through the same
        # fetches below, and compare the result against the returned labels.

        # Predict the digit in a user-supplied picture.
        testPic = "./img/11.png"
        testPicArr = pre_pic(testPic)

        output = tf.argmax(pred_y, 1)  # index of the most probable class
        output_val, predv = sess.run(
            [output, pred_y],
            feed_dict={x: testPicArr},
        )
        print("预测结论:\n", output_val, "\n")
        print("预测概率:\n", predv, "\n")

        # Fold the flat input row back into rows of 28 pixels for display.
        im = testPicArr.reshape(-1, 28)
        pylab.imshow(im)
        pylab.show()