Design of a Handwritten Digit Recognition System Based on a Convolutional Neural Network


Building a handwritten digit recognition system with a TensorFlow convolutional neural network

1. Prerequisites
① Convolutional neural networks
② OpenCV image processing
③ Tkinter GUI design
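
Note that the scripts below are written against the TensorFlow 1.x API: they build graphs with tf.compat.v1 placeholders and sessions and read MNIST through tensorflow.examples.tutorials.mnist, a module that was removed in TensorFlow 2.x. The following is a minimal environment check, assuming package versions that the original post does not specify:

# Minimal environment check (expected versions are assumptions, not stated in the original post).
# tkinter ships with the standard CPython installer; Pillow (PIL) and opencv-python
# must be installed separately.
import tensorflow as tf
import cv2

print("TensorFlow:", tf.__version__)  # expected 1.x (e.g. 1.15), so that
                                      # tensorflow.examples.tutorials.mnist is importable
print("OpenCV:", cv2.__version__)     # opencv-python, used for the drawing pad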

2. Code
(1) Training and saving the model

import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data
import cv2
import numpy as np
import matplotlib.pyplot as plt


mg = tf.Graph()
with mg.as_default():
    # 1. Prepare the data: read the MNIST handwritten digit images
    mnist = input_data.read_data_sets("./mnist_data/", one_hot=True)

    # Placeholders so that image data can be fed in when the session runs
    x = tf.compat.v1.placeholder(tf.float32, [None, 784])
    y_true = tf.compat.v1.placeholder(tf.int32, [None, 10])

    # The input arrives as flat [None, 784] vectors; reshape to [None, 28, 28, 1]
    # (single-channel grayscale images)
    input_x = tf.reshape(x, shape=[-1, 28, 28, 1])

    # First convolutional block: convolution, ReLU activation, max pooling
    # Define the filters and the bias
    conv1_weights = tf.Variable(tf.random.normal([3, 3, 1, 32]))
    conv1_bias = tf.Variable(tf.random.normal([32]))
    conv1_x = tf.nn.conv2d(input_x, conv1_weights, [1, 1, 1, 1], padding="SAME") + conv1_bias
    relu1_x = tf.nn.relu(conv1_x)
    pool1_x = tf.nn.max_pool(relu1_x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    
    # Second convolutional block: convolution, ReLU activation, max pooling
    # Define the filters and the bias
    conv2_weights = tf.Variable(tf.random.normal([3, 3, 32, 64]))
    conv2_bias = tf.Variable(tf.random.normal([64]))
    conv2_x = tf.nn.conv2d(pool1_x, conv2_weights, [1, 1, 1, 1], padding="SAME") + conv2_bias
    relu2_x = tf.nn.relu(conv2_x)
    pool2_x = tf.nn.max_pool(relu2_x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # Fully connected layer
    # [None, 7, 7, 64]->[None, 7 * 7 * 64]
    # [None, 7 * 7 * 64] * [7 * 7 * 64, 10] = [None, 10]
    x_fc = tf.reshape(pool2_x, shape=[-1, 7 * 7 * 64])
    weights_fc = tf.Variable(tf.random.normal([7 * 7 * 64, 10]))
    bias_fc = tf.Variable(tf.random.normal([10]))
    y_predict = tf.matmul(x_fc, weights_fc) + bias_fc

    # Softmax regression with cross-entropy loss
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_predict, labels=y_true))

    # 4. Minimize the loss with the Adam optimizer (learning rate 0.01)
    train_op = tf.compat.v1.train.AdamOptimizer(0.01).minimize(loss)

    # 5. Per-batch accuracy: compare the predicted and true class for every sample
    equal_list = tf.equal(tf.argmax(y_true, 1), tf.argmax(y_predict, 1))
    accuracy = tf.reduce_mean(tf.cast(equal_list, tf.float32))
    # Variable-initialization op and checkpoint saver
    init_op = tf.compat.v1.global_variables_initializer()
    saver = tf.compat.v1.train.Saver()
# Open a session and run the training
with tf.compat.v1.Session(graph=mg) as sess:
    # Initialize the variables
    sess.run(init_op)
    # Train the network for a fixed number of steps
    acc = []  # record the training accuracy at each step
    for i in range(1100):
        # Fetch a batch of 500 training samples for this step
        mnist_x, mnist_y = mnist.train.next_batch(500)
        # The dataset is normalized to 0~1, so scale back up to 0~255
        # (this matches the 0~255 drawing-pad pixels fed in at prediction time)
        mnist_x = mnist_x * 255
        # Run the training op and fetch the accuracy and loss in the same pass
        _, ac, ls = sess.run([train_op, accuracy, loss], feed_dict={x: mnist_x, y_true: mnist_y})
        print("Step %d: training accuracy %f, loss %f" % (i + 1, ac, ls))

        acc.append(ac)
    plt.plot(acc)
    plt.show()

    saver.save(sess, r"conv_num_recog\shendumodel.ckpt")   #保存模型,以备调用预训练的模型进行预测。

    # Evaluate on the 10,000-image MNIST test set
    mnist_x, mnist_y = mnist.test.next_batch(10000)
    mnist_x = mnist_x * 255
    print("Test accuracy: %f, loss: %f" % (sess.run(accuracy, feed_dict={x: mnist_x, y_true: mnist_y}),
                                           sess.run(loss, feed_dict={x: mnist_x, y_true: mnist_y})))

    

Running the program produces the following training results:
[Figure: training accuracy versus the number of training steps]
The accuracy on the MNIST training batches reaches 99.0%, while the accuracy on 10,000 images from the MNIST test set is 96.97%, which shows that the model performs very well.
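
For readers on TensorFlow 2.x, where the tensorflow.examples.tutorials.mnist module no longer exists, the same architecture (two 3×3 convolution blocks with 32 and 64 filters, 2×2 max pooling, and a fully connected layer producing 10 logits) can be expressed with tf.keras. This is only a minimal sketch under assumed hyperparameters (batch size, number of epochs), not the exact training setup above:

# A tf.keras sketch of the same two-block CNN, for TensorFlow 2.x users.
# Hyperparameters (batch size, epochs) are assumptions, not the original settings.
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., None].astype("float32")  # (60000, 28, 28, 1), pixel range 0~255
x_test = x_test[..., None].astype("float32")

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu", input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(2),
    tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu"),
    tf.keras.layers.MaxPooling2D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10),  # raw logits; softmax is applied inside the loss
])
model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=500, epochs=2)
print(model.evaluate(x_test, y_test))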

(2) Loading the model, building the drawing pad, and designing the GUI

import tensorflow as tf
import numpy as np
from tkinter import *
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageTk
import os
import cv2


class Mouse:
    # A drawing pad built on OpenCV mouse callbacks
    def __init__(self):
        self.img = np.zeros((200, 200, 3), np.uint8)

    def draw_circle(self, event, x, y, flags, param):
        global drawing
        if event == cv2.EVENT_LBUTTONDOWN:
            drawing = True
        elif event == cv2.EVENT_MOUSEMOVE:
            if drawing:
                cv2.circle(self.img, (x, y), 9, (255, 255, 255), -1)
        elif event == cv2.EVENT_LBUTTONUP:
            drawing = False
            cv2.circle(self.img, (x, y), 9, (255, 255, 255), -1)

    def mypic(self):
        return self.img

    def create_image(self):
        cv2.namedWindow('Digital writing pad')
        cv2.setMouseCallback('Digital writing pad', self.draw_circle)
        while 1:
            cv2.imshow('Digital writing pad', self.img)
            k = cv2.waitKey(1)
            if k == 27:
                break
        cv2.destroyAllWindows()


def con_neural_net():
    mg = tf.Graph()
    with mg.as_default():
        x = tf.compat.v1.placeholder(tf.float32, [None, 784])
        y_true = tf.compat.v1.placeholder(tf.int32, [None, 10])
        input_x = tf.reshape(x, shape=[-1, 28, 28, 1])

        conv1_weights = tf.Variable(tf.random.normal([3, 3, 1, 32]))
        conv1_bias = tf.Variable(tf.random.normal([32]))
        conv1_x = tf.nn.conv2d(input_x, conv1_weights, [1, 1, 1, 1], padding="SAME") + conv1_bias
        relu1_x = tf.nn.relu(conv1_x)
        pool1_x = tf.nn.max_pool(relu1_x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

        conv2_weights = tf.Variable(tf.random.normal([3, 3, 32, 64]))
        conv2_bias = tf.Variable(tf.random.normal([64]))
        conv2_x = tf.nn.conv2d(pool1_x, conv2_weights, [1, 1, 1, 1], padding="SAME") + conv2_bias
        relu2_x = tf.nn.relu(conv2_x)
        pool2_x = tf.nn.max_pool(relu2_x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

        x_fc = tf.reshape(pool2_x, shape=[-1, 7 * 7 * 64])
        weights_fc = tf.Variable(tf.random.normal([7 * 7 * 64, 10]))
        bias_fc = tf.Variable(tf.random.normal([10]))
        y_predict = tf.matmul(x_fc, weights_fc) + bias_fc

        saver = tf.compat.v1.train.Saver()
    with tf.compat.v1.Session(graph=mg) as sess:
        saver.restore(sess, r"conv_num_recog\shendumodel.ckpt")  # 调用训练好的模型
        m = Mouse()
        m.create_image()
        picture = m.mypic()
        if np.all(picture == 0):
            messagebox.showinfo("提示", "您还没有开始输入数字!")
            return
        else:
            gray_image = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)
            pic1 = cv2.resize(gray_image, (28, 28))
            pic2 = pic1.reshape([-1, 784])
            prediction = tf.argmax(sess.run(y_predict, feed_dict={x: pic2}), 1).eval()
            return prediction[0]


class App(tk.Frame):
    def __init__(self, root):
        super().__init__(root)
        root.title('Handwritten Digit Recognition')
        Label(root, text='Welcome to the handwriting recognition system. Instructions:').pack(side=TOP)
        Label(root, text='1. Click the "Recognize Digit" button, then write with the left mouse button in the pop-up window.').pack(side=TOP, anchor=W)
        Label(root, text='2. When you are done, press Esc to close the pad and show the prediction.').pack(side=TOP, anchor=W)
        Label(root, text='3. Click "Refresh" to clear the last recognized digit.').pack(side=TOP, anchor=W)
        Label(root, text='Recognized digit:').pack(side=TOP, anchor=W)
        self.numLabel = Label(root, text='', relief=RAISED, fg="red", font=("黑体", 30, "bold"))
        self.numLabel.pack(side=TOP, anchor=CENTER)

        fm = Frame(root)
        Button(fm, text='Recognize Digit', command=self.re_num).pack(side=TOP, anchor=W, fill=X, expand=YES)
        Button(fm, text='Refresh', command=self.renew_num).pack(side=TOP, anchor=W, fill=X, expand=YES)
        fm.pack(side=LEFT, fill=BOTH, expand=YES, padx=20)
        Label(root, text="Hold the left mouse button and move it to start drawing...").pack(side=BOTTOM)

    def re_num(self):
        self.numLabel.configure(text=str(con_neural_net()))

    def renew_num(self):
        self.numLabel.configure(text='')


if __name__ == "__main__":
    drawing = False  # true if mouse is pressed
    ix, iy = -1, -1
    root = Tk()
    app = App(root)
    root.mainloop()

3. Results
Running the program opens a GUI window:
[Figure: main window of the handwritten digit recognition program]
Click the Recognize Digit button and write a digit on the black drawing pad that pops up (only the digits 1~9 are recognized).
After writing the digit, press Esc to close the drawing pad. The program then feeds the image into the convolutional neural network for recognition and displays the result.
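
The essential step here is the preprocessing that turns the 200×200 drawing-pad image into the [1, 784] vector the network expects. Below is a minimal sketch of that transformation, factored out of con_neural_net above; the helper name pad_image_to_input is just for illustration.

# Sketch of the drawing-pad preprocessing performed in con_neural_net above.
# `picture` is the 200x200x3 BGR image produced by the Mouse class.
import cv2
import numpy as np

def pad_image_to_input(picture):
    gray = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)  # drop the color channels -> (200, 200)
    small = cv2.resize(gray, (28, 28))                # match the MNIST resolution
    # Flatten to one row of 784 features; pixel values stay in the 0~255 range,
    # which matches the mnist_x * 255 scaling used during training.
    return small.reshape([-1, 784]).astype(np.float32)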

There are many ways to design the GUI and build the drawing pad, but the most important thing is to understand how the neural network works and to use it to build models with all kinds of capabilities.
