使用tensorflow实现手写字母识别-＞python

最新推荐文章于 2024-04-29 21:23:46 发布

阿哲-哲学的哲

最新推荐文章于 2024-04-29 21:23:46 发布

阅读量1.4k

点赞数 1

分类专栏： python 文章标签： pycharm ide python

本文链接：https://blog.csdn.net/weixin_50398435/article/details/124882870

版权

python 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

环境准备：IDE：pycharm

python版本：python3.8

外部库：tensorflow2.3、opencv4.0+、matplotlib3.5、sklearn

我这边在python3.9下安装之后无法导入opencv4.0（可能是版本不兼容的问题），所以使用python3.8

导入库的话可以参考https://blog.csdn.net/weixin_50398435/article/details/124836546

用到的数据集：https://download.csdn.net/download/weixin_50398435/85825390

先上代码：这一个是主体代码，就是一整个模型训练测试的，

import os
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import numpy as np
import tensorflow.python.keras as keras
from tensorflow.python.keras import layers
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from sklearn.model_selection import train_test_split


def testdatainout():
    """Interactively load one image and prepare it as model input.

    Prompts for an image path and for the expected label, reads the image as
    grayscale, resizes it to 28x28, scales the pixels by 1/255 and displays
    the result in a matplotlib window.

    Returns:
        tuple: ``(x, y)`` where ``x`` is a float array of shape (28, 28, 1)
        and ``y`` is the expected label exactly as typed by the user (str).

    Raises:
        FileNotFoundError: if OpenCV cannot read an image from the path.
    """
    # Example path: E:/testimage/image/a/img037-001.png
    path = input("文件路径：")
    y = input("正确值：")
    print(path)

    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with a clear message rather than later inside cv2.resize.
    if img is None:
        raise FileNotFoundError(f"cannot read image: {path}")
    img = cv2.resize(img, (28, 28))
    # Scale by 1/255 and add the trailing channel axis.
    x = (img / 255.0).reshape(28, 28, 1)
    plt.imshow(x)
    plt.show()
    return x, y


#
def getdata(dir1='E:\\testimage\\image'):
    """Load the letter images under ``dir1`` together with integer labels.

    ``dir1`` is scanned for single-letter sub-directories (``a`` .. ``z``).
    Every readable image inside them is loaded as grayscale and resized to
    28x28. Directories and files are visited in sorted order so the result
    is the same on every run.

    Args:
        dir1: dataset root directory; defaults to the original fixed path.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays. ``X`` holds the 28x28 images and
        ``y`` the matching labels, 0 for ``a`` up to 25 for ``z``.
    """
    # The label is derived from the directory name with ord(), which only
    # works for a single character, so ignore every other directory.
    sub_dirs = [
        name for name in sorted(os.listdir(dir1))
        if len(name) == 1 and name.isascii() and name.isalpha()
        and os.path.isdir(os.path.join(dir1, name))
    ]
    print(sub_dirs)

    X = []
    y = []
    for subdir in sub_dirs:
        folder = os.path.join(dir1, subdir)
        # Digits 0-25 stand in for the letters a-z.
        label = ord(subdir.lower()) - ord('a')
        for image in sorted(os.listdir(folder)):
            filename = os.path.join(folder, image)
            img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread returns None for anything it cannot decode;
                # skip it instead of crashing in cv2.resize.
                print("skipped unreadable file:", filename)
                continue
            X.append(cv2.resize(img, (28, 28)))
            y.append(label)

    # Number of images actually loaded.
    print(len(X))
    return np.array(X), np.array(y)

#建立模型训练模型
def traindata():
    """Train the letter-recognition CNN, evaluate it, then test interactively.

    Loads the dataset with ``getdata()``, holds out 30% of it for testing,
    trains a small convolutional network for 20 epochs, prints the test loss
    and accuracy, and finally keeps asking the user for images to classify
    until the answer to the continue prompt is not ``y``.
    """
    num_classes = 26  # letters a-z

    X, y = getdata()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=10)
    # Scale the pixels by 1/255 and add the channel axis. -1 lets NumPy infer
    # the number of samples, so the code no longer depends on the dataset
    # holding exactly 728 + 312 images.
    X_train = (X_train / 255.0).reshape((-1, 28, 28, 1))
    X_test = (X_test / 255.0).reshape((-1, 28, 28, 1))
    # One-hot encode the labels. num_classes is passed explicitly: otherwise
    # the width is inferred from the largest label present, which can be
    # smaller than 26 if a split happens to miss the last letters.
    y_trainOnehot = to_categorical(y_train, num_classes=num_classes)
    y_testOnehot = to_categorical(y_test, num_classes=num_classes)

    model = Sequential()

    # First convolution block
    model.add(
        Conv2D(
            filters=256,
            kernel_size=(5, 5),
            padding='same',  # zero-pad so the output keeps the input size
            input_shape=(28, 28, 1),
            activation='relu'))
    model.add(MaxPool2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # Second convolution block: two stacked convolutions
    model.add(
        Conv2D(filters=256, kernel_size=(5, 5), padding='same', activation='relu'))
    model.add(
        Conv2D(filters=256, kernel_size=(5, 5), padding='same', activation='relu'))
    model.add(MaxPool2D(pool_size=(3, 3)))
    # Regularization: drop a quarter of the units
    model.add(Dropout(0.25))

    # Classifier head: flatten, one hidden dense layer, softmax over the classes
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(num_classes, activation='softmax'))
    # Print the per-layer parameter summary
    model.summary()

    # Train the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, y_trainOnehot, epochs=20)
    # Loss and accuracy on the held-out test set
    res = model.evaluate(X_test, y_testOnehot)
    print(model.metrics_names)
    print(res)

    # Keep testing the model on user-supplied images until the user stops
    iftest = 'y'
    while iftest == 'y':
        xxx, yyy = testdatainout()
        # Add the batch axis expected by predict()
        xxx = np.expand_dims(xxx, 0)
        prob = model.predict(xxx)
        print("预测值：", chr(np.argmax(prob) + 97))
        # The true value is echoed as typed; printing chr(yyy + 97) instead
        # would require it to be entered as a number 0-25.
        print("真实值：", yyy)
        iftest = input("是否继续测试：y/n")

if __name__ == "__main__":
    # Script entry point: train, evaluate and interactively test the model.
    # Example image path for the interactive prompt:
    # E:\testimage\image\a\img037-001.png
    traindata()
    print("helloworld")

因为数据集图片不是模型理想大小所以第一步先处理数据集，然后因为数据集处理之后没有重新写回去，所以在模型测试时也要进行图片的处理

# Read the image as grayscale, then shrink it to 28x28.
img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img,(28,28))

然后是制作标签：遍历数据集目录下的子目录（a-z），把目录名转成数字0-25作为标签（训练前再转成只有0和1的独热编码）

def getdata(dir1='E:\\testimage\\image'):
    """Load the letter images under ``dir1`` together with integer labels.

    ``dir1`` is scanned for single-letter sub-directories (``a`` .. ``z``).
    Every readable image inside them is loaded as grayscale and resized to
    28x28. Directories and files are visited in sorted order so the result
    is the same on every run.

    Args:
        dir1: dataset root directory; defaults to the original fixed path.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays. ``X`` holds the 28x28 images and
        ``y`` the matching labels, 0 for ``a`` up to 25 for ``z``.
    """
    # The label is derived from the directory name with ord(), which only
    # works for a single character, so ignore every other directory.
    sub_dirs = [
        name for name in sorted(os.listdir(dir1))
        if len(name) == 1 and name.isascii() and name.isalpha()
        and os.path.isdir(os.path.join(dir1, name))
    ]
    print(sub_dirs)

    X = []
    y = []
    for subdir in sub_dirs:
        folder = os.path.join(dir1, subdir)
        # Digits 0-25 stand in for the letters a-z.
        label = ord(subdir.lower()) - ord('a')
        for image in sorted(os.listdir(folder)):
            filename = os.path.join(folder, image)
            img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread returns None for anything it cannot decode;
                # skip it instead of crashing in cv2.resize.
                print("skipped unreadable file:", filename)
                continue
            X.append(cv2.resize(img, (28, 28)))
            y.append(label)

    # Number of images actually loaded.
    print(len(X))
    return np.array(X), np.array(y)

然后就是划分训练集和测试集：

# Load the dataset, then hold out 30% of it as the test set (fixed random_state).
X, y = getdata()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=10)

然后就是归一化

# Normalization: scale the pixel values by 1/255
    X_train = X_train / 255.0
    X_test = X_test / 255.0
    # NOTE(review): 728 and 312 hard-code the sample counts of the two splits,
    # presumably the 70/30 split of a 1040-image dataset; confirm before reuse.
    X_train = X_train.reshape((728, 28, 28, 1))
    X_test = X_test.reshape((312, 28, 28, 1))
    # Convert the class vectors into binary (0/1) matrices, i.e. one-hot encoding
    y_trainOnehot = to_categorical(y_train)
    y_testOnehot = to_categorical(y_test)

然后建立模型再进行训练，模型中的一些参数（如卷积核、学习率之类的）按个人需要来调整就好

# Build the model
    model = Sequential()

    # Convolution layer
    model.add(
        Conv2D(
            filters=256,
            kernel_size=(5, 5),
            padding='same',  # zero-pad so the output keeps the input size
            input_shape=(28, 28, 1),
            activation='relu'))
    # Pooling layer
    model.add(MaxPool2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # Convolution layers
    model.add(
        Conv2D(filters=256, kernel_size=(5, 5), padding='same', activation='relu'))
    model.add(
        Conv2D(filters=256, kernel_size=(5, 5), padding='same', activation='relu'))

    model.add(MaxPool2D(pool_size=(3, 3)))
    # Regularization: drop a quarter of the units
    model.add(Dropout(0.25))

    # Flatten layer
    model.add(Flatten())
    # Fully connected layer with relu activation
    model.add(Dense(256, activation='relu'))

    model.add(Dropout(0.25))
    # Fully connected output layer with softmax activation (26 classes)
    model.add(Dense(26, activation='softmax'))
    # Print the per-layer parameter summary
    model.summary()

然后就是训练模型：这里是进行了二十次迭代训练比较慢（个人觉得）

# Train the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, y_trainOnehot, epochs=20)
    # Evaluate on the test split; the result holds the loss and the accuracy
    res = model.evaluate(X_test, y_testOnehot)
    print(model.metrics_names)
    print(res)

然后是人工测试：

# Keep testing the model on user-supplied images until the user stops
    iftest = 'y'
    while iftest == 'y':
        xxx,yyy = testdatainout()
        # Add the batch axis before calling predict()
        xxx = (np.expand_dims(xxx, 0))
        prob = model.predict(xxx)
        print("预测值：", chr(np.argmax(prob) + 97))
        # Alternative: print chr(yyy + 97), which would require the true value to be entered as a number 0-25
        print("真实值：", yyy)
        iftest = input("是否继续测试：y/n")

测试方法为：

def testdatainout():
    """Interactively load one image and prepare it as model input.

    Prompts for an image path and for the expected label, reads the image as
    grayscale, resizes it to 28x28, scales the pixels by 1/255 and displays
    the result in a matplotlib window.

    Returns:
        tuple: ``(x, y)`` where ``x`` is a float array of shape (28, 28, 1)
        and ``y`` is the expected label exactly as typed by the user (str).

    Raises:
        FileNotFoundError: if OpenCV cannot read an image from the path.
    """
    # Example path: E:/testimage/image/a/img037-001.png
    path = input("文件路径：")
    y = input("正确值：")
    print(path)

    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with a clear message rather than later inside cv2.resize.
    if img is None:
        raise FileNotFoundError(f"cannot read image: {path}")
    img = cv2.resize(img, (28, 28))
    # Scale by 1/255 and add the trailing channel axis.
    x = (img / 255.0).reshape(28, 28, 1)
    plt.imshow(x)
    plt.show()
    return x, y

这一个是小工具类的代码主要是用于得到一个图片数据集的所有图片的路径：

import os
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import numpy as np
import tensorflow.python.keras as keras
from tensorflow.python.keras import layers
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from sklearn.model_selection import train_test_split

def test(path='E:\\testimage\\image\\a\\img037-001.png'):
    """Preview one image after the grayscale / 28x28 / 1/255 preprocessing.

    Prints the pixel array before and after it is reshaped to (28, 28, 1)
    and then displays it in a matplotlib window.

    Args:
        path: image file to preview; defaults to the original sample path.

    Raises:
        FileNotFoundError: if OpenCV cannot read an image from ``path``.
    """
    print(path)

    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None on failure instead of raising.
    if img is None:
        raise FileNotFoundError(f"cannot read image: {path}")
    img = cv2.resize(img, (28, 28))
    # Wrap in a list first so the first print shows the (1, 28, 28) batch form.
    x = np.array([img]) / 255.0
    print(x)

    # Move the singleton axis to the end: (28, 28, 1), the per-sample shape.
    x = x.reshape(28, 28, 1)
    print(x)
    plt.imshow(x)
    plt.show()
# 这一个是做那个图片展示窗口，因为不是展示原图的，所以做了两步处理

def geturldata(dir1='E:\\testimage\\image'):
    """Collect the path of every entry one level below the dataset root.

    Each sub-directory of ``dir1`` is listed and the path of every entry in
    it is collected. Entries are visited in sorted order so the result is
    deterministic, and paths are built with ``os.path.join`` so they are
    valid on the current platform.

    Args:
        dir1: dataset root directory; defaults to the original fixed path.

    Returns:
        list[str]: the collected paths, grouped by sub-directory.
    """
    sub_dir_and_files = sorted(os.listdir(dir1))
    print(sub_dir_and_files)

    # Keep only the sub-directories; plain files in the root are ignored.
    sub_dirs = []
    for name in sub_dir_and_files:
        candidate = os.path.join(dir1, name)
        if os.path.isdir(candidate):
            sub_dirs.append(candidate)
    print(sub_dirs)

    imurl = []
    for sub_dir in sub_dirs:
        imurl.extend(
            os.path.join(sub_dir, entry) for entry in sorted(os.listdir(sub_dir))
        )
    print(imurl)
    return imurl

def savefilename(path="E:\\testimage", filename="filenamedata.txt"):
    """Write the paths returned by ``geturldata()`` to a text file.

    The file is (re)created on every call and holds one path per line,
    encoded as UTF-8.

    Args:
        path: directory for the listing file; created if it does not exist.
        filename: name of the listing file inside ``path``.
    """
    imgurl = geturldata()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    # Mode "w" already empties an existing file, so no explicit truncate() is
    # needed; the with-block closes the file even if a write fails.
    with open(os.path.join(path, filename), "w", encoding="utf-8") as fo:
        fo.writelines(f"{url}\n" for url in imgurl)



if __name__ == "__main__":
    # Print the collected paths once, then write them to the listing file
    # (savefilename() collects the paths again on its own).
    geturldata()
    savefilename()

效果展示：

主体思路比较简单：先确定模型输入图片的大小和格式，再看是否需要对图片做预处理，得到类似本实验中28*28、取值在0到1之间的单通道矩阵。这里需要注意通道数问题，也就是上面的图片灰度化处理，把多通道的彩色图片变成单通道的灰度图。处理好数据集之后（标签在遍历数据集的循环里一并生成），就是建立模型，选择合适的损失函数、卷积核等等，然后再测试模型。

对于其原理，我参考了书本上的一些讲解，如《Python机器学习》（[美] 塞巴斯蒂安·拉施卡（Sebastian Raschka）著，陈斌译）中间几章的内容。总之就是原理不难，实现不易。

阿哲-哲学的哲

关注

1
点赞
踩
31

收藏

觉得还不错? 一键收藏
打赏
0
评论
使用tensorflow实现手写字母识别-＞python

环境准备：IDE：pycharmpython版本：python3.8外部库：tensorflow2.3、opencv4.0+、matplotlib3.5、sklearn首先需要理解一下tensorflow这个东西，该程序主要是利用tensorflow的keras.models 的Sequential模型。
复制链接

扫一扫