opencv实战（2）：手写数字检测

最新推荐文章于 2025-05-04 12:55:41 发布

做好自己吧!

最新推荐文章于 2025-05-04 12:55:41 发布

阅读量529

点赞数 16

文章标签： opencv neo4j 人工智能

本文链接：https://blog.csdn.net/2403_87587520/article/details/147670894

版权

只有实战我们才能真正掌握

声明：文章较长，但是注释详细，新入手的小白也可以“生啃代码”

写代码不易,写注释也不易,如果喜欢,点赞加评论吧,谢谢!

一.前期准备

安装tensorflow库(注意py版本不超过3.11)
准备好手写数字(0-9)的图片,依次命名为0.jpg~9.jpg,放入同一个文件夹

二.导入所需要的库函数

import cv2
import numpy as np
from tensorflow.keras.datasets import mnist #type:ignore #注释的意思是去除vc代码提示出现的黄线（强迫症）
from tensorflow.keras.models import Sequential #type:ignore
from tensorflow.keras.layers import Dense#type:ignore 
from tensorflow.keras.utils import to_categorical#type:ignore
import os#读写文件有关
import matplotlib.pyplot as plt#绘图需要的库

# Let matplotlib render Chinese text: use the SimHei font for sans-serif
# text, and turn off the Unicode minus glyph (presumably because SimHei
# lacks it, so negative tick labels would otherwise show as boxes).
plt.rcParams['font.sans-serif']=["SimHei"]
plt.rcParams['axes.unicode_minus']=False

简单解释一下:
1.import mnist表示导入数据集，就像做菜要先准备食材
2.做蛋糕需要一个模具，Sequential就是这样一个模具。
表示一层一层添加的神经网络（一层一层地放蛋糕材料）
3.Dense表示蛋糕的每一层（蛋糕层），把所有材料混合在一起充分反应
4.import to_categorical给蛋糕分类标签，如草莓蛋糕，巧克力蛋糕等。它把数字标签转成更容易让计算机理解的形式（one-hot编码），
如数字4对应[0,0,0,0,1,0,0,0,0,0]

三.代码逻辑一览

分为四个函数，对应四个步骤

1.加载和预处理数据集（准备面粉,和面）
2.构建和训练BP神经网络（一层层整蛋糕）
3.使用opencv预处理图像
4.可视化处理

四.加载和预处理数据集

# 1. Load and preprocess the dataset
def load_and_preprocess_mnist():
    """Load MNIST and return it flattened, scaled and one-hot encoded.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
        Each image array is reshaped to two dimensions (one row per image),
        cast to ``float32`` and divided by 255. Each label array is passed
        through ``to_categorical`` with ten classes.
    """
    # mnist.load_data() returns two (images, labels) tuples: the training
    # split first, then the test split. To train on a subset, slice both
    # arrays of a split the same way, e.g. ``train_images[:1000]`` together
    # with ``train_labels[:1000]``.
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    # Flatten every image into a single row. The row count is taken from the
    # data instead of a hard-coded 60000 / 10000 so the code keeps working
    # when the arrays are sliced; -1 lets reshape infer the pixel count.
    train_images = train_images.reshape((len(train_images), -1))
    test_images = test_images.reshape((len(test_images), -1))

    # Scale pixel values by 1/255. Both splits use float32 so the training
    # and evaluation inputs share one dtype.
    train_images = train_images.astype('float32') / 255
    test_images = test_images.astype('float32') / 255

    # One-hot encode the labels. num_classes is fixed at 10 (digits 0-9) so
    # a subset that happens to lack the highest digits still yields ten
    # columns.
    train_labels = to_categorical(train_labels, num_classes=10)
    test_labels = to_categorical(test_labels, num_classes=10)

    return (train_images, train_labels), (test_images, test_labels)

五.构建和训练神经网络

# 2. Build and train the neural network
def build_and_train_model(train_images, train_labels, epochs=10,
                          batch_size=128, validation_split=0.2):
    """Build a three-layer fully connected classifier and fit it.

    The network is Dense(512, relu) -> Dense(256, relu) -> Dense(10, softmax)
    and is compiled with the Adam optimizer, categorical cross-entropy loss
    and the accuracy metric.

    Args:
        train_images: training inputs; the first layer is declared with
            ``input_shape=(28*28,)``, so each sample must have 784 values.
        train_labels: training targets passed to ``model.fit``; presumably
            one-hot rows of length 10 to match the softmax output.
        epochs: number of passes over the training data.
        batch_size: number of samples per gradient update.
        validation_split: fraction of the training data that ``fit`` holds
            out for validation.

    Returns:
        The fitted ``Sequential`` model.
    """
    model = Sequential()

    # First hidden layer. Only the first layer declares the input shape;
    # each later layer takes the previous layer's output as its input.
    model.add(Dense(512, activation='relu', input_shape=(28*28,)))

    # Second hidden layer. relu is f(x) = max(0, x): positive values pass
    # through unchanged and negative values become 0.
    model.add(Dense(256, activation='relu'))

    # Output layer: one unit per digit 0-9. softmax turns the ten outputs
    # into probabilities; the most probable class is the prediction.
    model.add(Dense(10, activation='softmax'))

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    print("Training the model...")

    # The History object returned by fit() is not needed here, so it is not
    # kept.
    model.fit(train_images, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_split=validation_split,
              verbose=1)  # show per-epoch progress

    return model

六.opencv预处理图像

# 3. Preprocess a custom image with OpenCV
def preprocess_custom_image(image_path):
    """Convert an image of one handwritten digit into MNIST-style input.

    Steps: read the file as grayscale, binarise it with an inverted Otsu
    threshold, crop to the largest external contour, scale the crop so its
    longer side is 28 pixels (aspect ratio preserved) and centre it on a
    black 28x28 canvas.

    Args:
        image_path: path of the image file to read.

    Returns:
        ``(img_array, canvas)`` where ``canvas`` is the 28x28 ``uint8``
        image and ``img_array`` is the same data reshaped to ``(1, 784)``,
        cast to ``float32`` and divided by 255.

    Raises:
        ValueError: if ``cv2.imread`` cannot read ``image_path``.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    if img is None:
        raise ValueError(f'无法读取图像：{image_path}')

    # THRESH_OTSU chooses the threshold automatically (the first return
    # value, unused here). THRESH_BINARY_INV maps pixels above the threshold
    # to 0 and the rest to 255, so dark ink on light paper becomes a white
    # digit on a black background. Without INV the polarity is reversed.
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # RETR_EXTERNAL returns only the outermost contours; the hierarchy (the
    # second return value) is not needed.
    contours, _ = cv2.findContours(img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if contours:
        # Treat the largest contour as the digit and crop to its bounding box.
        cnt = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(cnt)
        img = img[y:y + h, x:x + w]

    # Array shapes are (rows, cols) = (height, width), whereas cv2.resize
    # takes its target size as (width, height). Both entries must be ints.
    height, width = img.shape[:2]
    if height > width:
        # Taller than wide: height becomes 28, width shrinks proportionally.
        # max(1, ...) stops a very thin stroke from collapsing to 0 pixels.
        target_size = (max(1, int(width * 28 / height)), 28)
    else:
        # Wider than tall (or square): width becomes 28. int() must wrap the
        # whole expression; a float here makes cv2.resize fail.
        target_size = (28, max(1, int(height * 28 / width)))
    resized = cv2.resize(img, target_size)

    # Paste the resized digit into the middle of a black 28x28 canvas.
    canvas = np.zeros((28, 28), dtype=np.uint8)
    start_x = (28 - resized.shape[1]) // 2
    start_y = (28 - resized.shape[0]) // 2
    canvas[start_y:start_y + resized.shape[0], start_x:start_x + resized.shape[1]] = resized

    # Model input: one flattened row scaled by 1/255, in float32.
    img_array = canvas.reshape(1, 784)
    img_array = img_array.astype('float32') / 255

    return img_array, canvas

七.可视化处理

# 4. Run the model on custom digit images and visualise the results
def test_custom_digits(model, custom_digits_dir):
    """Predict the images ``0.jpg`` .. ``9.jpg`` in a directory and report.

    Each file name gives the digit the image is expected to show. The
    function prints a result table and the overall accuracy, and shows a
    2x5 grid of the preprocessed images with their predictions.

    Args:
        model: object whose ``predict`` method accepts the ``(1, 784)``
            array produced by ``preprocess_custom_image``.
        custom_digits_dir: directory containing ``0.jpg`` .. ``9.jpg``.

    Returns:
        Dict mapping each digit ``0..9`` to
        ``(predicted_digit, confidence_percent, processed_image)``. An
        image that could not be processed maps to ``(-1, 0, zeros((28, 28)))``.
    """
    result = {}
    plt.figure(figsize=(15, 8))

    for i in range(10):
        img_path = os.path.join(custom_digits_dir, f'{i}.jpg')
        try:
            img_array, processed_img = preprocess_custom_image(img_path)
            prediction = model.predict(img_array)
            # Index of the highest probability; the ten outputs map to 0-9.
            predicted_digit = np.argmax(prediction)
            confidence = np.max(prediction) * 100
            result[i] = (predicted_digit, confidence, processed_img)
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole
            # run, so record a placeholder and carry on.
            print(f"Error processing {img_path}:{e}")
            result[i] = (-1, 0, np.zeros((28, 28)))

    print("\n自定义手写数字的识别结果：")
    print(" 实际数字\t预测数字\t置信度\t状态")

    correct = 0

    for i in range(10):
        actual = i
        predicted, confidence, img = result[i]
        is_correct = actual == predicted
        if is_correct:
            correct += 1

        # One table row per digit, matching the header printed above.
        status = "正确" if is_correct else "错误"
        print(f" {actual}\t{predicted}\t{confidence:.1f}\t{status}")

        plt.subplot(2, 5, i + 1)  # 2 rows x 5 columns; i + 1 is the slot
        plt.imshow(img, cmap='gray')  # gray colormap instead of the default
        plt.title(f"实际：{actual}\n预测：{predicted}\n可信度:{confidence:.1f}")
        plt.axis('off')  # hide the axes of this subplot

    accuracy = correct / 10 * 100
    print(f"\n自定义手写数字识别的准确率：{accuracy:.1f}")
    plt.tight_layout()  # adjust the spacing between subplots
    plt.show()

    return result

八.主函数

#5.主函数

def main():
    """Train on MNIST, report test metrics, then try the custom digits.

    The custom images are looked up in the directory ``'shu zi 1'``,
    resolved against the current working directory. If it does not exist,
    a message is printed instead.
    """
    (train_imgs, train_labels), (test_imgs, test_labels) = load_and_preprocess_mnist()

    model = build_and_train_model(train_imgs, train_labels)

    # The model is compiled with a single metric (accuracy), so evaluate()
    # returns the loss followed by that metric. Report both.
    test_loss, test_acc = model.evaluate(test_imgs, test_labels)
    print(f"\n测试集损失：{test_loss:.4f}，测试集准确率：{test_acc:.4f}")

    custom_digits_dir = 'shu zi 1'
    if os.path.exists(custom_digits_dir):
        test_custom_digits(model, custom_digits_dir)
    else:
        print(f"\n目录{custom_digits_dir}不存在")

if __name__ =='__main__':#确保为主程序当前模块可以直接运行