Raspberrypi 基于openCV的简单数字识别

aoliba_believer

已于 2024-07-27 14:28:09 修改

阅读量1.6k

点赞数 23

文章标签： opencv 计算机视觉图像处理

于 2024-07-23 10:48:22 首次发布

本文链接：https://blog.csdn.net/aoliba_believer/article/details/140625471

版权

前言

本项目是树莓派识别数字的小例程

电赛备赛时写的，实现对1-8的数字识别并与下位机通信控制小车进行运动

我们的送药小车

项目前置

python 版本要求

python>=3.6

库依赖要求

做图像处理

numpy

opencv-python

做引脚操控

RPi.GPIO

pyserial

rpi-lgpio

项目主体

主要实现思路

传统图像处理，使用模板匹配

主要步骤

对模板图像进行预处理，得到1-8总共8个模板
对识别图像进行预处理，将图像二值化转换取出矩形区域组成ROI区域
ROI和模板做模板匹配

详细过程

仅展示部分源码，详细源码可以去GitHub库

模板图像预处理

模板预览图

模板预览图本身是一张含有八个数字、每个数字都带外接矩形黑框的图。我们先对它作二值化处理，再对二值化模板图像做边缘提取，通过轮廓检测提取模板的轮廓，再求出各轮廓的外接矩形坐标，然后按数字1-8的顺序对轮廓进行排序，最后将每个矩形区域按比例内截（去掉黑框），再缩放成（100，150）的固定尺寸模板存入字典。

代码实现

# Digit the car has to look for; stays None until one has been recognised
to_detect_num = None
# Hardware initialisation; the returned handle is kept as `ser`
ser = hardware_init()
template_path = osp.join(template_dir, 'num_template.jpg')
template = cv2.imread(template_path)
# cv2.imread reports a missing/unreadable file by returning None, not by raising
if template is None:
    raise FileNotFoundError(f'cannot read template image: {template_path}')
gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
print('二值化模板图像已读取')
# Edge detection (hysteresis thresholds: low 80, high 200)
edged_template = cv2.Canny(template, 80, 200)
# Contour detection, outermost contours only
template_contours, hierarchy = cv2.findContours(edged_template, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
print(f'检测到{len(template_contours)}个轮廓')
# The hand-written ordering below indexes contours 0..7, so fail with a clear
# message instead of an IndexError when fewer were found
if len(template_contours) < 8:
    raise ValueError(
        f'expected at least 8 template contours, found {len(template_contours)}'
    )
# Contour indices listed in digit order 1-8. The order is hard-coded for this
# particular template image; it is not derived from the contour positions.
sorted_template_contours = [template_contours[k] for k in (5, 4, 1, 0, 7, 6, 3, 2)]
# Template dictionary: key i (0-based) holds the template for digit i + 1
digit_dict = {}
# Cut every digit out of the grayscale template
for (i, c) in enumerate(sorted_template_contours):
    (x, y, w, h) = cv2.boundingRect(c)
    # Shrink the bounding box by 10 px on every side, then bring the crop
    # to the fixed template size (width 100, height 150)
    roi = gray_template[y+10:y+h-10, x+10:x+w-10]
    roi = cv2.resize(roi, (100, 150))
    digit_dict[i] = roi

识别图像预处理

从摄像头获取视频流，将其缩放固定比例以减少后续的运算量，转换为灰度图并应用高斯模糊来消除噪点、抑制过于锐利的细节边缘，再使用Canny边缘检测，提取边缘后进行膨胀，得到识别数字完整的矩形边缘轮廓。通过轮廓检测，将轮廓按面积排序并取出面积最大的前5个，计算轮廓周长并做轮廓近似，看是否能构成四边形；筛选出能构成四边形且面积符合一定要求的轮廓，并通过两个矩形左侧x坐标的大小来判断数字的左右并重排。相机拍到的矩形数字框本身不可能是正好的矩形，需要进行图像变换，因此进行透视变换，将提取到的四边形变换成矩形然后二值化。

轮廓提取结果

提取矩形后画出的轮廓

二值化之后的图像

透视变换二值化之后的识别图像

代码实现

# Open the default camera (device index 0)
cap = cv2.VideoCapture(0)
while True:
    # Read the stream frame by frame; start_time marks when this frame began
    start_time = time.time()
    ret, frame = cap.read()
    if not ret:
        break
    # Contours below are detected on the full-size frame (no resize is done),
    # so their coordinates are already in orig_frame's pixel space. The scale
    # factor applied before the perspective warp therefore has to be 1; the
    # former frame.shape[0] / 500.0 shifted the corners on any frame whose
    # height is not exactly 500.
    ratio = 1.0
    orig_frame = frame.copy()
    # Convert to grayscale
    gray_img = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2GRAY)
    # Gaussian blur with a 5x5 kernel
    blur_img = cv2.GaussianBlur(gray_img, (5, 5), 0)
    # Canny edge detection; both thresholds are defined outside this snippet
    edged_img = cv2.Canny(blur_img, low_threshold, high_threshold)
    cv2.imshow("edged_img",edged_img)
    # Structuring element for the dilation
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    # Dilate the edge map
    dilated_img = cv2.dilate(edged_img, kernel, iterations=2)
    cv2.imshow("Dilated Image", dilated_img)
    # Contour detection.
    # NOTE(review): contours are taken from edged_img; dilated_img is only
    # displayed here - confirm which of the two was meant to be searched.
    contours, _ = cv2.findContours(edged_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Keep the five largest contours by area
    sorted_contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    screenCnt = []  # quadrilateral candidates found in this frame (at most two)
    output = []
    for c in sorted_contours:
        # Contour perimeter
        peri = cv2.arcLength(c, True)
        # Polygon approximation with a tolerance of 2% of the perimeter
        approx = cv2.approxPolyDP(c, 0.02*peri, True)
        if len(approx) != 4:
            continue
        # A four-vertex polygon counts as a digit card only inside this area window
        area = cv2.contourArea(approx)
        if 10000 < area < 45000:
            screenCnt.append(approx)
            if len(screenCnt) == 2:
                break

    # With two cards in view, let the helper order them by their left x coordinate
    if len(screenCnt) == 2:
        num_detect.left_right_sort(screenCnt)
    # Only draw and warp when at least one candidate was found
    if screenCnt:
        cv2.drawContours(frame, screenCnt, -1, (255, 255, 0), 2)
        postion_count = 0
        warped_list = []
        thresh_list = []
        for c in screenCnt:
            # Skip a contour that holds no points
            if c.size == 0:
                print("找到一个空的轮廓，跳过。")
                continue
            # Perspective transform of the quadrilateral, then grayscale
            warped = num_detect.four_point_transform(orig_frame, c.reshape(4, 2) * ratio)
            warped_list.append(cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY))
            # Binarise with an Otsu-selected threshold
            thresh_list.append(cv2.threshold(warped_list[postion_count], 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1])
            # Show the warped, binarised patch
            cv2.imshow(f"Thresh_{postion_count}", thresh_list[postion_count])

ROI区域

首先将图像预处理得到的二值化图像缩放成（100，150）的图像，再进行高斯模糊和腐蚀操作滤除噪点，再次做边缘检测提取矩形框边缘，用轮廓检测提取矩形框轮廓，最后将整个图像按比例内截去除矩形框，再缩放成（100，150）的ROI区域送给下一步模板匹配。

框定ROI区域再做边缘检测的结果

去除矩形框之后的ROI区域

代码实现

# Work on the binarised patch of the current contour. The emptiness check must
# look at this patch, not at whatever `roi` held from an earlier step.
roi = thresh_list[postion_count]
if roi.size > 0:
    roi = cv2.resize(roi, (100, 150))
    # Structuring element
    kernel = np.ones((5,5), np.uint8)
    roi_blur = cv2.GaussianBlur(roi, (7, 7), 0)
    roi_edge = cv2.Canny(roi_blur, 80, 200)
    # NOTE(review): the eroded map is not consumed within this snippet;
    # contours and the bounding box below are taken from roi_edge.
    roi_eroded = cv2.erode(roi_edge, kernel, iterations=2)
    # Contours of the edge map
    roi_contours, _ = cv2.findContours(roi_edge, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(roi_contours) > 0:
        # One box around the whole edge map; it does not depend on the
        # individual contour, so it is computed once
        (x, y, w, h) = cv2.boundingRect(roi_edge)
    else:
        # No edges at all: use the full patch instead of stale or undefined
        # coordinates
        (x, y, w, h) = (0, 0, roi.shape[1], roi.shape[0])
    # NOTE(review): roi is single-channel at this point, so presumably only
    # the first colour component (0) is drawn - confirm.
    cv2.rectangle(roi, (x, y), (x+w, y+h), (0, 255, 0), 2)
    cv2.imshow("roi_edge",roi_edge)
    # Crop to the box, then cut a fixed margin (15 px top/bottom, 12 px
    # left/right) and scale back to width 100, height 150
    roi = cv2.resize(roi[y:y+h, x:x+w],(100, 150))
    roi = cv2.resize(roi[0+15:150-15, 0+12:100-12],(100, 150))
    cv2.imshow(f"ROI{postion_count}", roi)
else:
    print("ROI为空，跳过调整大小。")
postion_count += 1

模板匹配

将字典里的数字模板图像逐一取出，与识别图像的ROI区域进行模板匹配，找到得分最高（最匹配）的数字，再将其加入识别结果。

模板匹配结果

代码实现

# Score the ROI against every digit template.
# TM_CCOEFF_NORMED is used so that each score lies in [-1, 1]; with the
# un-normalised TM_CCOEFF the raw correlation is orders of magnitude larger
# and the 0.8 confidence gate below would accept practically every frame.
# NOTE(review): the 0.8 threshold may need re-tuning on real footage now that
# it actually filters.
scores = []
best_digit = None
best_score = None
for (digit, digitROI) in digit_dict.items():
    result = cv2.matchTemplate(roi, digitROI, cv2.TM_CCOEFF_NORMED)
    (_, score, _, _) = cv2.minMaxLoc(result)
    scores.append(score)
    # Track the winner by its dictionary key; strict ">" keeps the first
    # template on ties
    if best_score is None or score > best_score:
        best_digit, best_score = digit, score
# Trust the result only when the best match clears the confidence threshold
trust = best_score is not None and best_score > 0.8
if trust:
    # Template keys are 0-based, recognised digits are reported as 1-8
    output.append(str(best_digit + 1))

下位机通信

通过串口去传输数据，将左右数字传给下位机MCU进行进一步的控制。

代码实现


# Report the recognition result over the serial link.
# Two cases: a target digit is already stored (annotate the frame and send the
# left/right pair), or it is not yet stored (send the first recognised digit
# and remember it as the target).
if output != []:# recognition result is not empty
    if to_detect_num is not None: # a target digit is already stored, i.e. not the first detection
        for i in range(len(output)):
            # Draw each recognised digit 10 px above the first vertex of its contour
            cv2.putText(frame, output[i], (screenCnt[i][0][0][0], screenCnt[i][0][0][1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        if len(output) == 2:
            print(f"左：{output[0]}，右：{output[1]}")
            # The pair is only written when the read returns b'O'
            # NOTE(review): presumably b'O' is the MCU's ready/OK token - confirm against the MCU firmware
            if(hardware.UART_read(ser)==b'O'):
                print(f"发送{output[0]}、{output[1]}成功")
                hardware.UART_write(ser, f'({output[0]}{output[1]})\r\n'.encode())
    else:
        # First detection: the single digit is sent twice inside the brackets
        # and stored as the target. The loop repeats for as long as the read
        # keeps returning b'O'.
        while(hardware.UART_read(ser)==b'O'):
            hardware.UART_write(ser, f'({output[0]}{output[0]})\r\n'.encode())
            print(f"发送{output[0]}成功")
            to_detect_num = int(output[0])
            print(f"需要识别的数字为 {output[0]}")
            print("等待下位机OK")
            start_wait_OK = time.time()
            # NOTE(review): start_wait_OK is taken on the line just above and the
            # subtraction is "start minus now", so this expression does not measure
            # an elapsed wait. The difference is <= 0 and Python's % with a positive
            # divisor yields a non-negative result, so the branch looks like it
            # fires on almost every pass - confirm the intended timeout.
            if(((start_wait_OK - time.time())*10)%10 > 1):
                start_wait_OK = time.time()
                print("等待超时")
                hardware.UART_write(ser, f'({output[0]}{output[0]})\r\n'.encode())
                print(f"发送{output[0]}成功")