Python根据截图坐标识别文字

以学为干。

于 2024-10-03 17:39:47 发布

阅读量419

点赞数 3

文章标签： python 开发语言 计算机视觉

本文链接：https://blog.csdn.net/qq_41644139/article/details/142693989

版权

import ddddocr
import pyautogui
import io


def capture_image(left, top, width, height):
    """Take a screenshot of a rectangular part of the screen.

    Args:
        left: X coordinate of the rectangle's left edge.
        top: Y coordinate of the rectangle's top edge.
        width: Width of the rectangle.
        height: Height of the rectangle.

    Returns:
        The image object returned by ``pyautogui.screenshot`` for the
        requested region.
    """
    region = (left, top, width, height)
    return pyautogui.screenshot(region=region)


def ocr_text(left, top, width, height, expected_text):
    """Screenshot a screen region, OCR it, and compare with the expected text.

    The recognized text is also printed to stdout.

    Args:
        left: X coordinate of the region's left edge.
        top: Y coordinate of the region's top edge.
        width: Width of the region.
        height: Height of the region.
        expected_text: Text the region is expected to contain.

    Returns:
        True if the recognized text equals ``expected_text`` exactly,
        otherwise False.
    """
    # Encode the screenshot as PNG entirely in memory; the OCR engine is fed
    # raw image bytes, so no temporary file is needed.
    png_buffer = io.BytesIO()
    capture_image(left, top, width, height).save(png_buffer, format='PNG')
    png_bytes = png_buffer.getvalue()

    # A new recognizer is created on every call.
    recognizer = ddddocr.DdddOcr()
    result = recognizer.classification(png_bytes)
    print(f'自动识别为：{result}')

    return result == expected_text


if __name__ == '__main__':
    # Demo: the rectangle is specified by its bottom-right corner and its
    # size, while the capture helpers expect the top-left corner.
    bottom_right = (59, 115)
    size = (59, 29)

    # Derive the top-left corner by stepping back one width/height.
    top_left = (bottom_right[0] - size[0], bottom_right[1] - size[1])
    region = (*top_left, *size)

    # Report the rectangle being captured.
    print("Bounding Rectangle:")
    print("左上角坐标: ({}, {})".format(*top_left))
    print("右下角坐标: ({}, {})".format(*bottom_right))

    # Open the captured region in an image viewer for a visual check
    # (call .save("screenshot.png") on the image to keep a copy on disk).
    capture_image(*region).show()

    # Run OCR on the same region and report whether it matched 'aa'.
    matched = ocr_text(*region, 'aa')
    print(f'识别是否成功：{matched}')