pytesseract 文字识别：提高准确率的方法

鱼儿丨

已于 2024-02-03 14:17:27 修改

阅读量5.8k

点赞数 2

分类专栏：python工具　　文章标签：计算机视觉 opencv python

于 2023-03-09 20:13:30 首次发布

本文链接：https://blog.csdn.net/starlit_night/article/details/129430259

版权

python工具专栏收录该内容

4 篇文章

订阅专栏

该代码实现了一个 Python 类，用于识别图像中的文本。它首先计算每个像素与目标颜色之间的欧几里得距离，把颜色相近的像素写成黑色、其余像素保持白色，从而得到白底黑字的图像；然后使用 pytesseract 库对该图像进行文字识别，并且只识别数字。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >


import math ,pytesseract ,cv2
from PIL import Image


class identifyText:
    """Read a number from an image by OCR after a colour-based clean-up.

    The pipeline has two steps:

    1. ``transformedImage`` paints every pixel whose colour is close to a
       target colour black on a white canvas and saves the result.
    2. ``characterRecognition`` runs pytesseract on that saved image with a
       digits-only whitelist and returns the digits as an ``int``.

    Constructing an instance runs both steps and prints the result.
    """

    # RGBA colour whose near matches are treated as text pixels.
    _TEXT_COLOR = (247, 245, 244, 255)

    def __init__(self, img_path=r'screen\screen.png',
                 new_img_path=r'screen\new_screen.png',
                 threshold=100) -> None:
        """Run the whole pipeline once and print what was recognised.

        Args:
            img_path: Path of the image to read.
            new_img_path: Path where the black-on-white image is written.
            threshold: Maximum colour distance for a pixel to count as
                similar to the target colour. Per the original author's
                note, 5-200 works best and 5-500 is the usable range.
        """
        self.threshold = threshold

        if self.transformedImage(img_path, new_img_path):
            print(self.characterRecognition(new_img_path))

    def color_distance(self, c1, c2):
        """Return the Euclidean distance between two colours.

        Args:
            c1: Channel values of the first colour, e.g. ``(r, g, b, a)``.
            c2: Channel values of the second colour, same length as ``c1``.

        Returns:
            The distance as a ``float``.

        Raises:
            ValueError: If the two colours have a different channel count.
        """
        # Any channel count is accepted so images without an alpha channel
        # can be compared too; only a mismatch between the two is an error.
        if len(c1) != len(c2):
            raise ValueError(
                f"colors must have the same number of channels, "
                f"got {len(c1)} and {len(c2)}"
            )
        return math.sqrt(sum((a - b) ** 2 for a, b in zip(c1, c2)))

    def transformedImage(self, img_path, new_img_path, target_color=None):
        """Write a black-on-white copy of the image to improve OCR accuracy.

        Args:
            img_path: Path of the source image.
            new_img_path: Path where the converted image is saved.
            target_color: RGBA 4-tuple to match against; defaults to the
                class-level text colour.

        Returns:
            ``True`` once the new image has been saved.
        """
        if target_color is None:
            target_color = self._TEXT_COLOR

        # Normalise to RGBA so every pixel is a 4-tuple regardless of the
        # source mode, and close the underlying file as soon as it is read.
        with Image.open(img_path) as source:
            img = source.convert('RGBA')

        # Start from an all-white canvas of the same size.
        new_img = Image.new('RGBA', img.size, (255, 255, 255, 255))

        src_pixels = img.load()
        dst_pixels = new_img.load()
        width, height = img.size

        for x in range(width):
            for y in range(height):
                # Pixels close to the target colour become black.
                distance = self.color_distance(src_pixels[x, y], target_color)
                if distance < self.threshold:
                    dst_pixels[x, y] = (0, 0, 0, 255)

        new_img.save(new_img_path)
        return True

    def characterRecognition(self, new_img_path):
        """Run OCR on the converted image and return the digits found.

        Args:
            new_img_path: Path of the black-on-white image to recognise.

        Returns:
            The recognised digits as an ``int`` (so leading zeros are lost),
            or the string ``'未能识别文字'`` when no digit was recognised.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        # NOTE: the original author found extra preprocessing (grayscale
        # conversion plus inverse Otsu thresholding via cv2) unnecessary
        # because the black-on-white image is already accurate enough.
        img = cv2.imread(new_img_path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(f"cannot read image: {new_img_path}")

        # --psm 7: treat the image as a single line of text.
        # --oem 3: Tesseract's default engine mode (based on what is
        #          available; the original author describes it as LSTM).
        # tessedit_char_whitelist: recognise digit characters only.
        config = "--psm 7 --oem 3 -c tessedit_char_whitelist=0123456789"
        text = pytesseract.image_to_string(img, config=config)

        # Drop every non-digit symbol and join the remaining digits.
        digits = ''.join(filter(str.isdigit, text))
        try:
            return int(digits)
        except ValueError:
            # Nothing usable was recognised (e.g. an empty string).
            return '未能识别文字'

# Script entry point: creating the recognizer is enough, because its
# constructor performs the image conversion and prints the OCR result.
if __name__ == '__main__':
    identifyText()