Paddleocr的部署与增加判定条件

我是碳酸会冒泡i

已于 2024-05-30 16:22:53 修改

阅读量633

点赞数 11

分类专栏： python 文章标签： python 算法

于 2024-05-30 16:20:42 首次发布

本文链接：https://blog.csdn.net/diyuxiaoguaishou/article/details/139326509

版权

python 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

文章目录

前言
一、Paddleocr是什么？
二、使用步骤
- 1.引入库
- 2.代码主体部分
总结

前言

我和同学一起阅读了 PaddleOCR 的相关代码，并在其基础上增加了几个判定条件，在此分享出来。
在这里插入图片描述

一、Paddleocr是什么？

PaddleOCR 是一个文字识别（OCR）工具，可以识别多种语言的文字。本文的代码在其识别结果的基础上，增加了文字占比、文字块数、字符总数，以及是否为长文本、是否为大字体的判定。
（长文本：文本像素总数占图像总像素的百分比 > 20%，且字符总数 > 300；大字体：存在某个文本块的宽度除以其字符数 > 100，或高度除以其字符数 > 100。只有识别置信度 ≥ 0.7 的文本块才计入文字占比、字符总数和大字体的判定。）

下面是 PaddleOCR 在 GitHub 上的官方快速开始文档，大家可以参考：

https://github.com/PaddlePaddle/PaddleOCR/blob/main/doc/doc_ch/quickstart.md

二、使用步骤

1.引入库

代码如下（示例）：

import argparse
import json
from PIL import Image
from paddleocr import PaddleOCR
import os

2.代码主体部分

代码如下（示例）：

class AdvancedImageClassifier:
    """Run PaddleOCR on an image and derive simple text-layout statistics.

    The statistics are: share of the image covered by text, number of text
    blocks, number of recognised characters, and two boolean flags ("long
    text" and "large font").
    """

    # Blocks recognised with a lower confidence still count as text blocks
    # but contribute nothing to the pixel / character / large-font statistics.
    MIN_CONFIDENCE = 0.7
    # A block is "large font" when its width or its height divided by its
    # character count exceeds this many pixels.
    LARGE_FONT_PIXELS_PER_CHAR = 100
    # "Long text" requires both thresholds to be exceeded.
    LONG_TEXT_MIN_PERCENTAGE = 20
    LONG_TEXT_MIN_CHARACTERS = 300

    # PaddleOCR engine, created on first use and then reused by this instance
    # so the models are not reloaded for every image.
    _ocr_engine = None

    def _get_ocr_engine(self):
        """Return this instance's PaddleOCR engine, creating it on first use."""
        if self._ocr_engine is None:
            self._ocr_engine = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
        return self._ocr_engine

    @staticmethod
    def _box_size(box):
        """Return (width, height) of the axis-aligned rectangle enclosing *box*.

        All corner points are considered, so the result is never negative and
        does not depend on the order in which the corners are listed.
        """
        xs = [point[0] for point in box]
        ys = [point[1] for point in box]
        return max(xs) - min(xs), max(ys) - min(ys)

    def perform_ocr_on_image(self, image_path):
        """Analyse the text found in one image.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A dict with the keys ``"image_path"`` and ``"font_analysis"``; the
            latter holds a ``"labelValueList"`` with the five computed labels.
            Returns ``None`` when any step fails; the error is printed.
        """
        try:
            result = self._get_ocr_engine().ocr(image_path, cls=True)
            # Only the dimensions are needed, so release the file right away.
            with Image.open(image_path) as img:
                img_width, img_height = img.size
            img_pixels = img_width * img_height

            total_text_pixels = 0
            text_blocks_count = 0
            total_characters_count = 0
            has_large_fonts = False

            for res in result or []:
                if res is None:
                    continue  # nothing was recognised for this entry
                text_blocks_count += len(res)
                for line in res:
                    box = line[0]
                    text, confidence = line[1][0], line[1][1]
                    if not confidence >= self.MIN_CONFIDENCE:
                        continue
                    text_width, text_height = self._box_size(box)
                    total_text_pixels += text_width * text_height
                    char_count = len(text)
                    total_characters_count += char_count
                    # An empty recognised string has no per-character size;
                    # skipping it avoids a division by zero that would
                    # otherwise discard the whole image.
                    if char_count and (
                        text_width / char_count > self.LARGE_FONT_PIXELS_PER_CHAR
                        or text_height / char_count > self.LARGE_FONT_PIXELS_PER_CHAR
                    ):
                        has_large_fonts = True

            text_percentage = (total_text_pixels / img_pixels) * 100 if img_pixels else 0
            is_long_text = bool(
                text_percentage > self.LONG_TEXT_MIN_PERCENTAGE
                and total_characters_count > self.LONG_TEXT_MIN_CHARACTERS
            )

            output = {
                "image_path": image_path,
                "font_analysis": {
                    "labelName": "字体",
                    "labelValueList": [
                        # NOTE(review): the percentage is multiplied by 100
                        # again and truncated, so 20% is stored as 2000.
                        # Kept as-is because consumers may rely on it.
                        {"labelName": "文字占比", "labelValue": int(text_percentage * 100)},
                        {"labelName": "文字块数", "labelValue": text_blocks_count},
                        {"labelName": "字符总数", "labelValue": total_characters_count},
                        {"labelName": "长文本", "labelValue": is_long_text},
                        {"labelName": "大字体", "labelValue": has_large_fonts}
                    ]
                }
            }
            return output
        except Exception as e:
            # Best-effort batch processing: report the failure and let the
            # caller move on to the next image.
            print(f"Error processing image {image_path}: {e}")
            return None

def parse_opt():
    """Parse the command-line options of the script.

    Returns:
        The parsed namespace with ``image_folders`` (a list of root folders
        to scan) and ``output_folder`` (where result files are written).
    """
    cli = argparse.ArgumentParser()
    default_folders = ['输入文件夹位置']
    cli.add_argument(
        '--image_folders',
        nargs='+',
        type=str,
        default=default_folders,
        help='要分析的根图片文件夹路径列表',
    )
    cli.add_argument(
        '--output_folder',
        type=str,
        default='输出文件夹位置',
        help='结果保存文件夹',
    )
    options = cli.parse_args()
    return options

def _unique_output_path(output_folder, stem, used_paths):
    """Return a ``<stem>.text.json`` path in *output_folder* not yet used in this run.

    The first image with a given stem keeps the plain name; later ones get a
    numeric suffix so they do not overwrite the earlier result.
    """
    candidate = os.path.join(output_folder, f"{stem}.text.json")
    suffix = 1
    while candidate in used_paths:
        candidate = os.path.join(output_folder, f"{stem}_{suffix}.text.json")
        suffix += 1
    used_paths.add(candidate)
    return candidate


def main(opt):
    """Analyse every image under the given folders and save the results as JSON.

    Args:
        opt: Options object providing ``image_folders`` (iterable of root
            folders, walked recursively) and ``output_folder`` (created if it
            does not exist).

    One ``<image stem>.text.json`` file is written per image for which at
    least one computed label is greater than zero. All results go into the
    same flat folder, so images sharing a stem (same name in different
    subfolders, or different extensions) get a numeric suffix instead of
    overwriting each other.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')
    classifier = AdvancedImageClassifier()
    processed_images = 0
    used_paths = set()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(opt.output_folder, exist_ok=True)

    for image_folder in opt.image_folders:
        for root, _, files in os.walk(image_folder):
            for filename in files:
                if not filename.lower().endswith(image_suffixes):
                    continue

                processed_images += 1
                image_path = os.path.join(root, filename)
                print(f"Processing image {processed_images}: {filename}")
                text_data = classifier.perform_ocr_on_image(image_path)

                # A failed analysis (None) is reported the same way as an
                # image without any text.
                labels = text_data['font_analysis']['labelValueList'] if text_data else None
                if not labels or not any(val['labelValue'] > 0 for val in labels):
                    print(f"No text found in image: {filename}")
                    continue

                output_file = _unique_output_path(
                    opt.output_folder, os.path.splitext(filename)[0], used_paths
                )
                try:
                    with open(output_file, 'w', encoding='utf-8') as f:
                        json.dump(text_data, f, indent=2, ensure_ascii=False)
                    print(f"Result saved to {output_file}")
                except Exception as e:
                    print(f"Error saving result to {output_file}: {e}")

if __name__ == '__main__':
    # Script entry point: parse the command line, then run the batch analysis.
    main(parse_opt())