Python OCR 识别：基于 CnOcr 模型的 OCR 识别系统教程

XJB-Lanxi

已于 2025-03-06 10:28:11 修改

阅读量439

点赞数 6

分类专栏： python养成日记文章标签： ocr python

于 2025-03-06 10:18:57 首次发布

本文链接：https://blog.csdn.net/xjbsw0/article/details/146060047

版权

python养成日记专栏收录该内容

4 篇文章

订阅专栏

前言

目前可用于 OCR 识别的模型框架非常多，各有利弊。今天博主选用 Python 结合 CnOcr 模型来做一款 OCR 识别系统，给大家做参考。

工具

编程语言：python 3.9

识别模型：CnOcr（优点：该模型安装比较简单，识别准确率比较高）

安装及测试

1、安装CnOcr库。

# 安装 CNOCR 和依赖（确保已安装 PyTorch）
pip install cnocr

2、写代码。

from cnocr import CnOcr

# Explicitly select the detection and recognition models (English PP-OCRv3).
MODEL_NAMES = {
    'det_model_name': 'en_PP-OCRv3_det',
    'rec_model_name': 'en_PP-OCRv3',
}
recognizer = CnOcr(**MODEL_NAMES)

# Recognition is invoked exactly as it would be with the default models.
texts = []
for item in recognizer.ocr('test.jpg'):
    texts.append(item['text'])
print(texts)

运行代码，查看结果：图片上的文字被准确识别，测试成功。

注：首次运行需要联网，运行过程中会自动下载模型

进阶版需求：

1、增加一个GUI界面，手动导入图片后识别图片上的文字

2、识别结果装填在表格中展示在GUI界面上

3、识别结果保存到excel表格中

直接上完整代码：

"""
OCR 终极版 - 含网络配置、日志监控、时间戳和智能排序
环境要求：Python 3.7+，需安装以下库：
pip install cnocr pandas pillow openpyxl
"""

import os
import tkinter as tk
from tkinter import ttk, filedialog, messagebox, scrolledtext
from PIL import Image, ImageTk
from cnocr import CnOcr
import time
import random
import socket
import threading
import pandas as pd
import json

class OCRSystemPro:
    def __init__(self):
        """Build the application: settings, OCR engine, main window, storage.

        Loads the persisted settings (falling back to the built-in defaults),
        creates the CnOcr engine, builds the Tk window with all widgets and
        makes sure the Excel result file exists.
        """
        # Settings: file name, built-in defaults, then the merged result.
        self.config_file = "ocr_config.json"
        self.default_config = dict(
            udp_ip="0.0.0.0",
            udp_port=12345,
            auto_save=True,
            max_log_lines=200,
        )
        self.load_config()

        # Recognition engine with explicitly chosen models.
        model_names = {
            "det_model_name": 'en_PP-OCRv3_det',
            "rec_model_name": 'en_PP-OCRv3',
        }
        self.ocr = CnOcr(**model_names)
        self.current_image = None
        self.excel_file = "ocr_result.xlsx"

        # UDP listener state and the preview image reference.
        self.udp_running = False
        self.udp_thread = None
        self.canvas_image = None  # keeps the PhotoImage alive while displayed

        # Main window and widgets.
        root = tk.Tk()
        root.title("智能OCR系统 Pro")
        root.geometry("1200x800")
        self.window = root
        self.setup_ui()

        # Result storage; logging needs the widgets created above.
        self.init_excel()
        self.log("系统初始化完成", "INFO")

    def setup_ui(self):
        """Create and lay out every widget of the main window.

        From top to bottom: the control bar (network settings, mode selector,
        load/recognize buttons), the image preview canvas, the result table
        with its scrollbars, and the log pane.
        """
        ttk.Style().theme_use('clam')
        root = self.window

        # Top control bar.
        toolbar = ttk.Frame(root, padding=10)
        toolbar.pack(fill=tk.X)

        # Button that opens the network settings dialog.
        self.btn_network = ttk.Button(
            toolbar, text="网络配置", command=self.show_network_config
        )
        self.btn_network.pack(side=tk.LEFT, padx=5)

        # Mode selector: both radio buttons share one variable.
        self.mode_var = tk.StringVar(value="manual")
        ttk.Label(toolbar, text="模式:").pack(side=tk.LEFT, padx=5)
        for caption, mode in (("手动", "manual"), ("自动", "auto")):
            choice = ttk.Radiobutton(
                toolbar, text=caption, variable=self.mode_var,
                value=mode, command=self.mode_changed
            )
            choice.pack(side=tk.LEFT)

        # Action buttons on the right; recognition starts out disabled.
        actions = ttk.Frame(toolbar)
        actions.pack(side=tk.RIGHT)
        self.btn_load = ttk.Button(actions, text="导入图片", command=self.load_image)
        self.btn_load.pack(side=tk.LEFT, padx=5)
        self.btn_recognize = ttk.Button(
            actions, text="开始识别", command=self.start_ocr, state=tk.DISABLED
        )
        self.btn_recognize.pack(side=tk.LEFT)

        # Fixed-size image preview area.
        preview_box = ttk.LabelFrame(root, text="图片预览 (800x500)", padding=10)
        preview_box.pack(pady=10, fill=tk.BOTH, expand=True)
        self.canvas = tk.Canvas(preview_box, width=800, height=500, bg='#F0F0F0')
        self.canvas.pack()

        # Result table: (column id, heading text, heading anchor, width).
        table_box = ttk.LabelFrame(root, text="识别结果", padding=10)
        table_box.pack(fill=tk.BOTH, expand=True)

        column_specs = (
            ("id", "唯一ID", 'w', 180),
            ("time", "识别时间", 'center', 160),
            ("filename", "文件名", 'w', 200),
            ("result", "识别内容", 'w', 600),
        )
        self.result_table = ttk.Treeview(
            table_box,
            columns=tuple(spec[0] for spec in column_specs),
            show='headings', height=8
        )
        for name, title, anchor, width in column_specs:
            self.result_table.heading(name, text=title, anchor=anchor)
            if name == "result":
                # Only the text column keeps the default stretch behaviour.
                self.result_table.column(name, width=width)
            else:
                self.result_table.column(name, width=width, stretch=False)

        # Scrollbars wired to the table in both directions.
        y_scroll = ttk.Scrollbar(table_box, orient="vertical",
                                 command=self.result_table.yview)
        x_scroll = ttk.Scrollbar(table_box, orient="horizontal",
                                 command=self.result_table.xview)
        self.result_table.configure(yscrollcommand=y_scroll.set,
                                    xscrollcommand=x_scroll.set)

        self.result_table.grid(row=0, column=0, sticky="nsew")
        y_scroll.grid(row=0, column=1, sticky="ns")
        x_scroll.grid(row=1, column=0, sticky="ew")
        table_box.grid_rowconfigure(0, weight=1)
        table_box.grid_columnconfigure(0, weight=1)

        # Log pane at the bottom.
        log_box = ttk.LabelFrame(root, text="系统日志", padding=10)
        log_box.pack(fill=tk.BOTH, expand=False, pady=5)
        self.log_area = scrolledtext.ScrolledText(
            log_box, height=6, wrap=tk.WORD, font=('Consolas', 9)
        )
        self.log_area.pack(fill=tk.BOTH, expand=True)

    def load_config(self):
        """加载配置文件"""
        try:
            if os.path.exists(self.config_file):
                with open(self.config_file, 'r') as f:
                    self.config = {**self.default_config, **json.load(f)}
            else:
                self.config = self.default_config
        except:
            self.config = self.default_config

    def save_config(self):
        """保存配置"""
        with open(self.config_file, 'w') as f:
            json.dump(self.config, f, indent=2)

    def log(self, message, level="INFO"):
        """Append a timestamped, colour-coded line to the log pane.

        Args:
            message: Text to record.
            level: Severity label. "INFO", "WARNING" and "ERROR" have their
                own colours; any other label is shown in black.
        """
        palette = {"INFO": "black", "WARNING": "orange", "ERROR": "red"}
        stamp = time.strftime("%Y-%m-%d %H:%M:%S")
        entry = f"[{stamp}][{level}] {message}\n"

        pane = self.log_area
        # The pane is kept read-only; unlock it only while writing.
        pane.configure(state='normal')
        pane.insert(tk.END, entry, level)
        pane.tag_config(level, foreground=palette.get(level, "black"))

        # Drop the oldest lines once the configured cap is exceeded.
        line_count = int(pane.index('end-1c').split('.')[0])
        limit = self.config['max_log_lines']
        if line_count > limit:
            pane.delete(1.0, f"{line_count - limit}.0")

        pane.see(tk.END)
        pane.configure(state='disabled')

    def show_network_config(self):
        """Open a dialog for editing the UDP listener address and port.

        The dialog is pre-filled from ``self.config``. On save, the IP must be
        a valid IPv4 address and the port an integer in 1024-65535. Valid
        values are stored, written to the config file, logged, and the UDP
        listener is restarted. Each failure is reported with a message that
        matches its actual cause.
        """
        config_win = tk.Toplevel(self.window)
        config_win.title("网络配置")

        ttk.Label(config_win, text="IP地址:").grid(row=0, column=0, padx=5, pady=5, sticky='e')
        ip_entry = ttk.Entry(config_win, width=15)
        ip_entry.insert(0, self.config['udp_ip'])
        ip_entry.grid(row=0, column=1, padx=5, pady=5)

        ttk.Label(config_win, text="UDP端口:").grid(row=1, column=0, padx=5, pady=5, sticky='e')
        port_entry = ttk.Entry(config_win, width=8)
        port_entry.insert(0, str(self.config['udp_port']))
        port_entry.grid(row=1, column=1, padx=5, pady=5)

        def save_config():
            """Validate the entries, persist them and restart the listener."""
            # Validate the IP on its own. socket.error is an alias of OSError,
            # so this handler must not also wrap the file write below.
            new_ip = ip_entry.get().strip()
            try:
                socket.inet_pton(socket.AF_INET, new_ip)
            except (OSError, ValueError):
                messagebox.showerror("错误", "无效的IP地址格式")
                return

            try:
                new_port = int(port_entry.get())
            except ValueError:
                messagebox.showerror("错误", "无效的端口号")
                return
            if not 1024 <= new_port <= 65535:
                messagebox.showerror("错误", "端口号必须介于1024-65535之间")
                return

            self.config['udp_ip'] = new_ip
            self.config['udp_port'] = new_port
            try:
                self.save_config()
            except OSError as exc:
                # A write failure is not an input error; report it as such
                # and keep the dialog open.
                self.log(f"配置保存失败: {exc}", "ERROR")
                messagebox.showerror("错误", f"配置保存失败: {exc}")
                return

            self.log(f"更新网络配置: IP->{new_ip} 端口->{new_port}", "INFO")
            config_win.destroy()
            self.restart_udp_listener()

        ttk.Button(config_win, text="保存", command=save_config).grid(row=2, columnspan=2, pady=10)

    def load_image(self):
        """Let the user pick an image and show a scaled preview of it.

        The image is scaled to fit the 800x500 canvas while keeping its aspect
        ratio, then centred. ``self.current_image`` and the recognize button
        change only after the preview was built successfully, so a failed load
        leaves the previously loaded image (if any) in place. Failures are
        shown in a message box and logged.
        """
        path = filedialog.askopenfilename(filetypes=[("图片文件", "*.png *.jpg *.jpeg")])
        if not path:
            return

        try:
            # The context manager releases the file handle as soon as the
            # scaled copy exists.
            with Image.open(path) as img:
                width, height = img.size
                ratio = min(800/width, 500/height)
                # Clamp to 1px so extremely thin images do not scale to a
                # zero-sized dimension.
                new_size = (max(1, int(width*ratio)), max(1, int(height*ratio)))
                preview = img.resize(new_size, Image.Resampling.LANCZOS)

            photo = ImageTk.PhotoImage(preview)

            # Replace the old preview and centre the new one.
            self.canvas.delete("preview")
            self.canvas_image = photo  # keep a reference so Tk can display it
            x = (800 - new_size[0]) // 2
            y = (500 - new_size[1]) // 2
            self.canvas.create_image(x, y, anchor='nw', image=photo, tags="preview")

            self.current_image = path
            self.btn_recognize.config(state=tk.NORMAL)
            self.log(f"成功加载图片: {os.path.basename(path)}", "INFO")
        except Exception as e:
            messagebox.showerror("错误", f"图片加载失败: {str(e)}")
            self.log(f"图片加载失败: {str(e)}", "ERROR")

    def init_excel(self):
        """初始化Excel文件"""
        if not os.path.exists(self.excel_file):
            pd.DataFrame(columns=["id", "time", "filename", "result"]).to_excel(
                self.excel_file, index=False)
            self.log("创建新的Excel文件", "INFO")

    def save_to_excel(self, data):
        """Append one result record to the Excel file.

        The file is created first if it does not exist. The whole sheet is
        read, the record appended, and the sheet written back.

        Existing rows are read back as text (``dtype=str``). Without this,
        type inference would turn numeric-looking cells into numbers on every
        re-read, e.g. the ID column or an OCR result such as "007".

        Args:
            data: Mapping with the keys "id", "time", "filename" and "result".

        Failures are logged and shown in a message box; nothing is raised.
        """
        try:
            if not os.path.exists(self.excel_file):
                self.init_excel()

            existing = pd.read_excel(self.excel_file, dtype=str)
            combined = pd.concat([existing, pd.DataFrame([data])], ignore_index=True)
            combined.to_excel(self.excel_file, index=False)
        except Exception as e:
            self.log(f"保存失败: {str(e)}", "ERROR")
            messagebox.showerror("错误", f"保存失败: {str(e)}")

    def generate_id(self):
        """生成15位唯一ID"""
        return f"{int(time.time()*1000)}{random.randint(100,999)}"

    def start_ocr(self):
        """执行识别操作"""
        if not self.current_image:
            return
        
        try:
            start_time = time.time()
            self.log(f"开始识别: {os.path.basename(self.current_image)}", "INFO")
            
            # OCR识别
            results = self.ocr.ocr(self.current_image)
            text = ' '.join([res['text'] for res in results])
            
            # 生成记录
            record = {
                "id": self.generate_id(),
                "time": time.strftime("%Y-%m-%d %H:%M:%S"),
                "filename": os.path.basename(self.current_image),
                "result": text
            }
            
            # 更新表格
            self.result_table.insert('', 0, values=(
                record["id"],
                record["time"],
                record["filename"],
                record["result"]
            ))
            
            # 保存到Excel
            if self.config['auto_save']:
                self.save_to_excel(record)
                self.log(f"结果已保存到 {self.excel_file}", "INFO")
            
            # 重置状态
            self.current_image = None
            self.canvas.delete("preview")
            self.btn_recognize.config(state=tk.DISABLED)
            
            cost_time = round(time.time() - start_time, 2)
            self.log(f"识别完成，耗时{cost_time}s", "INFO")
            
        except Exception as e:
            self.log(f"识别失败: {str(e)}", "ERROR")
            messagebox.showerror("错误", f"识别失败: {str(e)}")

    def restart_udp_listener(self):
        """重启UDP监听"""
        if self.mode_var.get() == "auto":
            self.udp_running = False
            if self.udp_thread and self.udp_thread.is_alive():
                self.udp_thread.join(1)
            self.start_udp_listener()

    def start_udp_listener(self):
        """启动UDP监听线程"""
        self.udp_running = True
        self.udp_thread = threading.Thread(target=self.udp_listener, daemon=True)
        self.udp_thread.start()

    def udp_listener(self):
        """UDP监听线程"""
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            sock.bind((self.config['udp_ip'], self.config['udp_port']))
            self.log(f"启动UDP监听 {self.config['udp_ip']}:{self.config['udp_port']}", "INFO")
        except Exception as e:
            self.log(f"监听失败: {str(e)}", "ERROR")
            return
        
        while self.udp_running:
            try:
                data, addr = sock.recvfrom(1024)
                if data and data[0] == 0x01:
                    self.window.after(0, self.start_ocr)
                    self.log(f"收到触发信号 from {addr[0]}:{addr[1]}", "INFO")
            except:
                pass
        sock.close()

    def mode_changed(self):
        """React to a switch between manual and automatic mode.

        Automatic mode starts the UDP listener and disables the recognize
        button. Manual mode clears the listener's running flag and enables
        the button only when an image is loaded.
        """
        if self.mode_var.get() != "auto":
            self.udp_running = False
            button_state = tk.NORMAL if self.current_image else tk.DISABLED
            self.btn_recognize.config(state=button_state)
            return

        self.start_udp_listener()
        self.btn_recognize.config(state=tk.DISABLED)

    def run(self):
        self.window.protocol("WM_DELETE_WINDOW", self.on_close)
        self.window.mainloop()

    def on_close(self):
        """关闭窗口时的清理"""
        self.udp_running = False
        if self.udp_thread and self.udp_thread.is_alive():
            self.udp_thread.join(1)
        self.window.destroy()

if __name__ == '__main__':
    # Script entry point: build the application and hand control to Tk.
    OCRSystemPro().run()

运行代码进入软件界面：点击“导入图片”加载图片；点击“开始识别”对图片进行 OCR 识别。