使用python制作的OCR工具，适用截图识别、单图识别、批量识别

肖维滔

已于 2023-04-14 10:22:11 修改

阅读量1k

点赞数

文章标签： python opencv 计算机视觉

于 2023-04-13 21:46:30 首次发布

本文链接：https://blog.csdn.net/weixin_61206738/article/details/130139779

版权

1、使用tkinter制作GUI界面

2、使用PIL获取截图图片

3、使用opencv对图像进行缩放，高斯滤波以及二值化处理

4、使用pytesseract（Tesseract OCR引擎的Python封装）进行OCR文字识别

具体的代码如下：

#导入相关的库

import cv2 as cv

import numpy as np

import pytesseract

from PIL import ImageGrab

import tkinter as tk

import os

from tkinter import filedialog

#定义以下方法

#图像识别方法

def imageact(src, lang='chi_sim'):
    """Binarize an image and run Tesseract OCR on it.

    The image is smoothed with a 5x5 Gaussian blur, binarized with a fixed
    global threshold (pixels above 180 become 255, everything else 0) and
    then handed to ``pytesseract.image_to_string``.

    Args:
        src: Grayscale image array to recognize.
        lang: Tesseract language code passed through to pytesseract.

    Returns:
        A ``(text, image)`` tuple: the recognized text and the binarized
        image that was fed to the OCR engine.

    Raises:
        ValueError: If ``src`` is None, e.g. an image that failed to load.
    """
    # Fail early with a readable message instead of an opaque OpenCV error.
    if src is None:
        raise ValueError('imageact() received no image (src is None)')

    # 5x5 Gaussian blur to suppress noise before thresholding.
    blurred = cv.GaussianBlur(src, (5, 5), 0)

    # Simple global threshold to obtain a black-and-white image.
    _, binary = cv.threshold(blurred, 180, 255, cv.THRESH_BINARY)

    # Text extraction.
    text = pytesseract.image_to_string(binary, lang=lang)
    return text, binary

#屏幕截图方法

def screengrab():
    """Show a full-screen translucent overlay for selecting a screen region.

    Minimizes the root window, creates a borderless ``Toplevel`` sized to the
    whole screen with a canvas on top of it, and binds the left-button
    press / drag / release events on that canvas to ``StartMove``,
    ``OnMotion`` and ``StopMove``.

    Sets the module globals ``f1`` (overlay window), ``ck`` (overlay canvas)
    and ``color`` (the color treated as transparent on the overlay).
    """
    global f1, ck, color

    # Minimize the root window; 'iconic' is the documented Tk state name.
    root.state('iconic')

    # New top-level window that acts as the selection overlay.
    f1 = tk.Toplevel(root)
    # Partial opacity gives the "frosted glass" look over the desktop.
    f1.wm_attributes('-alpha', 0.6)
    # No title bar or borders.
    f1.overrideredirect(True)

    # Stretch the overlay across the entire screen.
    screen_w = f1.winfo_screenwidth()
    screen_h = f1.winfo_screenheight()
    f1.geometry(f'{screen_w}x{screen_h}+0+0')

    # Anything drawn in this color is rendered fully see-through.
    # NOTE: '-transparentcolor' is a Windows-only window attribute.
    color = 'gray'
    f1.wm_attributes('-transparentcolor', color)

    # Canvas on which the selection rectangle is drawn.
    ck = tk.Canvas(f1)
    # expand takes a boolean; the original passed the axis constant tk.Y.
    ck.pack(fill=tk.BOTH, expand=True)

    # Left button: press starts the selection, drag resizes it, release
    # captures the region.
    ck.bind("<ButtonPress-1>", StartMove)
    ck.bind("<ButtonRelease-1>", StopMove)
    ck.bind("<B1-Motion>", OnMotion)

#定义按下鼠标左键时的方法

def StartMove(event):
    """Handle left-button press: remember the drag origin and start the box.

    Stores the press position in canvas coordinates (``first_x``,
    ``first_y``) and in screen coordinates (``x1``, ``y1``), then creates a
    1x1 rectangle tagged ``'L'`` on the overlay canvas.
    """
    global x1, y1, first_x, first_y

    # Press position relative to the canvas.
    first_x, first_y = event.x, event.y

    # Screen position = position inside f1 + f1's top-left corner on screen.
    x1 = first_x + f1.winfo_rootx()
    y1 = first_y + f1.winfo_rooty()

    # Seed rectangle, painted in the overlay's transparent color.
    ck.create_rectangle(
        first_x, first_y, event.x + 1, event.y + 1,
        fill=color, outline=color, tags='L',
    )

#定义鼠标移动时的方法

def OnMotion(event):
    """Handle left-button drag: stretch the selection box to the pointer."""
    # The rectangle tagged 'L' keeps its origin corner and follows the mouse
    # with the opposite corner.
    corners = (first_x, first_y, event.x, event.y)
    ck.coords('L', *corners)

#定义左键松开时的方法

def StopMove(event):
    """Handle left-button release: grab the selected region and OCR it.

    Records the release point in screen coordinates (``x2``, ``y2``), grabs
    the rectangle spanned by the press and release points, converts it to
    grayscale, upscales it 5x, runs ``imageact`` on it and prints the
    recognized text. The overlay is always closed and the root window
    restored afterwards, even if grabbing or recognition fails.

    Sets the module globals ``x2``, ``y2`` and, when a region was grabbed,
    ``im`` (the captured PIL image).
    """
    global x2, y2, im

    # Release position on screen.
    x2, y2 = event.x + f1.winfo_rootx(), event.y + f1.winfo_rooty()

    # The user may drag in any direction; order the corners so the bounding
    # box is always (left, top, right, bottom).
    left, right = sorted((x1, x2))
    top, bottom = sorted((y1, y2))

    try:
        # A plain click yields an empty box; there is nothing to recognize.
        if right > left and bottom > top:
            im = ImageGrab.grab((left, top, right, bottom))
            # Grayscale pixel array.
            img = np.array(im.convert('L'))
            # Upscale 5x to improve the OCR recognition rate.
            img = cv.resize(img, None, fx=5, fy=5)
            text, _ = imageact(img)
            # The original computed the text and then discarded it.
            print(text)
    finally:
        # Never leave the overlay up or the main window minimized.
        f1.destroy()
        root.state('normal')

#定义单图提取的方法

def oneocr():
    """Ask the user for one image file, OCR it and print the text.

    Only ``.jpg`` and ``.png`` files (any letter case) are processed.
    Nothing happens if the dialog is cancelled or another file type is
    chosen.
    """
    base_path = filedialog.askopenfilename()
    # A cancelled dialog yields an empty value.
    if not base_path:
        return
    if os.path.splitext(base_path)[1].lower() not in ('.jpg', '.png'):
        return

    # IMREAD_GRAYSCALE is the imread flag for a grayscale load; the original
    # passed cv.COLOR_BGR2GRAY, which is a cvtColor conversion code.
    img = cv.imread(base_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        # cv.imread signals an unreadable file by returning None.
        print(f'无法读取图片: {base_path}')
        return

    # imageact returns (text, processed_image); only the text is printed.
    text, _ = imageact(img)
    print(text)

#最后定义一个批量识别图片文件夹的方法

def pi_screengrab(result_path=r'D:\desktop/result.txt'):
    """Ask the user for a folder and OCR every image in it into a text file.

    Every ``.jpg`` / ``.png`` file (any letter case) directly inside the
    chosen folder is recognized with ``imageact`` and its text is appended
    to ``result_path``, each entry preceded by a 1-based header line and
    followed by a separator line. Images that cannot be read are reported
    on stdout and skipped.

    Args:
        result_path: File the recognized text is written to (overwritten,
            UTF-8). Defaults to the location the script originally
            hard-coded.
    """
    path_dir = filedialog.askdirectory()
    # A cancelled dialog yields an empty value.
    if not path_dir or not os.path.isdir(path_dir):
        return

    # Pick the image files first so the numbering counts images only.
    image_names = [
        name for name in os.listdir(path_dir)
        if os.path.splitext(name)[1].lower() in ('.jpg', '.png')
    ]

    # 'with' closes the result file even if recognition raises.
    with open(result_path, 'w', encoding='utf-8') as f:
        for n, base_path in enumerate(image_names, start=1):
            src_path = os.path.join(path_dir, base_path)
            # IMREAD_GRAYSCALE is the imread flag for a grayscale load; the
            # original passed cv.COLOR_BGR2GRAY, a cvtColor conversion code.
            img = cv.imread(src_path, cv.IMREAD_GRAYSCALE)
            if img is None:
                # cv.imread signals an unreadable file by returning None.
                print(f'无法读取图片: {src_path}')
                continue
            # imageact returns (text, processed_image); the original
            # 'text,=' unpacking raised ValueError on that 2-tuple.
            text, _ = imageact(img)
            f.write(f'第{n}张图片:\n')
            f.write(text)
            f.write('\n***********************\n')

path_dir = r''

# Main application window.
root = tk.Tk()

# Frame that holds the row of action buttons.
frm = tk.Frame(root)
frm.pack(fill=tk.BOTH)

# Buttons, left to right: region screenshot OCR, batch OCR of a folder,
# OCR of a single image file.
for _caption, _handler in (
    ('定位截图', screengrab),
    ('批量提取', pi_screengrab),
    ('单图提取', oneocr),
):
    tk.Button(frm, text=_caption, command=_handler).pack(side='left')

# Enter the Tk event loop.
root.mainloop()

以上就是所有的代码

肖维滔

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
使用python制作的OCR工具，适用截图识别、单图识别、批量识别

使用python tkinter，opencv，pytesseract实现截图文字识别、单图文字识别和图片批量识别
复制链接

扫一扫