[Python]利用百度AI的ocr实现屏幕内区域文字识别

整体思路

1、[截图](https://blog.csdn.net/weixin_44343083/article/details/112252207)
2、图片压缩(百度的ocr要求图片小于4M)
3、调用百度ocr的函数进行文字识别

运行代码前需要先申请百度OCR接口,获取 APP ID、API Key 和 Secret Key 并填入代码中
百度ocr接口申请教程

代码展示

写完后只是粗略地测试了一下,可能会有一点bug。如果有bug及代码修改方式希望能告知一下,谢谢!!!

from PIL import Image, ImageGrab
from aip import AipOcr
from tkinter import Tk, Button, Canvas
from ctypes import windll
from win32clipboard import OpenClipboard, EmptyClipboard, SetClipboardData, CloseClipboard
from win32con import CF_DIB,CF_UNICODETEXT
from io import BytesIO

def send_to_clipboard(clip_type, data):
    """Replace the Windows clipboard contents with ``data``.

    Args:
        clip_type: Clipboard format identifier handed to
            ``SetClipboardData`` (``ocr`` in this file passes
            ``CF_UNICODETEXT``).
        data: Payload to store, in the form required by ``clip_type``.
    """
    OpenClipboard()
    try:
        EmptyClipboard()
        SetClipboardData(clip_type, data)
    finally:
        # Always close the clipboard, even when storing the data fails;
        # otherwise it stays open and held by this process.
        CloseClipboard()

def photo_compression(im, max_pixels=4000000):
    """Downscale an image until its pixel count is at most ``max_pixels``.

    Both sides are halved repeatedly until ``width * height`` fits the
    budget, then the image is resized once with bilinear filtering.

    Args:
        im: Image exposing ``size`` and ``resize`` (a PIL image in this file).
        max_pixels: Largest allowed ``width * height``. Defaults to the
            4,000,000 pixel budget this script has always used.

    Returns:
        The value returned by ``im.resize`` for the final size.

    Raises:
        ValueError: If ``max_pixels`` is smaller than 1.
    """
    if max_pixels < 1:
        # A 1x1 image can never get below one pixel, so the loop below
        # would never terminate.
        raise ValueError("max_pixels must be at least 1")

    width, height = im.size
    while width * height > max_pixels:
        # Clamp to 1 so a very long, thin image (e.g. 1 x 5,000,000) never
        # ends up with a zero-sized side as the resize target.
        width = max(1, width // 2)
        height = max(1, height // 2)
    return im.resize((width, height), Image.BILINEAR)

def ocr(im, choice):
    """Recognise the text in ``im`` with Baidu OCR and copy it to the clipboard.

    The image is converted to RGB, encoded as BMP in memory and sent to the
    Baidu OCR client. Every recognised line is written to the clipboard as
    Unicode text, one line per entry, each terminated by a newline.

    Args:
        im: PIL image to recognise.
        choice: ``1`` calls ``client.basicGeneral``; ``2`` calls
            ``client.basicAccurate``.

    Raises:
        ValueError: If ``choice`` is neither 1 nor 2.
        RuntimeError: If the response carries no ``words_result`` entry.
    """
    # Apply for these three credentials on the Baidu AI platform.
    APP_ID = '**********'
    API_KEY = '**********'
    SECRET_KEY = '**********'

    # Reject an unknown mode up front instead of failing later on an
    # unbound result variable.
    if choice not in (1, 2):
        raise ValueError(f"choice must be 1 or 2, got {choice!r}")

    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    # NOTE(review): BMP is uncompressed, so the payload is roughly 3 bytes
    # per pixel; confirm this stays within the service's upload size limit.
    with BytesIO() as output:
        im.convert("RGB").save(output, "BMP")
        data = output.getvalue()

    if choice == 1:
        result = client.basicGeneral(data)
    else:
        result = client.basicAccurate(data)

    words_result = result.get('words_result')
    if words_result is None:
        # Surface the raw response rather than crashing on iterating None.
        raise RuntimeError(
            f"Baidu OCR response has no 'words_result': {result!r}")

    out = ''.join(
        f"{entry.get('words') or ''}\n" for entry in words_result)
    send_to_clipboard(CF_UNICODETEXT, out)


class CTkPrScrn:
    """Full-screen, semi-transparent overlay for selecting a screen region.

    The user drags with the left mouse button to draw a rectangle. Releasing
    the button shows a confirm (√) and a cancel (×) button next to the
    release point. Confirming closes the overlay, grabs the selected region
    and runs OCR on it; cancelling clears the rectangle so a new one can be
    drawn. Esc closes the overlay.

    The constructor enters the Tk main loop, so it only returns after the
    window has been destroyed.
    """

    def __init__(self):
        # Selection corners in window coordinates (unscaled).
        self.start_x, self.start_y = 0, 0
        self.end_x, self.end_y = 0, 0
        # Position of the most recent left-button press.
        self.on_x, self.on_y = 0, 0
        # Ratio between native and scaled horizontal resolution.
        self.scale = 1
        # True once the current press has actually drawn a rectangle.
        self._dragged = False

        self.WIN = Tk()
        self.WIN.attributes("-alpha", 0.5)  # semi-transparent window
        self.WIN.attributes("-fullscreen", True)  # cover the whole screen
        self.WIN.attributes("-topmost", True)  # stay above other windows

        self.width = self.WIN.winfo_screenwidth()
        self.height = self.WIN.winfo_screenheight()

        # Canvas on which the selection rectangle is drawn.
        self.canvas = Canvas(self.WIN, width=self.width, height=self.height,
                             bg="gray")

        self.WIN.bind('<Button-1>', self.xFunc1)  # left button pressed
        self.WIN.bind('<ButtonRelease-1>', self.xFunc2)  # left button released
        self.WIN.bind('<B1-Motion>', self.xFunc3)  # dragging with left button
        self.WIN.bind('<Escape>', lambda e: self.WIN.destroy())  # Esc quits

        # Confirm / redo buttons, placed next to the selection on release.
        self.button_yes = Button(self.WIN, text='√', fg='green',
                                 command=self.event_yes)
        self.button_no = Button(self.WIN, text='×', fg='red',
                                command=self.event_no)

        # Tk reports coordinates in scaled units while the screen grab works
        # in native pixels, so derive the display scaling factor.
        user32 = windll.user32
        gdi32 = windll.gdi32
        dc = user32.GetDC(None)
        try:
            scaled_width = gdi32.GetDeviceCaps(dc, 8)  # width after scaling
            native_width = gdi32.GetDeviceCaps(dc, 118)  # native width
        finally:
            # Release the device context obtained from GetDC.
            user32.ReleaseDC(None, dc)
        self.scale = native_width / scaled_width

        self.WIN.mainloop()  # blocks until the window is destroyed

    def _from_button(self, event):
        """Return True when ``event`` originated on the √ or × button.

        The mouse bindings live on the toplevel window and therefore also
        fire for its child widgets, with coordinates relative to that child;
        such events must not be treated as part of a selection drag.
        """
        return event.widget is self.button_yes or event.widget is self.button_no

    def _grab_box(self):
        """Return the selection as ``(left, top, right, bottom)`` in pixels.

        The corners are ordered so the box is valid regardless of the drag
        direction, then scaled by ``self.scale`` and rounded to integers.
        """
        left, right = sorted((self.start_x, self.end_x))
        top, bottom = sorted((self.start_y, self.end_y))
        return tuple(int(round(self.scale * value))
                     for value in (left, top, right, bottom))

    def xFunc1(self, event):
        """Left button pressed: remember where a potential drag begins."""
        if self._from_button(event):
            return
        self.on_x, self.on_y = event.x, event.y
        self._dragged = False

    def xFunc2(self, event):
        """Left button released: finish the selection and show the buttons."""
        if self._from_button(event):
            return
        if not self._dragged:
            # Plain click without a drag: start_x/start_y would be stale.
            return
        if event.x == self.start_x or event.y == self.start_y:
            # Zero-width or zero-height selection.
            return

        self.end_x, self.end_y = event.x, event.y
        if self.end_y >= self.height - 100:
            # Too close to the bottom edge: put the buttons above the corner.
            self.button_yes.place(x=self.end_x - 25, y=self.end_y - 35)
            self.button_no.place(x=self.end_x - 50, y=self.end_y - 35)
        else:
            self.button_yes.place(x=self.end_x - 20, y=self.end_y + 5)
            self.button_no.place(x=self.end_x - 45, y=self.end_y + 5)

    def xFunc3(self, event):
        """Dragging with the left button: redraw the selection rectangle."""
        if self._from_button(event):
            return
        self.start_x, self.start_y = self.on_x, self.on_y
        if event.x == self.start_x or event.y == self.start_y:
            return
        self.canvas.delete("prscrn")
        self.canvas.create_rectangle(self.start_x, self.start_y,
                                     event.x, event.y,
                                     fill='white', outline='red',
                                     tag="prscrn")
        # The canvas is first shown here, on the first real drag.
        self.canvas.pack()
        self._dragged = True

    def event_yes(self):
        """Confirm: close the overlay, grab the selection and run OCR on it."""
        box = self._grab_box()
        self.WIN.update()
        self.WIN.destroy()
        im = ImageGrab.grab(box)
        ocr(photo_compression(im), 1)

    def event_no(self):
        """Cancel: remove the rectangle and hide both buttons."""
        self.canvas.delete("prscrn")
        self.button_yes.place_forget()
        self.button_no.place_forget()
        self._dragged = False


if __name__ == '__main__':
    # Constructing the overlay enters the Tk main loop inside __init__, so
    # this call blocks until the selection window has been closed.
    prScrn = CTkPrScrn()
  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值