前言
本文仅用于个人 Python 学习记录;百度已提供官方 SDK 模块,可通过以下命令安装:
pip install baidu-aip
access_token获取
略
代码
import requests
import base64
import json
import logging
from enum import Enum, unique
import keyring
# Give the root logger its default handler (basicConfig does nothing when
# handlers are already installed), then lower the root threshold so that
# DEBUG-level records are emitted.
logging.basicConfig()
logging.root.setLevel(logging.DEBUG)
@unique
class OcrType(Enum):
    '''
    Baidu OCR endpoints, one member per recognition service.

    Each member's value is the full request URL of that service. @unique
    guarantees that no two members share a URL.
    '''

    # --- General text recognition ---
    # Standard accuracy
    STANDARD_BASIC = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"
    # Standard accuracy, with text positions
    STANDARD_WITH_LOCATION = "https://aip.baidubce.com/rest/2.0/ocr/v1/general"
    # High accuracy
    ACCURATE_BASIC = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
    # High accuracy, with text positions
    ACCURATE_WITH_LOCATION = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate"

    # --- Documents and web images ---
    # Office document recognition
    # NOTE: the lowercase "i" in the member name is a typo kept for compatibility.
    DOC_ANALYSiS_OFFICE = "https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office"
    # Web image text recognition
    WEB_IMAGE = "https://aip.baidubce.com/rest/2.0/ocr/v1/webimage"
    # Web image text recognition, with text positions
    WEB_IMAGE_WITH_LOCATION = "https://aip.baidubce.com/rest/2.0/ocr/v1/webimage_loc"

    # --- Specialised recognisers ---
    # Handwriting recognition
    HAND_WRITING = "https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting"
    # Digit recognition
    NUMBERS = "https://aip.baidubce.com/rest/2.0/ocr/v1/numbers"
    # Table text recognition (synchronous interface)
    FORM_SYNCH = "https://aip.baidubce.com/rest/2.0/ocr/v1/form"
    # Table text recognition (asynchronous interface)
    FORM_ASYNCH = "https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/request"
    # QR code recognition
    # NOTE: "QRCORD" is a misspelling of "QRCODE" kept for compatibility.
    QRCORD = "https://aip.baidubce.com/rest/2.0/ocr/v1/qrcode"
class BaiduGeneralOcr():
    '''
    Minimal client for the Baidu OCR REST endpoints listed in OcrType.

    Typical use: construct with an OcrType member, supply a token through
    set_access_token() or gen_access_token(), then call recoginze()
    (also available under the correctly spelled alias recognize()).
    '''

    # Endpoint used for requests; __init__ replaces this per instance.
    ocr_type = OcrType.STANDARD_BASIC
    # Token sent with every OCR request; None until one is supplied.
    access_token = None

    def __init__(self, ocr_type: OcrType):
        '''
        Create a client bound to one OCR endpoint.

        ocr_type: an OcrType member (or the URL value of one), e.g.
            STANDARD_BASIC, STANDARD_WITH_LOCATION, ACCURATE_BASIC,
            ACCURATE_WITH_LOCATION.

        Raises ValueError if ocr_type does not correspond to an OcrType member.
        '''
        # Keep the enum member itself. Storing only its .value (a str) made
        # the later `.value` lookups fail, which silently reset every
        # request to the standard endpoint.
        self.ocr_type = OcrType(ocr_type)

    def gen_access_token(self, API_Key, Secret_Key):
        '''
        Request an access token from Baidu and store it on this instance.

        API_Key: the application's API key.
        Secret_Key: the application's secret key.

        On a non-success HTTP status the stored token is left unchanged and
        an error is logged.
        '''
        # Pass the credentials through `params` so they are URL-encoded.
        response = requests.get(
            "https://aip.baidubce.com/oauth/2.0/token",
            params={
                "grant_type": "client_credentials",
                "client_id": API_Key,
                "client_secret": Secret_Key,
            },
        )
        if response:
            self.access_token = response.json()['access_token']
        else:
            logging.error("获取access_token失败,HTTP状态码:%s", response.status_code)

    @staticmethod
    def _encode_file(path):
        '''Return the content of the file at path as base64 text.'''
        # The context manager closes the handle even if reading fails.
        with open(path, 'rb') as f:
            return base64.b64encode(f.read()).decode()

    def __check(self):
        '''
        Validate the pending request and fill self.options with its payload.

        Exactly one source is used, in priority order image > url > pdf_file.
        Local files (image, pdf_file) are read and base64-encoded.

        Raises ValueError when none of image, url, pdf_file was given.
        '''
        image = self.image
        url = self.url
        pdf_file = self.pdf_file
        if image:
            self.options['image'] = self._encode_file(image)
        elif url:
            self.options['url'] = url
        elif pdf_file:
            # The original opened the undefined name `pdf_path` here.
            self.options['pdf_file'] = self._encode_file(pdf_file)
        else:
            raise ValueError("image,url,pdf_file至少传入一项")

        # The attribute may have been reassigned after construction: accept
        # a raw URL value, otherwise fall back to the standard endpoint.
        if not isinstance(self.ocr_type, OcrType):
            try:
                self.ocr_type = OcrType(self.ocr_type)
            except ValueError:
                self.ocr_type = OcrType.STANDARD_BASIC
                logging.info("ocr_type类型,已重置为标准版")

    def __request(self, request_url, data):
        '''
        POST data as a form to request_url and return the decoded JSON body.

        request_url: endpoint URL without the access_token query parameter.
        data: form fields to send.
        '''
        headers = {'content-type': 'application/x-www-form-urlencoded'}
        # Log only the field names: the values may hold a whole base64 file.
        logging.debug("请求 %s,参数:%s,headers:%s", request_url, list(data), headers)
        return requests.post(
            request_url,
            params={"access_token": self.access_token},
            data=data,
            headers=headers,
        ).json()

    def recoginze(self, image: str = None, url: str = None, pdf_file: str = None, options=None):
        '''
        Run OCR on one input and return the service's JSON response.

        image: path of a local image file.
        url: URL of a remote image.
        pdf_file: path of a local PDF file.
        options: extra form fields to send along with the request.

        Raises ValueError when no input is given or no access token is set.
        '''
        self.image = image
        self.url = url
        self.pdf_file = pdf_file
        # Work on a copy so the encoded payload never leaks into the
        # caller's dict.
        self.options = dict(options) if options else {}
        self.__check()
        if not self.access_token:
            raise ValueError("access_token未设置,请先调用set_access_token或gen_access_token")
        # Send the request
        return self.__request(self.ocr_type.value, self.options)

    # Correctly spelled alias; the misspelled name stays for existing callers.
    recognize = recoginze

    def set_access_token(self, access_token):
        '''
        Store an already obtained access token.

        access_token: token string sent with every OCR request.
        '''
        self.access_token = access_token

    def ocr_help(self):
        '''Print a short usage guide, including a sample call sequence.'''
        print("==========百度ocr使用说明==========")
        print("本API基于ocr通用识别api编写,官方文档地址:https://cloud.baidu.com/doc/OCR/s/zk3h7xz52")
        print("使用示例:")
        # The sample's loop body is indented so the printed code is valid.
        print('''
baiduOcr = BaiduGeneralOcr(OcrType.STANDARD_BASIC)
baiduOcr.set_access_token("access_token")
wordsList = baiduOcr.recoginze(image="D:\\txt1.png",options={})['words_result']
for word in wordsList:
    print(word)
''')
# Demo run. Guarded so that importing this module for BaiduGeneralOcr does
# not trigger a network request against a hard-coded local file.
if __name__ == "__main__":
    baiduOcr = BaiduGeneralOcr(OcrType.STANDARD_BASIC)
    # The token is read from the system keyring entry ("baidu_dev", "access_token").
    baiduOcr.set_access_token(keyring.get_password("baidu_dev", "access_token"))
    response = baiduOcr.recoginze(image="D:\\txt1.png", options={})
    wordsList = response.get('words_result')
    if wordsList is None:
        # No recognition results in the reply: show the whole payload
        # instead of failing with a bare KeyError.
        logging.error("OCR请求未返回words_result:%s", response)
    else:
        for word in wordsList:
            print(word)