有道的OCR,怎么说呢:不知道是不是我的写法有问题,接口返回的是编码后的字节串(bytes),而其他OCR接口返回的结果基本都已经把坐标、识别文字等内容整理在list里了。我不确定该怎么处理,于是先直接解码成str,观察字符串的格式,再用split把需要的文字切分出来。
# -*- coding: utf-8 -*-
import base64
import hashlib
import json
import os
import sys
import time
import uuid

import requests
# Endpoint that the OCR requests are POSTed to.
YOUDAO_URL = 'https://openapi.youdao.com/ocrapi'
APP_KEY = 'xxxxxxxxxxxx'  # placeholder: replace with your own app key
APP_SECRET = 'xxxxxxxxxxx'  # placeholder: replace with your own app secret
# NOTE: this service is paid; the 50-yuan sign-up credit runs out quickly.
def truncate(q):
    """Shorten ``q`` to the compact form used when building the signature.

    Inputs of at most 20 characters are returned unchanged. Longer inputs
    become: the first 10 characters, the total length in decimal, then the
    last 10 characters.

    Args:
        q: The string to shorten, or None.

    Returns:
        The shortened string, or None when ``q`` is None.
    """
    if q is None:
        return None
    size = len(q)
    if size <= 20:
        return q
    return q[:10] + str(size) + q[-10:]
def encrypt(signStr):
    """Return the SHA-256 digest of ``signStr`` as a lowercase hex string.

    Args:
        signStr: Text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        The 64-character hexadecimal digest.
    """
    return hashlib.sha256(signStr.encode('utf-8')).hexdigest()
def do_request(data, timeout=60):
    """POST the form fields in ``data`` to ``YOUDAO_URL``.

    Args:
        data: Mapping of form fields sent as the request body.
        timeout: Seconds to wait for the server, passed to ``requests.post``.
            Without it a stalled connection would block the caller forever.

    Returns:
        The ``requests.Response`` object for the request.
    """
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    return requests.post(
        YOUDAO_URL, data=data, headers=headers, timeout=timeout
    )
def _collect_text(node):
    """Return every string stored under a ``"text"`` key, in document order."""
    found = []
    if isinstance(node, dict):
        for key, value in node.items():
            if key == 'text' and isinstance(value, str):
                found.append(value)
            else:
                found.extend(_collect_text(value))
    elif isinstance(node, list):
        for item in node:
            found.extend(_collect_text(item))
    return found


def connect(path_img):
    """Send one image to the OCR endpoint and save the recognised text.

    The image file is base64-encoded, the request is signed (SHA-256 over
    app key + truncated image + salt + timestamp + app secret) and POSTed via
    ``do_request``. Every ``"text"`` string found in the JSON reply is written
    on its own line to ``youdao_OCR/<image name without extension>.txt``,
    encoded as UTF-8. The output directory is created when missing.

    Args:
        path_img: Path of the image file to recognise.
    """
    # os.path understands the platform's separators, and splitext keeps
    # dotted stems such as "scan.v2.jpg" distinct from "scan.v3.jpg".
    name = os.path.splitext(os.path.basename(path_img))[0]
    out_dir = 'youdao_OCR'
    os.makedirs(out_dir, exist_ok=True)
    path_save = os.path.join(out_dir, name + '.txt')

    # Read the raw image bytes and convert them to base64 text.
    with open(path_img, 'rb') as image_file:
        q = base64.b64encode(image_file.read()).decode('utf-8')

    curtime = str(int(time.time()))
    salt = str(uuid.uuid1())
    sign = encrypt(APP_KEY + truncate(q) + salt + curtime + APP_SECRET)

    data = {
        'detectType': '10012',  # original author's note: line-by-line mode
        'imageType': '1',
        'langType': 'auto',  # alternative noted by the author: 'zh-CHS'
        'img': q,
        'docType': 'json',
        'signType': 'v3',
        'curtime': curtime,
        'appKey': APP_KEY,
        'salt': salt,
        'sign': sign,
    }
    response = do_request(data)

    results = response.content.decode('utf-8')
    try:
        # A real JSON parse decodes escapes (\", \\, \uXXXX) that plain
        # string splitting would leave in the saved text.
        payload = json.loads(results)
    except ValueError:
        # A reply that is not JSON carries no text entries; an empty output
        # file is still written, as it was before.
        payload = None

    # Explicit UTF-8 so the recognised text never depends on the platform's
    # default encoding.
    with open(path_save, 'w', encoding='utf-8') as out_file:
        out_file.writelines(text + '\n' for text in _collect_text(payload))
if __name__ == '__main__':
    import glob
    import os

    # Directory scanned for input images.
    path_img = 'pic'
    # Extensions are compared case-insensitively after a single glob, so each
    # file is submitted exactly once even where glob matching ignores case
    # (every submission is a billed API call), and spellings such as ".Jpg"
    # are picked up as well.
    extensions = {'.jpg', '.jpeg'}
    for path in sorted(glob.glob(os.path.join(path_img, '*'))):
        if os.path.splitext(path)[1].lower() in extensions:
            connect(path)