# encoding:utf-8
# 利用百度云智能的资源进行文字识别
import requests
import base64
from tkinter import filedialog,Tk
import langid
from translate import Translator
#获取密钥
def Get_acess_token(API_Key, Secret_Key):
    """Exchange an API key / secret key pair for a Baidu access token.

    Sends a GET request to the Baidu OAuth token endpoint using the
    ``client_credentials`` grant and prints the token and its lifetime
    on success.

    Args:
        API_Key: The application's API key (sent as ``client_id``).
        Secret_Key: The application's secret key (sent as ``client_secret``).

    Returns:
        The access token string, or ``None`` when the request is rejected
        or the reply carries no ``access_token`` field.

    Raises:
        requests.RequestException: On network failures or timeouts.
    """
    host = 'https://aip.baidubce.com/oauth/2.0/token'
    # Let requests build the query string so that any special characters in
    # the user-supplied credentials are URL-encoded correctly.
    query = {
        'grant_type': 'client_credentials',
        'client_id': API_Key,
        'client_secret': Secret_Key,
    }
    # A timeout prevents the script from hanging forever on a dead connection.
    response = requests.get(host, params=query, timeout=30)

    # A requests Response is truthy only for status codes below 400.
    if response:
        try:
            text = response.json()
        except ValueError:
            # Body was not JSON; treat it like any other failed attempt.
            text = {}
        if 'access_token' in text:
            print('access_token:' + text['access_token'])
            print('Access Token的有效期(秒为单位,有效期30天):' + str(text.get('expires_in')))
            return text['access_token']

    print(r'检查到鉴权认证失败,请参见文档:https://ai.baidu.com/ai-doc/REFERENCE/Ck3dwjhhu')
    return None
#提取文字
def Get_words(IDcardAdress, acess_token, language):
    """Run Baidu high-accuracy OCR on an image file and return its text.

    The image is read from disk, base64-encoded and posted to the
    ``accurate_basic`` OCR endpoint as form data.

    Args:
        IDcardAdress: Path of the image file to recognise.
        acess_token: Access token appended to the request URL.
        language: Value sent as the ``language_type`` form field.

    Returns:
        The recognised lines, each terminated by ``'\\n'``. An empty string
        is returned when nothing was recognised or when the request failed
        (a diagnostic message is printed in the failure case).

    Raises:
        OSError: If the image file cannot be opened or read.
        requests.RequestException: On network failures or timeouts.
        ValueError: If a successful response body is not valid JSON.
    """
    request_url = r'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic'
    # The context manager guarantees the file handle is closed even if
    # reading or encoding fails.
    with open(IDcardAdress, 'rb') as image_file:
        img = base64.b64encode(image_file.read())

    request_url = request_url + '?access_token=' + acess_token
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    params = {'image': img, 'language_type': language}
    # A timeout prevents the script from hanging forever on a dead connection.
    response = requests.post(request_url, data=params, headers=headers, timeout=60)

    # A requests Response is truthy only for status codes below 400.
    if not response:
        print('文字识别请求失败,HTTP状态码:' + str(response.status_code))
        return ''

    text = response.json()
    if 'words_result' not in text:
        # No recognition result in the payload; show whatever error detail
        # the service supplied instead of failing on a missing key.
        print('文字识别失败:' + str(text.get('error_msg', text)))
        return ''

    # One recognised line per entry, each followed by a newline.
    return ''.join(entry['words'] + '\n' for entry in text['words_result'])
#翻译翻译
def translation(text):
    """Return *text* in Chinese, translating it when it is not Chinese already.

    The language is detected with ``langid``. Chinese text is returned
    untouched, English text is translated from English, and text in any
    other detected language is translated as if it were Russian.

    Args:
        text: The text to inspect and, if needed, translate.

    Returns:
        The original text when it is detected as Chinese, otherwise the
        result of ``Translator.translate``.
    """
    # langid.classify returns a (language_code, score) pair; only the code
    # is needed here.
    detected = langid.classify(text)[0]

    if detected == 'zh':
        return text

    # Everything that is neither Chinese nor English is handled as Russian.
    source_language = "english" if detected == 'en' else "russian"
    return Translator(from_lang=source_language, to_lang="chinese").translate(text)
if __name__ == '__main__':
    # Interactive entry point: prompt for credentials, image path and OCR
    # language, recognise the text, translate it to Chinese and print it.
    # Credentials are always read from the prompt; never keep real keys in
    # the source file, not even inside comments.
    API_Key = input('请输入API_Key:\n')
    Secret_Key = input('请输入Secret_Key:\n')
    IDcardAdress = input('请输入目标图片的路径:\n')  # e.g. C:\Users\pc\Desktop\123.jpeg
    language = input(r'请输入识别语言(CHN_ENG/ENG/RUS):')

    acess_token = Get_acess_token(API_Key, Secret_Key)
    if not acess_token:
        # Get_acess_token has already printed the failure notice; stop here
        # instead of crashing later while building the OCR request URL.
        raise SystemExit(1)

    wordStr = Get_words(IDcardAdress, acess_token, language)
    if not wordStr:
        # Nothing was recognised, so there is nothing to translate.
        raise SystemExit('未识别到任何文字')

    news_content = translation(wordStr)
    # Print each space-separated fragment on its own line.
    print(news_content.replace(' ', '\n'))
# 《Python自动化》学习笔记:百度云智能进行文字识别(代码干货)
# 最新推荐文章于 2024-06-08 20:51:13 发布