# Original content - feel free to discuss and learn together in the comments section.
import requests
import base64
import json
import pandas as pd
import os
# Credentials issued by Baidu: on Baidu AI Cloud, find the matching text
# recognition (OCR) service (make sure it is the right one), create an
# application, remember to tick the capabilities you need, and then note
# down the three keys below.
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''
# Obtain an access_token through the OAuth client-credentials endpoint.
# The configured API_KEY / SECRET_KEY values are substituted into the query
# string; previously the URL carried the literal text "API_KEY" as client_id
# and an empty client_secret, so the configured keys were never sent.
host = (
    'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials'
    '&client_id=' + API_KEY
    + '&client_secret=' + SECRET_KEY
)
response = requests.get(host)
# A requests Response is truthy only for a successful (non-4xx/5xx) status.
if response:
    # The printed JSON is where the access_token is read from by the user.
    print(response.json())
# Placeholder ("list contents"): replace with the real entries. The loop
# further down iterates over this list and appends str(entry) to the
# directory path it walks.
price = [列表内容]
# Accumulates the output rows that are later turned into a DataFrame.
# NOTE(review): this name shadows the builtin ``list``; it is kept because
# the rest of the script appends to it under this name.
list = []
# Not referenced anywhere else in the visible code.
t = 0
def static_info():
    """POST the current image to Baidu's general OCR endpoint.

    Takes no arguments: the image payload is read from the module-level
    name ``img``, which the caller must set before calling (the script
    stores the base64-encoded file contents there).

    Returns:
        The ``requests`` response object of the POST, unmodified.
    """
    # Alternative high-accuracy endpoint (limited to 500 calls per day):
    #   "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
    # General endpoint (the daily limit appears to be 50000 calls).
    endpoint = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"
    form_fields = {"image": img}
    # Placeholder: replace the name below with the access_token obtained
    # from the token request earlier in the script.
    access_token = 这里填写上面获取到的access_token
    return requests.post(
        endpoint + "?access_token=" + access_token,
        data=form_fields,
        headers={'content-type': 'application/x-www-form-urlencoded'},
    )
# For every entry in ``price``: walk its image directory, run each image
# through static_info() (which reads the module-level ``img``), and collect
# the recognized text as rows in ``list``. Finally export all rows to Excel.
for d in price:
    file_num = 0  # not read anywhere in the visible code; kept unchanged
    for root, dirs, files in os.walk('路径' + str(d)):
        # Only file names longer than 15 characters are processed.
        image_names = [name for name in files if len(name) > 15]
        for image_name in image_names:
            print(image_name)
            try:
                # os.walk yields bare file names, so the file has to be
                # opened relative to the directory it was found in (root).
                f = open(os.path.join(root, image_name), 'rb')
            except IOError:
                print('此处没有文件了')
            else:
                # Close the handle as soon as the bytes have been read.
                with f:
                    img = base64.b64encode(f.read())
                response = static_info()
                # Truthy only when the HTTP status indicates success.
                if response:
                    js = response.json()
                    words_dic = js.get('words_result')
                    if words_dic:
                        # Text of the first recognized line, repeated in
                        # every row produced for this image.
                        first_words = words_dic[0]['words']
                        for entry in words_dic:
                            # Row: [price entry wrapped in a list,
                            #       first line's text, this line's text]
                            data = [[d], first_words, entry['words']]
                            list.append(data)
                        # Empty row after each image's results.
                        # NOTE(review): the original indentation was lost,
                        # so this nesting level is inferred - confirm.
                        list.append([])
p = pd.DataFrame(list)
p.to_excel('路径/文件名', header=False)
print('数据读取完毕')