首先去百度申请一个 OCR API(获取 client_id 和 client_secret),然后用 pip install 安装以下库(requests、python-docx):
import requests
from pprint import pprint
import base64
import os
from docx import Document
import re
import json
def get_content(file, timeout=30):
    """Send one image to Baidu's ``accurate_basic`` OCR endpoint and return its text.

    An access token is requested on every call using the credentials
    embedded in the token URL, then the base64-encoded image is posted to
    the OCR endpoint.

    Args:
        file: Path of the image file to recognize.
        timeout: Seconds ``requests`` waits on each HTTP call before giving
            up. Pass ``None`` to wait indefinitely.

    Returns:
        The ``words`` value of every entry in the response's
        ``words_result`` list, joined with newlines.

    Raises:
        KeyError: If the token response has no ``access_token`` or the OCR
            response has no ``words_result`` (the response payload is
            included in the message in the latter case).
    """
    # Exchange the credentials in the query string for an access token.
    host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=替换你的&client_secret=替换你的'
    response = requests.get(host, timeout=timeout)
    access_token = response.json()['access_token']

    # Use a context manager so the file handle is closed even if reading fails.
    with open(file, 'rb') as f:
        img = base64.b64encode(f.read())

    request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
    request_url = request_url + "?access_token=" + access_token
    params = {"image": img}
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    json_data = requests.post(
        request_url, data=params, headers=headers, timeout=timeout
    ).json()

    # Keep raising KeyError as before, but show the payload so an API-side
    # failure can be diagnosed instead of a bare "'words_result'".
    if 'words_result' not in json_data:
        raise KeyError(
            'words_result missing from OCR response: {!r}'.format(json_data)
        )

    words = '\n'.join(item['words'] for item in json_data['words_result'])
    return words
# OCR every file in the image directory and collect the recognized text
# into a single Word document.
img_dir = 'img'
content_list = []
# os.listdir() returns entries in arbitrary order; sort so the pages are
# always written to the document in the same (lexicographic) order.
files = sorted(os.listdir(img_dir))
for file in files:
    # os.path.join uses the platform's separator instead of a hard-coded
    # backslash, so the script also works outside Windows.
    filename = os.path.join(img_dir, file)
    words = get_content(file=filename)
    print(words)
    content_list.append(words)

# All recognized text goes into one paragraph, one image's text per line group.
doc = Document()
content = '\n'.join(content_list)
doc.add_paragraph(content)
# NOTE(review): python-docx writes .docx (Office Open XML) content; the
# '.doc' name is kept so existing consumers of this file are not broken.
# Consider renaming to 'data.docx'.
doc.save('data.doc')
10-31
4644
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)