百度AI开放平台
URL:http://ai.baidu.com/?track=cp:aipinzhuan|pf:pc|pp:AIpingtai|pu:title|ci:|kw:10005792
使用步骤:
点击控制台 —> 登录 —> 选择文字识别
—> 点击创建应用(输入内容),点击创建 —> 点击查看应用详情(记住API KEY和Secret Key)
—> 点击技术文档 —> 选择增值税发票识别
import requests
import base64
from selenium import webdriver
from PIL import Image
# Obtain an OAuth access token from the Baidu AI open platform.
def get_token(client_id='mgLpUUm17V7dVoYBZfZDaUsA',
              client_secret='A8re4uRiGTrcoY6wbWtEwc4OpjZY6GPK',
              timeout=10):
    """Request an access token using the client-credentials grant.

    Args:
        client_id: API Key (AK) of the Baidu application.
        client_secret: Secret Key (SK) of the Baidu application.
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when
        the server answers with an HTTP error status (4xx/5xx).

    Raises:
        requests.RequestException: On connection failure or timeout.
        KeyError: If the JSON body carries no ``access_token`` entry.
    """
    # NOTE(review): the default AK/SK are real-looking credentials committed
    # to source. They are kept as defaults only for backward compatibility;
    # callers should pass their own values (e.g. read from the environment).
    response = requests.get(
        'https://aip.baidubce.com/oauth/2.0/token',
        params={
            'grant_type': 'client_credentials',
            'client_id': client_id,
            'client_secret': client_secret,
        },
        # Without a timeout requests may block forever on a stalled server.
        timeout=timeout,
    )
    # Response truthiness is defined as ``response.ok`` (status < 400).
    if not response.ok:
        return None
    return response.json()['access_token']
# Recognise a VAT invoice image with the Baidu OCR service.
def shibie(access_token, image_path='fapiao.png', timeout=30):
    """Run VAT-invoice OCR on an image and extract the verification fields.

    Args:
        access_token: Token string obtained from the Baidu OAuth endpoint.
        image_path: Path of the invoice image to upload.
        timeout: Seconds to wait for the OCR endpoint before giving up.

    Returns:
        A tuple ``(InvoiceCode, InvoiceNum, InvoiceDate, CheckCode)`` taken
        from the ``words_result`` object of the response, or ``None`` when
        the server answers with an HTTP error status (4xx/5xx).

    Raises:
        OSError: If the image file cannot be opened or read.
        requests.RequestException: On connection failure or timeout.
        KeyError: If the response lacks ``words_result`` or one of the
            four expected fields.
    """
    request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"

    # Read the image in binary mode; the context manager guarantees the
    # handle is released even if reading fails.
    with open(image_path, 'rb') as image_file:
        encoded_image = base64.b64encode(image_file.read())

    response = requests.post(
        request_url + "?access_token=" + access_token,
        data={"image": encoded_image},
        headers={'content-type': 'application/x-www-form-urlencoded'},
        # Without a timeout requests may block forever on a stalled server.
        timeout=timeout,
    )
    # Response truthiness is defined as ``response.ok`` (status < 400).
    if not response.ok:
        return None

    fields = response.json()['words_result']
    return (
        fields['InvoiceCode'],   # invoice code
        fields['InvoiceNum'],    # invoice number
        fields['InvoiceDate'],   # issue date
        fields['CheckCode'],     # check code
    )
# Fill in and submit the invoice verification form on the tax website.
def check(detail):
    """Drive Internet Explorer through the invoice verification form.

    Args:
        detail: Indexable with four entries, in order: invoice code,
            invoice number, issue date and check code (the tuple shape
            produced by ``shibie``).

    Side effects:
        Writes ``yzm.png`` (full-window screenshot) and ``jubu.png``
        (cropped captcha region) to the working directory and calls
        ``dama()`` to solve the captcha. The browser is left open.
    """
    # Launch IE; the path must point at the driver executable itself.
    browser = webdriver.Ie(executable_path=r'D:\IEDriverServer_x64_2.48.0\IEDriverServer.exe')
    browser.maximize_window()
    browser.get(url='https://inv-veri.chinatax.gov.cn/')

    # Invoice code.
    code_box = browser.find_element_by_id('fpdm')
    code_box.send_keys(detail[0])

    # Invoice number.
    number_box = browser.find_element_by_id('fphm')
    number_box.send_keys(detail[1])

    # Issue date: the form takes bare digits, so strip the
    # year/month/day characters from the recognised text.
    compact_date = detail[2]
    for unit in ('年', '月', '日'):
        compact_date = compact_date.replace(unit, '')
    date_box = browser.find_element_by_id('kprq')
    date_box.send_keys(compact_date)

    # Check code: only the last six characters are entered.
    check_box = browser.find_element_by_id('kjje')
    check_box.send_keys(detail[3][-6:])

    # Capture the whole window, then cut out the captcha area.
    # NOTE(review): the crop box (left, upper, right, lower) is hard-coded
    # and presumably matches one specific maximised-window layout - confirm.
    browser.save_screenshot('yzm.png')
    captcha_box = (715, 500, 1200, 610)
    Image.open('yzm.png').crop(captcha_box).save('jubu.png')

    # Solve the captcha through the coding platform and type the answer.
    captcha_text = dama()
    captcha_input = browser.find_element_by_id('yzm')
    captcha_input.send_keys(captcha_text)

    # Submit the form.
    submit_button = browser.find_element_by_id('checkfp')
    submit_button.click()
    # Placeholder kept from the original: later steps are not shown here.
    ...
# Solve the captcha image through the Chaojiying coding platform.
def dama(image_path='jubu.png', timeout=30):
    """Upload a captcha image to Chaojiying and return the recognised text.

    Args:
        image_path: Path of the captcha image to upload.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The ``pic_str`` value of the JSON response (also printed).

    Raises:
        OSError: If the image file cannot be opened or read.
        requests.RequestException: On connection failure or timeout.
        KeyError: If the response carries no ``pic_str`` entry.
    """
    # The context manager closes the file even when reading fails.
    with open(image_path, 'rb') as captcha_file:
        image = base64.b64encode(captcha_file.read())

    post_url = 'http://upload.chaojiying.net/Upload/Processing.php'
    # NOTE(review): 'user', 'pass' and 'codetype' appear to be placeholder
    # values that must be replaced with real account data before use.
    data = {
        'user': '用户名',
        'pass': '密码',
        'softid': '',
        'codetype': '解码编号',
        'file_base64': image,
    }
    # Without a timeout requests may block forever on a stalled server.
    response = requests.post(url=post_url, data=data, timeout=timeout)

    # Decode the JSON body once and reuse the value for both print and return.
    pic_str = response.json()['pic_str']
    print(pic_str)
    return pic_str
# Script entry point: fetch a token, OCR the invoice, then verify it online.
if __name__ == '__main__':
    check(shibie(get_token()))