今天学习利用百度云 OCR 识别身份证信息。先自行申请应用，获得 App ID 与 secret 之后，再请求获取 access_token。该 access_token 有效期为 30 天，过期后需要重新获取。
import base64
import json
import ssl
import sys
import urllib
import urllib.parse
import urllib.request

import pandas as pd
# 这里是为了获取access_token
# host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=你的ID&client_secret=你的secret'
# request = ul.request.urlopen(host)
# # request.add_header('Content-Type', 'application/json; charset=UTF-8')
# # response = request.urlopen(request)
# content = request.read()
# newContent = content.decode(encoding='utf-8')
# # print(newContent)
# content_dict = eval(newContent)
# print(content_dict['access_token'])
# access_token = '*****'
# url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/idcard?access_token='+access_token
# 增加header
# headers = {
# 'Content-Type':'application/x-www-form-urlencoded'
# }
# request = urllib.request.Request(url, headers=headers)
# response = urllib.request.urlopen(request)
def get_file_content(fpath: str) -> bytes:
    """Return the raw bytes of the file at ``fpath``.

    The file is opened in binary mode and is closed before returning.

    Args:
        fpath: Path of the file to read.

    Returns:
        The complete file contents as ``bytes``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(fpath, 'rb') as fp:
        return fp.read()
def img_to_str(img_path: str, access_token: str = '******') -> str:
    """Recognize the front side of an ID card image with Baidu Cloud OCR.

    The image is base64-encoded and POSTed as a form to the Baidu ``idcard``
    OCR endpoint. The recognized fields are then formatted as one
    ``"<field>: <value>"`` line per field.

    Args:
        img_path: Path of the image file to recognize.
        access_token: Baidu OAuth access token. The default is a placeholder
            and must be replaced with a real token for the request to succeed.

    Returns:
        A string with one newline-terminated line per field, in the order:
        address, birth date, name, citizen ID number, sex, ethnicity.

    Raises:
        OSError: If the image file cannot be opened or read.
        urllib.error.URLError: If the HTTP request fails.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no ``words_result`` entry (for example
            an API error response) or lacks one of the expected fields.
    """
    url = ('https://aip.baidubce.com/rest/2.0/ocr/v1/idcard?access_token='
           + access_token)

    # Read the image in binary mode; the context manager closes the handle.
    with open(img_path, 'rb') as image_file:
        encoded_image = base64.b64encode(image_file.read())

    # The "image" parameter carries the base64-encoded image bytes.
    form = {"image": encoded_image, "id_card_side": "front"}
    payload = urllib.parse.urlencode(form).encode("utf-8")

    request = urllib.request.Request(url, payload)
    request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    with urllib.request.urlopen(request) as response:
        body = response.read().decode(encoding='utf-8')

    # Parse the body as JSON; never eval() text received from the network.
    content_dict = json.loads(body)
    if 'words_result' not in content_dict:
        raise KeyError(
            "OCR response has no 'words_result': %r" % (content_dict,)
        )
    words_result = content_dict['words_result']

    field_names = ["住址", "出生", "姓名", "公民身份号码", "性别", "民族"]
    lines = [
        name + ": " + words_result[name]['words'] + '\n'
        for name in field_names
    ]
    return ''.join(lines)
# Sample image to recognize (other samples tried: 1.png, 2.jpg).
# The file name must not carry trailing whitespace, or open() will look for
# a differently named file.
image_name = '3.jpg'
# img_to_str may print the fields itself and return None; only print a
# returned value so that a stray "None" is never shown.
ocr_text = img_to_str(image_name)
if ocr_text is not None:
    print(ocr_text)