阿里官方身份证OCR文字识别API 批量识别身份证信息到Excel表格

最新推荐文章于 2024-05-06 13:55:30 发布

weiabc

最新推荐文章于 2024-05-06 13:55:30 发布

阅读量570

点赞数 9

分类专栏：平时办公　文章标签：ocr、python

本文链接：https://blog.csdn.net/weiabc/article/details/136402871

版权

平时办公专栏收录该内容

3 篇文章 0 订阅

订阅专栏

本文介绍了一个用 Python 编写的脚本，供工程施工单位处理工人信息：通过阿里云 OCR API 识别身份证上的信息，并将结果保存到 Excel 表格中。脚本包括文件夹选择、批量识别及导出功能，文中还提及了可能遇到的常见问题，如图片格式错误和 API 调用次数限制。

摘要由CSDN通过智能技术生成

1、工程施工单位经常需要统计工人的身份信息，用于填写工人信息统计表和工资发放表。下面的代码参考了网上的相关内容，经调试后可以正常运行。

2、运行时若在 PyCharm 中出现“图片错误”之类的提示，一般是阿里官方身份证 OCR 文字识别 API 的套餐已过期或调用次数已用完。免费试用只有 100 次（每张照片计一次）；使用新的支付宝账号首次购买时，1 分钱可获得 500 次。

#!/usr/bin/env python
# -*- coding: utf-8 -*-


import base64
import glob
import json
import threading
import time
import tkinter
from tkinter import *
from tkinter import filedialog
from tkinter import ttk
from tkinter.filedialog import askdirectory
from tkinter.messagebox import showinfo

import pandas as pd
import requests
from urllib3.filepost import writer
import openpyxl

ENCODING = 'utf-8'


def selectPath():
    """Ask the user for a folder and store the choice in the shared ``path`` variable.

    The table and result lists are reset first via ``init()``.  A chosen
    folder is stored with forward slashes replaced by backslashes; an empty
    result (dialog cancelled) clears ``path``.  In both cases the user gets an
    info box, and the folder's glob listing is printed to the console.
    """
    init()
    chosen = askdirectory()  # returns the selected folder path
    if chosen != "":
        # Paths are used with "\\" separators elsewhere in this script.
        path.set(chosen.replace("/", "\\"))
        showinfo('提示', '已选择' + str(path.get()) + '文件夹！')
    else:
        # Cancelling the dialog clears the entry box.
        path.set(chosen)
        path.get()  # result unused; call retained from the original flow
        showinfo('提示', '未选择文件夹')
    print("路径：" + str(glob.glob(path.get() + "\\*")))


def shibie():
    """Start a batch recognition run if a folder has been selected.

    Two background threads are launched: ``create`` (the progress window) and
    ``tijiao`` (the recognition loop).  When no folder is selected an info box
    is shown instead.
    """
    global statzzx
    if path.get() == '':
        showinfo('提示', '请选择文件夹！')
        return
    # Clear the "finished" flag before either thread starts.  Otherwise the
    # progress thread can observe the value 1 left over from a previous run
    # and report success before the new recognition loop has reset it.
    statzzx = 0
    thread_it(create)
    thread_it(tijiao)

    # 调用阿里云接口


def tijiao():
    """Recognise every file in the selected folder and fill the result lists.

    For each path matched by ``path.get() + "\\*"`` the file is base64-encoded
    and sent to ``predict``.  Successful results are appended to ``img_file``
    (dict rows used for the Excel export) and ``img_file1`` (list rows shown
    in the table).  Files that cannot be read or recognised are reported on
    the console and skipped.  When the loop ends ``statzzx`` is set to 1 so
    the progress window can stop, and the table is populated via ``insert``.
    """
    global statzzx
    init()
    statzzx = 0
    row_id = 0  # 1-based sequence number shown in the table's first column

    try:
        for file_abs in glob.glob(path.get() + "\\*"):
            file_ab = file_abs.replace("\\", "/")
            # Pass configure=None to predict() when no configure field is needed.
            try:
                img_base64data = get_img_base64(file_ab)
                name, sex, nationality, birth, address, num = predict(url, appcode, img_base64data, configure)
            except TypeError:
                # predict() returned its scalar failure flag, which cannot be
                # unpacked into six values.
                print("图片错误")
                continue
            except (OSError, KeyError, ValueError, requests.RequestException) as exc:
                # Unreadable path (e.g. a sub-directory), network failure, or
                # a response that is not JSON / lacks the expected fields:
                # skip this file instead of killing the worker thread.
                print("图片错误: " + str(file_abs) + " (" + str(exc) + ")")
                continue

            # NOTE: the module-level ``flag`` is never reassigned in this file,
            # so the else branch is effectively unreachable.
            if flag == 0:
                row_id = row_id + 1
                img_file.append(
                    {'姓名': name, '性别': sex, '民族': nationality, '出生': birth, '住址': address, '身份证号码': num,
                     "图片路径": file_abs})
                img_file1.append([row_id, name, sex, nationality, birth, address, num, file_abs])
            else:
                print('识别错误')
                continue
    finally:
        # Always signal completion, even if an unexpected error escapes the
        # loop, so the progress window is not left waiting on a dead worker.
        statzzx = 1
    insert()


def thread_it(func):
    """Run ``func`` (called with no arguments) in a background daemon thread.

    The thread is a daemon so it does not keep the process alive after the
    main window is closed.  Nothing is returned.
    """
    # ``daemon=True`` replaces the deprecated ``Thread.setDaemon(True)`` call.
    worker = threading.Thread(target=func, daemon=True)
    worker.start()


def writeExcel():
    """Export the recognised rows in ``img_file`` to an Excel workbook.

    The user picks the destination through a save dialog.  If nothing has been
    recognised yet an info box asks the user to run the batch recognition
    first; if the dialog is cancelled nothing is written.
    """
    if not img_file:
        print("请选择文件夹！")
        showinfo('提示', '请先批量识别！')
        return

    order = ['姓名', '性别', '民族', '出生', '住址', '身份证号码', '图片路径']
    pf = pd.DataFrame(img_file)[order]  # fix the column order of the sheet

    # The default extension must be .xlsx: the original '.py' made a name
    # typed without an extension end in ".py", which is not an Excel file.
    file_path = filedialog.asksaveasfilename(defaultextension='.xlsx', filetypes=[("Excel files", ".xlsx")])
    print("文件保存路径：" + str(file_path))
    if not file_path:
        # The dialog returns an empty value when cancelled; writing to it
        # would raise inside pandas.
        print("已取消导出")
        return

    pf.to_excel(file_path, index=False, sheet_name="身份证信息")

    print("导出Excel成功!")
    showinfo('成功', '导出Excel成功!')
    # 文件识别进度框


def create():
    """Show a small progress window while the recognition thread is running.

    The bar advances one step every 0.03 s for at most ``scale + 1`` steps and
    a textual progress line is printed to the console.  As soon as the global
    ``statzzx`` equals 1 a success message is shown and the loop stops.  The
    window is destroyed when the loop ends either way.
    """
    popup = Toplevel()
    popup.title('文件识别中...')

    bar = ttk.Progressbar(popup, length=280, mode="determinate", orient=HORIZONTAL)
    # Centre the 300x70 popup on the screen.
    win_w, win_h = 300, 70
    left = int((screenwidth - win_w) / 2)
    upper = int((screenheight - win_h) / 2)
    popup.geometry('{}x{}+{}+{}'.format(win_w, win_h, left, upper))
    bar.pack(padx=10, pady=20)
    bar["maximum"] = 100
    bar["value"] = 0

    print("\n" * 2)
    print("执行开始".center(scale + 28, '-'))
    began = time.perf_counter()
    for step in range(scale + 1):
        time.sleep(0.03)
        if statzzx == 1:
            # The recognition loop has signalled completion.
            print("文件获取成功！")
            showinfo('提示', '文件识别成功！')
            break
        bar["value"] = step  # advance the bar by one step
        root.update()  # redraw the window
        done_marks = '*' * step
        todo_marks = '.' * (scale - step)
        percent = (step / scale) * 100
        elapsed = time.perf_counter() - began
        print("\r任务进度:{:>3.0f}% [{}->{}]消耗时间:{:.2f}s".format(percent, done_marks, todo_marks, elapsed), end="")
    print("\n" + "执行结束".center(scale + 28, '-'))
    popup.destroy()


def insert():
    """Append every row of ``img_file1`` to the end of the result table."""
    for row in img_file1:
        table.insert('', END, values=row)


def delete():
    """Remove every row currently shown in the result table."""
    for item_id in table.get_children():  # ids of all top-level rows
        table.delete(item_id)


def get_img_base64(img_file):
    """Read the file at ``img_file`` and return its content as base64 text.

    The file is opened in binary mode; the base64 bytes are decoded to ``str``
    with the module-level ``ENCODING``.
    """
    with open(img_file, 'rb') as handle:
        raw = handle.read()
    encoded = base64.b64encode(raw)
    return encoded.decode(ENCODING)


def predict(url, appcode, img_base64, kv_configure, timeout=30):
    """POST one base64-encoded image to the OCR endpoint and return its fields.

    Args:
        url: Endpoint to POST to.
        appcode: Value sent in the ``Authorization: APPCODE ...`` header.
        img_base64: Base64 text of the image, sent as the ``image`` field.
        kv_configure: Optional object; when not ``None`` it is JSON-encoded
            and sent as the ``configure`` field.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``(name, sex, nationality, birth, address, num)`` on success.  On any
        failure (network error, non-success HTTP status, body that is not
        JSON, or a missing field) the integer ``1`` is returned; callers that
        unpack the result therefore get a ``TypeError`` for failed requests.
    """
    failure_flag = 1
    fields = ('name', 'sex', 'nationality', 'birth', 'address', 'num')

    param = {'image': img_base64}
    if kv_configure is not None:
        param['configure'] = json.dumps(kv_configure)
    payload = json.dumps(param).encode("utf-8")
    headers = {'Authorization': 'APPCODE %s' % appcode}

    try:
        # Without a timeout a stalled connection would block the worker forever.
        response = requests.post(url=url, headers=headers, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        print("接口请求失败: " + str(exc))
        return failure_flag

    # A Response object is falsy for 4xx/5xx status codes.
    if not response:
        print("接口请求失败: HTTP " + str(response.status_code))
        return failure_flag

    try:
        data = response.json()
    except ValueError:
        # The body was not valid JSON.
        return failure_flag
    print(data)

    try:
        return tuple(data[key] for key in fields)
    except (KeyError, TypeError):
        # The JSON lacks an expected field or is not an object at all.
        return failure_flag


# Module-level application state and main-window construction.
# Everything below runs when the module is executed or imported.
root = Tk()
root.title("身份证信息批量获取")
path = StringVar()  # currently selected folder; bound to the read-only entry in demo()
appcode = '一串字母数字'  # placeholder: replace with your own AppCode, purchasable on Alibaba Cloud https://market.aliyun.com/products/57124001/cmapi010401.html#sku=yuncode440100000
url = 'https://cardnumber.market.alicloudapi.com/rest/160601/ocr/ocr_idcard.json'
configure = {'side': 'face'}  # sent as the request's "configure" field; presumably selects the portrait side of the card (confirm against the API docs)
flag = 0  # read by tijiao(); never reassigned in this file (predict's `flag = 1` is a local)
scale = 100  # number of steps used by the progress loop in create()
statzzx = 0  # set to 1 by tijiao() when the batch is finished; polled by create()
img_file = []  # dict rows, one per recognised card, used for the Excel export
img_file1 = []  # list rows, one per recognised card, shown in the table

screenwidth = root.winfo_screenwidth()  # screen width
screenheight = root.winfo_screenheight()  # screen height
width = 1000
height = 500
# Top-left corner that centres the main window on the screen.
x = int((screenwidth - width) / 2)
y = int((screenheight - height) / 2)
root.geometry('{}x{}+{}+{}'.format(width, height, x, y))  # size and position
# NOTE(review): the frame and both scrollbars are created here but are never
# placed with grid/pack in this file, and no `command=` links them back to the
# table. Confirm whether scrolling was intended.
tabel_frame = tkinter.Frame(root)
xscroll = Scrollbar(tabel_frame, orient=HORIZONTAL)
yscroll = Scrollbar(tabel_frame, orient=VERTICAL)

columns = ['id', '姓名', '性别', '民族', '出生', '住址', '身份证号码', '图片路径']
table = ttk.Treeview(
    master=root,  # parent container
    height=10,  # number of rows displayed
    columns=columns,  # column identifiers
    show='headings',  # show only the headings, hiding the implicit first column
    xscrollcommand=xscroll.set,  # horizontal scrollbar
    yscrollcommand=yscroll.set,  # vertical scrollbar
)


def init():
    """Configure the result table and reset all recognition results.

    Sets the heading text and width of every column, places the table in the
    window grid, removes all rows currently shown, and rebinds the global
    result lists ``img_file`` and ``img_file1`` to new empty lists.
    """
    global img_file, img_file1

    root.grid_columnconfigure(1, minsize=200)

    # (column id, heading text, width, minimum width)
    layout = (
        ('id', '序号', 10, 10),
        ('姓名', '姓名', 30, 30),
        ('性别', '性别', 20, 10),
        ('民族', '民族', 20, 10),
        ('出生', '出生', 50, 50),
        ('住址', '住址', 200, 100),
        ('身份证号码', '身份证号码', 150, 100),
        ('图片路径', '图片路径', 150, 100),
    )
    # Headings first, then column geometry.
    for column_id, title, _, _ in layout:
        table.heading(column_id, text=title)
    for column_id, _, wide, narrowest in layout:
        table.column(column_id, width=wide, minwidth=narrowest, anchor=S)

    table.grid(row=3, columnspan=4, padx=18, ipadx=165, ipady=100, pady=10)
    delete()
    img_file = []
    img_file1 = []


def demo():
    """Build the top row of controls and initialise the result table.

    Row 0 holds, left to right: the folder-selection button (its handler runs
    in a background thread), a read-only entry bound to ``path``, the
    batch-recognition button and the export button.
    """
    pick_button = Button(root, text="文件夹批量选择", command=lambda: thread_it(selectPath), width=15)
    pick_button.grid(row=0, column=0, padx=18, pady=10, sticky='w')

    path_entry = Entry(root, textvariable=path, state="readonly", width=83)
    path_entry.grid(row=0, column=1, pady=10, sticky='w')

    recognise_button = Button(root, text="批量识别", command=shibie, width=13)
    recognise_button.grid(row=0, padx=3, column=2, sticky='w', pady=10)

    export_button = Button(root, text="导出表格", command=writeExcel, width=13)
    export_button.grid(row=0, padx=2, column=3, sticky='w', pady=10)

    init()


# Script entry point: build the controls, then hand control to the Tk event loop.
if __name__ == '__main__':
    demo()
    root.mainloop()

weiabc

关注

9
点赞
踩
7

收藏

觉得还不错? 一键收藏
0
评论
阿里官方身份证OCR文字识别API 批量识别身份证信息到Excel表格

2、运行时若在pycharm中出现图片格式错误等原因，一般就是阿里官方身份证OCR文字识别API过期或次数用完了，试用是免费的，只有100次，每张照片计一次，可以用新支付宝首次 1分可以有500次。1、工程施工单位中经常要统计工人的身份信息，用于工人信息统计表、工资发放表。在网上查询了相关内容，经调试后可以运行。
复制链接

扫一扫