下面是身份证正反面识别的代码:
import easyocr
import os
import re
import pandas as pd
class Card:
    """OCR Chinese resident ID card images and export the parsed fields to Excel.

    Each image file found in the ``images`` folder is read with EasyOCR, the
    recognised text is matched against the labels printed on the card, and one
    row of nine string fields is produced per image.
    """

    # Column headers, in the same order as the rows built by ``_parse_fields``.
    COLUMNS = ['身份证类型', '姓名', '性别', '民族', '出生日期', '地址', '身份证号码', '签发机关', '有效期限']

    # Only files with one of these extensions are handed to the OCR engine.
    # The result workbook is written into the same folder by default, so
    # without this filter a second run would try to OCR the .xlsx file.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')

    # Title line printed on the national-emblem side of the card.
    ID_TYPE = '中华人民共和国居民身份证'

    # Label-anchored patterns, one per extracted field, in column order
    # (the ID-type column is handled separately by a substring check).
    _FIELD_PATTERNS = (
        r'名(.*?)性',                             # name
        r'性别(.*?)民族',                         # gender
        r'民族(.*?)出生',                         # ethnic group
        r'出生(\d{4}年\d{1,2}月\d{1,2}日)',       # date of birth
        r'住址(.*?)公民身份号码',                 # address
        # The last character of an ID number may be the check character "X",
        # which a plain ``\d+`` would silently drop.
        r'公民身份号码(\d+[Xx]?)',                # ID number
        r'签发机关(.*?)有效期限',                 # issuing authority
        r'有效期限(.*?)$',                        # validity period
    )

    def __init__(self, images=r'./OCR', gpu=True):
        """Create the OCR reader once so it can be reused for every image.

        Args:
            images: Folder that contains the card images.
            gpu: Passed through to ``easyocr.Reader``.
        """
        self.images = images
        self.ocr = easyocr.Reader(['ch_sim', 'en'], gpu=gpu)

    @classmethod
    def _is_image_name(cls, filename):
        """Return True when *filename* ends with a known image extension (case-insensitive)."""
        return filename.lower().endswith(cls.IMAGE_EXTENSIONS)

    @classmethod
    def _parse_fields(cls, content):
        """Extract the nine card fields from the recognised text of one image.

        Args:
            content: All OCR text fragments of one image joined into one string.

        Returns:
            A list of nine strings ordered like ``COLUMNS``; a field that was
            not found is an empty string.
        """
        text = content.replace(" ", "")  # OCR output may contain stray spaces between glyphs
        id_type = cls.ID_TYPE if cls.ID_TYPE in text else ''
        # ``findall`` returns every capture; joining yields '' when nothing matched.
        fields = [''.join(re.findall(pattern, text)) for pattern in cls._FIELD_PATTERNS]
        return [id_type, *fields]

    def read_card(self):
        """Run OCR on every image in ``self.images`` and parse the card fields.

        Entries that are not regular files or do not have an image extension
        are skipped. Files are processed in sorted name order so the row order
        is reproducible between runs.

        Returns:
            A list with one nine-element row (see ``COLUMNS``) per image.
        """
        data = []
        for image in sorted(os.listdir(self.images)):
            image_path = os.path.join(self.images, image)
            if not (os.path.isfile(image_path) and self._is_image_name(image)):
                continue
            print(f'正在识别:{image}')
            # detail=0 makes readtext return just the recognised strings.
            fragments = self.ocr.readtext(image_path, detail=0)
            data.append(self._parse_fields(''.join(fragments)))
            print(f'完成识别:{image}')
        print(data)
        return data

    def write_to_excel(self, data, output_path=r'./OCR/识别结果.xlsx'):
        """Print the recognised rows as a table and save them as an Excel workbook.

        Args:
            data: Rows as returned by ``read_card``.
            output_path: Destination of the workbook.
        """
        df = pd.DataFrame(data, columns=self.COLUMNS)
        print('识别结果如下:')
        print(df)
        df.to_excel(output_path, index=False)
if __name__ == '__main__':
    # Script entry point: OCR every card image, then export the parsed rows.
    recognizer = Card()
    recognizer.write_to_excel(recognizer.read_card())
运行后会生成一个 xlsx 文档:
第一行为人像那一面的识别结果,
第二行为国徽那一面的识别结果。