from cnocr import CnOcr
import re
def _text_after(text, marker):
    """Return the part of ``text`` that follows the first ``marker``.

    Raises:
        IndexError: if ``marker`` does not occur in ``text``.
    """
    _, found, tail = text.partition(marker)
    if not found:
        # IndexError is what the earlier ``split(marker)[1]`` form raised,
        # so callers that already catch it keep working.
        raise IndexError('marker {!r} not found in OCR text'.format(marker))
    return tail


def ocr_front(path):
    """Run OCR on an ID-card image and split the text into labelled fields.

    The ``'text'`` entries of all items returned by ``CnOcr().ocr(path)`` are
    concatenated, and the resulting string is cut at fixed marker characters.
    NOTE(review): the markers look tailored to the issuer/validity side of a
    PRC resident ID card ("...居民身份证 签发机关 ... 有效期限 ...") -- confirm.

    Args:
        path: Image location handed unchanged to ``CnOcr().ocr``.

    Returns:
        dict with four entries:
          * ``'国籍'``     -- everything before the first '居'.
          * ``'证件类别'`` -- text after the first '国', up to the next '签'.
          * the 4 characters after the first '证' (used as the key) mapped to
            the text after the first '关', up to the next '有'.
          * '有' + the 3 characters after the first '有' (used as the key)
            mapped to everything after the first '限'.

    Raises:
        IndexError: if one of the markers '国', '证', '关', '有' or '限' is
            absent from the recognized text.
    """
    ocr = CnOcr()
    text = ''.join(item['text'] for item in ocr.ocr(path))

    # Each value is taken after the *first* occurrence of its marker only.
    # Splitting on every occurrence truncated values that contain the marker
    # themselves, e.g. an issuing authority such as '韶关市公安局'.
    issuer_label = _text_after(text, '证')[:4]
    issuer_value = _text_after(text, '关').partition('有')[0]
    validity_label = '有' + _text_after(text, '有')[:3]
    validity_value = _text_after(text, '限')

    return {
        '国籍': text.partition('居')[0],
        '证件类别': _text_after(text, '国').partition('签')[0],
        issuer_label: issuer_value,
        validity_label: validity_value,
    }
def _slice_between(text, start_marker, end_marker, trim=0):
    """Return the text between ``start_marker`` and the next ``end_marker``.

    Args:
        text: String to cut.
        start_marker: The slice begins right after its first occurrence.
        end_marker: The slice stops at its first occurrence after the start.
        trim: Number of extra characters to drop just before ``end_marker``.

    Returns:
        The slice, or ``''`` when either marker is missing.
    """
    start = text.find(start_marker)
    if start == -1:
        return ''
    start += len(start_marker)
    end = text.find(end_marker, start)
    if end == -1:
        return ''
    return text[start:end - trim]


def ocr_back(path):
    """Run OCR on an ID-card image and extract the holder's personal fields.

    The ``'text'`` entries of all items returned by ``CnOcr().ocr(path)`` are
    concatenated, and each field is sliced out between label characters.
    NOTE(review): the labels used (名/别/民族/生/住/址/公民/份号码) look like
    the personal-information side of a PRC resident ID card -- confirm.

    Args:
        path: Image location handed unchanged to ``CnOcr().ocr``.

    Returns:
        dict with the keys ``'姓名'``, ``'性别'``, ``'出生'``, ``'住址'`` and
        ``'公民身份号码'``. A field whose labels cannot be found is ``''``
        (a failed ``str.find`` used to produce an arbitrary slice instead).
        ``'出生'`` is a digit string: ``YYYYMMDD`` when a year/month/day
        pattern is recognized, otherwise just the digits found between the
        labels.
    """
    ocr = CnOcr()
    text = ''.join(item['text'] for item in ocr.ocr(path))

    birth_raw = _slice_between(text, '生', '住')
    ymd = re.search(r'(\d{4})\D+(\d{1,2})\D+(\d{1,2})', birth_raw)
    if ymd:
        # Zero-pad month and day: with bare digits, "1990年11月1日" and
        # "1990年1月11日" would both collapse to "1990111".
        birth = '{}{:0>2}{:0>2}'.format(*ymd.groups())
    else:
        # Separators were not recognized; fall back to the digits alone.
        birth = re.sub(r'\D', '', birth_raw)

    id_label = '份号码'
    id_pos = text.find(id_label)
    if id_pos == -1:
        id_number = ''
    else:
        id_start = id_pos + len(id_label)
        # The 18 characters that follow the label.
        id_number = text[id_start:id_start + 18]

    return {
        # trim=1 drops the single character that precedes '别'
        # (presumably the '性' of the "性别" label).
        '姓名': _slice_between(text, '名', '别', trim=1),
        '性别': _slice_between(text, '别', '民族'),
        '出生': birth,
        '住址': _slice_between(text, '址', '公民'),
        '公民身份号码': id_number,
    }
if __name__ == '__main__':
    # Demo run: parse one sample image with each parser, then print every
    # extracted field as "key:value", first function's result first.
    front_fields = ocr_front('Id_Card_n.jpeg')
    back_fields = ocr_back('123456.png')
    for fields in (front_fields, back_fields):
        for label, value in fields.items():
            print("{}:{}".format(label, value))
# cnocr 再训练:
# 一、获取源代码:
#     git clone https://github.com/breezedeus/cnocr.git
# 二、准备训练数据
# 引用与参考:《CNOCR重训练》,萧班的博客(CSDN)。
# 如有侵权,请联系删除。