A simple general-purpose OCR for ID card recognition with Python
Without further ado, let's get straight to the point.
This article walks through building your own general-purpose OCR for ID cards, so you no longer have to depend on someone else's API.
Preprocess and deskew the image
Detect and extract the target region
Recognize the content of the target region
Preprocess and deskew the image
In real use, the images you receive will rarely be in an ideal state for recognition, so the first thing to do after getting an image is to preprocess it: deskew the image so that it is as close as possible to that ideal state.
1. Apply a Gaussian blur to the input image to reduce noise
blur = cv2.GaussianBlur(image, (7, 7), 0)
2. Detect the outer contour edges with Canny plus the Hough transform
canny = cv2.Canny(blur, 20, 150, apertureSize=3)
lines = cv2.HoughLines(canny, 1, np.pi / 180, 118)
3. Find the smallest rotation angle and rotate the image
The functions below depend on OpenCV, NumPy and the standard math module:

import math
import cv2
import numpy as np

def rotate_about_center2(src, radian, scale=1.):
    # Rotate src about its center by `radian`, enlarging the canvas so nothing gets cropped
    w = src.shape[1]
    h = src.shape[0]
    angle = radian * 180 / np.pi
    # Size of the canvas that fully contains the rotated image
    nw = (abs(np.sin(radian) * h) + abs(np.cos(radian) * w)) * scale
    nh = (abs(np.cos(radian) * h) + abs(np.sin(radian) * w)) * scale
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # Shift the transform so the source image ends up centered on the new canvas
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]
    return cv2.warpAffine(src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
                          flags=cv2.INTER_LANCZOS4)
def get_group(arr):
    # Split the angles into 4 radian intervals and return only the non-empty groups
    radian_45 = np.pi / 4
    radian_90 = np.pi / 2
    radian_135 = radian_45 * 3
    ret_arr = [[], [], [], []]
    for i in range(len(arr)):
        if arr[i] < radian_45:
            ret_arr[0].append(arr[i])
        elif arr[i] < radian_90:
            ret_arr[1].append(arr[i])
        elif arr[i] < radian_135:
            ret_arr[2].append(arr[i])
        else:
            ret_arr[3].append(arr[i])
    while [] in ret_arr:
        ret_arr.remove([])
    return ret_arr
def get_min_var_avg(arr):
    # Group the angles by interval and return the mean of the group with the smallest variance
    group_arr = get_group(arr)
    if len(group_arr) <= 1:
        return np.mean(group_arr[0])
    var_arr = [np.var(group_arr[i]) for i in range(len(group_arr))]
    min_var = 10000
    min_i = 0
    for i in range(len(var_arr)):
        if var_arr[i] < min_var:
            min_var = var_arr[i]
            min_i = i
    avg = np.mean(group_arr[min_i])
    return avg
def get_rotate_radian(radian, reverse=False):
    # Convert the Hough theta (0..pi) into the smallest correcting rotation angle
    radian_45 = np.pi / 4
    radian_90 = np.pi / 2
    radian_135 = radian_45 * 3
    radian_180 = np.pi
    if radian < radian_45:
        ret_radian = radian
    elif radian < radian_90:
        ret_radian = radian - radian_90
    elif radian < radian_135:
        ret_radian = radian - radian_90
    else:
        ret_radian = radian - radian_180
    if reverse:
        # Portrait images get an extra quarter turn
        ret_radian += radian_90
    return ret_radian
This approach is borrowed from another blogger. I remember bookmarking that post at the time; the idea really is simple and direct, and both the speed and the quality are high. Unfortunately, I spent the whole morning looking for that article today and could not find it again.
That completes the first step, image preprocessing. Every incoming image is first rotation-corrected; simply put, the text inside it is straightened out. Otherwise, recognition accuracy with the public OCR library suffers badly.
The top-level entry function for this step:
def rotate(image):
    blur = cv2.GaussianBlur(image, (7, 7), 0)
    canny = cv2.Canny(blur, 20, 150, apertureSize=3)
    lines = cv2.HoughLines(canny, 1, np.pi / 180, 118)
    # OpenCV 3+/4 returns lines with shape (N, 1, 2); flatten to rows of (rho, theta)
    lines = lines.reshape(-1, 2)
    theta_arr = [lines[i][1] for i in range(len(lines))]
    rotate_theta = get_min_var_avg(theta_arr)
    img2 = rotate_about_center2(image,
                                get_rotate_radian(rotate_theta, image.shape[0] > image.shape[1]))
    return img2
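For a quick end-to-end check of this step, here is a minimal sketch (the file paths are placeholders, and it assumes the imports and functions above are in the same script):

image = cv2.imread('./test_id_card.jpg')      # placeholder path, use your own test photo
corrected = rotate(image)
cv2.imwrite('./test_id_card_rotated.jpg', corrected)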
Detect and extract the target region
This step is a little more involved: it borrows the region-growing idea from image segmentation. Those who know it will tell you region growing is not really complicated; it is essentially a local search in disguise.
I will put a link to the relevant concepts at the end of this article; if you are interested, take a look, as it explains things in great detail.
1. Region growing
The general steps are:
a. Find a pixel that does not yet belong to any region.
b. Take that pixel as the center; merge any pixel in its 4-neighborhood that satisfies the growing criterion with this center pixel.
c. Keep taking pixels from the merged region as new center pixels and repeat step b.
d. When no pixel can be merged any more, growing stops.
The implementation below supports both a 4- and an 8-neighborhood.
class Point(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def getX(self):
        return self.x

    def getY(self):
        return self.y
def getGrayDiff(img, currentPoint, tmpPoint):
    # Absolute gray-level difference between two pixels
    return abs(int(img[currentPoint.x, currentPoint.y]) - int(img[tmpPoint.x, tmpPoint.y]))


def selectConnects(p):
    # p != 0: 8-neighborhood offsets; p == 0: 4-neighborhood offsets
    if p != 0:
        connects = [Point(-1, -1), Point(0, -1), Point(1, -1), Point(1, 0),
                    Point(1, 1), Point(0, 1), Point(-1, 1), Point(-1, 0)]
    else:
        connects = [Point(0, -1), Point(1, 0), Point(0, 1), Point(-1, 0)]
    return connects
def regionGrow(img, seeds, thresh, p=1):
    height, weight = img.shape
    seedMark = np.zeros(img.shape)
    seedList = []
    for seed in seeds:
        seedList.append(seed)
    label = 1
    connects = selectConnects(p)
    while len(seedList) > 0:
        currentPoint = seedList.pop(0)
        seedMark[currentPoint.x, currentPoint.y] = label
        # Iterate over the chosen neighborhood (4 or 8 offsets), not a hard-coded 8
        for i in range(len(connects)):
            tmpX = currentPoint.x + connects[i].x
            tmpY = currentPoint.y + connects[i].y
            if tmpX < 0 or tmpY < 0 or tmpX >= height or tmpY >= weight:
                continue
            grayDiff = getGrayDiff(img, currentPoint, Point(tmpX, tmpY))
            if grayDiff < thresh and seedMark[tmpX, tmpY] == 0:
                seedMark[tmpX, tmpY] = label
                seedList.append(Point(tmpX, tmpY))
    return seedMark
(Figure: region growing)
Applied to an ID card image, the effect is that once growing stops, only the text and the portrait photo remain. I also tried the more traditional OTSU and adaptive thresholding approaches, but both damage the information on the card and lose part of the image content, so region growing is used here instead; it essentially erodes away the card's background color.
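As a quick sanity check, here is a minimal sketch (not part of the original post) that reuses the seed-at-center idea and the threshold of 3 from the adjust_image function below:

gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# Seed at the image center; regionGrow treats Point.x as the row and Point.y as the column
seeds = [Point(gray.shape[0] // 2, gray.shape[1] // 2)]
seedMark = regionGrow(gray, seeds, 3)
# seedMark holds float labels 0/1; scale to 0/255 to save or display it
cv2.imwrite('./region_grow_mark.jpg', (seedMark * 255).astype(np.uint8))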
After growing stops, the result is dilated and binarized to obtain a mask; applying this mask to the original image extracts exactly the text information that needs to be recognized.
def adjust_image(image):
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    ret, binary = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
    # Seed the growing at the image center; regionGrow treats Point.x as the row index
    median_x = np.array(gray).shape[1] // 2
    median_y = np.array(gray).shape[0] // 2
    seeds = [Point(median_y, median_x)]
    pic = regionGrow(gray, seeds, 3)
    # Dilate the grown background region so it also covers the text strokes
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 17))
    pic_dilate = cv2.morphologyEx(pic, cv2.MORPH_DILATE, kernel)
    # Keep pixels that lie inside the grown region but are dark in the binarized image
    mask = pic_dilate - binary
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
    mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, kernel)
    mask_ = np.zeros(mask.shape, dtype=np.uint8)
    ret, mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
    # Copy the thresholded result into a uint8 mask that cv2.add accepts
    indexs = np.argwhere(mask == 255)
    for index in indexs:
        mask_[index[0], index[1]] = 255
    image = cv2.add(image, np.zeros(np.shape(image), dtype=np.uint8), mask=mask_)
    return image
2. Proportionally enlarge the extracted information
The goal here is to avoid the case where the captured region is rather small, which hurts recognition, so the extracted target region is scaled up.
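The snippet below uses a variable flag that the original text never defines; it is evidently the bounding rectangle (x, y, w, h) of the extracted region. One plausible way to obtain it, shown here as a hedged sketch rather than the author's exact code, is to chain the two previous steps and take cv2.boundingRect of the non-zero pixels (the file path is a placeholder):

src = cv2.imread('./test_id_card.jpg')            # placeholder path
image = adjust_image(rotate(src))                 # deskew, then mask out the card background
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
nonzero = cv2.findNonZero(cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)[1])
flag = cv2.boundingRect(nonzero)                  # (x, y, w, h) of the region to enlarge

With flag in hand, the enlargement itself maps that rectangle onto the full image: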
target_points = [(0, 0), (0, image.shape[0]),
                 (image.shape[1], image.shape[0]), (image.shape[1], 0)]
four_points = [(flag[0], flag[1]), (flag[0], flag[1] + flag[3]),
               (flag[0] + flag[2], flag[1] + flag[3]), (flag[0] + flag[2], flag[1])]
target_points = np.array(target_points, np.float32)
four_points = np.array(four_points, np.float32)
M = cv2.getPerspectiveTransform(four_points, target_points)
# The flag rectangle is stretched to fill the whole image, so the output size is the image size
Rotated = cv2.warpPerspective(image, M, (image.shape[1], image.shape[0]))
With the two steps above, the preparation work is done. What follows is the last and simplest step: recognition.
Recognize the content of the target region
This step uses the Python pytesseract library for recognition. It is straightforward, so I will not go through it line by line.
import pytesseract

def recognition(img):
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # Otsu binarization before handing the image to Tesseract
    ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    result = pytesseract.image_to_string(binary, lang='chi_sim')
    return result
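One practical note: pytesseract is only a thin wrapper, so the Tesseract engine itself and the chi_sim language data have to be installed separately, and on Windows you may also need to point the wrapper at the executable. A minimal, hedged sketch (the path below is just a placeholder for wherever Tesseract is installed on your machine):

# Only needed if tesseract is not already on the PATH (placeholder path)
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
print(recognition(Rotated))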
import re

id_num = demo_2.recognition(Rotated)   # demo_2 is the author's module holding recognition()
if not id_num:
    # Fall back to the unenlarged image if the enlarged region yields nothing
    id_num = demo_2.recognition(image)
with open('./id.txt', 'w') as p:
    p.write(id_num)
with open('./id.txt', 'r') as p:
    result = p.readlines()
results = []
for i in result:
    # Keep only Chinese characters, letters and digits from each OCR line
    results.append(re.findall(r'[\u4e00-\u9fa5a-zA-Z0-9]', i))
# print("姓名:", ''.join(results[0]))
flag = False
for result in results:
    if result != []:
        res = []
        name = []
        result = ''.join(result)
        res.append(re.findall(r'[0-9A-Z]', result))
        name.append(re.findall(r'[\u4e00-\u9fa5]', result))
        # Print the first line that contains Chinese characters as the name
        if name[0] != [] and flag == False:
            print("姓名:", ''.join(name[0]))   # name
            flag = True
        # A long run of digits/uppercase letters is taken to be the ID number
        if len(res[0]) >= 11:
            print("身份证号码:", ''.join(res[0]))   # ID number
If you need the full source code, please bookmark this article and leave your email address in the comments.