经过前面的步骤,现在已经得到一个407 * 407的正方形图片,但是这个图片不一定是正确的方向(正确方向下,左上、左下、右上三个角各有一个定位点)。我们需要对这个图片进行旋正处理。
首先对图片进行一个二值化处理,在这个步骤,考虑到图片可能因为光照的原因,二维码部分存在较大亮度差,导致灰度值差异较大,我选择使用自适应阈值二值化方式进行处理
threshed_image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 71,
-20)
然后找定位图形,按照二维码生成规范,每个二维码图形应当有三个完整的定位图形,分别位于左上、左下、右上角。
按照之前看的网上的方案,是通过轮廓关系来寻找,即:
1.定位图形的轮廓具有两层子轮廓
2.子轮廓/父轮廓面积比在0.5左右((5 * 5) / (7 * 7))
但是这个方案对图像要求很高,因为很多二维码图形本身是断续的,无法找到连续的轮廓,所以这个方案不是很实用。
《干货 | 史上最全的支付宝二维码扫码优化技术方案》:https://yq.aliyun.com/articles/599505?utm_content=m_1000000228
还有一种方案是支付宝的同学分享的,基于扫描线段关系,按照二维码的规范,生成的定位图像的边宽度比为1:1:3:1:1,如果可以找到符合这个线段宽度比例且颜色组合正确,基本可以认定是找到了定位点。但是这种方案在实际的计算过程中遇到了一个问题,就是二维码的边长随机性太大,按照预期,每个边长应当在11px左右,但是实际的扫描过程中,经常出现20个px的超宽边,也出现过5px的超窄边,使用宽度比例的方案效果欠佳,代码实现如下:
def scan_location(image_cut):
    """Scan a 100x100 corner crop for a QR finder pattern.

    A finder pattern shows, along any scan line through its centre, five
    runs colored black/white/black/white/black whose widths are roughly in
    ratio 1:1:3:1:1. The crop is scanned row by row (horizontal pass) and
    column by column (vertical pass); a direction counts as matched once
    more than 10 scan lines contain such a five-run window.

    Returns:
        "all"  -- pattern found both horizontally and vertically
        "half" -- pattern found in exactly one direction
        "none" -- pattern found in neither direction
    """
    # Rows feed the horizontal pass, columns the vertical pass.
    rows = ([image_cut[i][j] for j in range(100)] for i in range(100))
    cols = ([image_cut[i][j] for i in range(100)] for j in range(100))
    horizontal_found = _count_pattern_lines(rows)
    vertical_found = _count_pattern_lines(cols)
    if horizontal_found and vertical_found:
        return "all"
    if horizontal_found or vertical_found:
        return "half"
    return "none"


def _line_runs(values):
    """Split one scan line into runs of [begin_index, end_index, color].

    Bug fix over the original: the trailing run is now always recorded (the
    original only appended a run on a color change, so the last run of every
    line was silently dropped).
    """
    runs = []
    begin = 0
    color = values[0]
    for idx in range(1, len(values)):
        if values[idx] != color:
            runs.append([begin, idx - 1, color])
            begin = idx
            color = values[idx]
    runs.append([begin, len(values) - 1, color])
    return runs


def _filter_noise(runs, noise_length=1):
    """Drop runs no longer than noise_length (print noise / broken lines).

    NOTE(review): kept behavior-compatible with the original heuristic --
    when a noise run is dropped, the previously kept run is stretched to the
    end of the run FOLLOWING the noise, while that following run is still
    appended on its own afterwards. The resulting overlap is harmless for
    the ratio checks below.
    """
    filtered = []
    last = len(runs) - 1
    for k, run in enumerate(runs):
        if k == 0:
            filtered.append(run)
        elif run[1] - run[0] <= noise_length:
            if k != last:
                filtered[-1][1] = runs[k + 1][1]
        else:
            filtered.append(run)
    return filtered


def _window_matches(window):
    """True when five consecutive runs look like a finder-pattern slice."""
    # Expected color sequence: black, white, black, white, black.
    if [run[2] for run in window] != [0, 255, 0, 255, 0]:
        return False
    w1, w2, w3, w4, w5 = (run[1] - run[0] for run in window)
    # Guard against zero-width runs (the original risked ZeroDivisionError).
    if w1 == 0 or w2 == 0:
        return False
    # Width ratios: roughly 1:1:3:1:1 with generous tolerances, because
    # printed module widths vary a lot (see surrounding article text).
    if not 0.45 < w1 / w2 < 2.2:
        return False
    if not 2 < w3 / w2 < 5:
        return False
    if not 0.45 < w4 / w2 < 2.2:
        return False
    if not 0.45 < w5 / w1 < 2.2:
        return False
    # Absolute per-run width windows (pixels) for the expected module size.
    if not (5 <= w1 <= 20 and 5 <= w2 <= 20 and 5 <= w4 <= 20 and 5 <= w5 <= 20):
        return False
    if not 25 <= w3 <= 50:
        return False
    # Total width of the 7-module window. Bug fix: the original only
    # computed this in the horizontal pass and reused the stale value (or
    # raised NameError) in the vertical pass.
    return 60 <= w1 + w2 + w3 + w4 + w5 <= 100


def _count_pattern_lines(lines, required=10):
    """Return True once more than `required` matching windows are seen."""
    count = 0
    for values in lines:
        runs = _filter_noise(_line_runs(values))
        # Fewer than 5 runs cannot contain the pattern; skip this line.
        if len(runs) < 5:
            continue
        for k in range(len(runs) - 4):
            if _window_matches(runs[k:k + 5]):
                count += 1
                if count > required:
                    return True
    return False
由于线段经常出现断点,每次扫描还需要考虑当前的分割是真正的白色线段还是发生的白色噪声,噪声还需要将前后线段续接。并且由于线段宽度不确定,写了很多预定义的阈值,整体代码结构较乱,而且需要经常调阈值来适配。
换一个思考方式,我们知道我们需要在四个边角去找定位图形,我先把四个边角切分出来,作为待扫描目标
定位图形具有以下特征:中心是一个黑色方块,中圈是一个白色方框,外圈是一个黑色方框。
从目前搜集到的发票来看,黑色部分大都存在断线的情况,但是白色区域没有出现被黑色部分的溢出占满的情况,所以,当我们扫描到一个方框,边距合适,且线条颜色为纯白时,基本可以认定是中心中圈的白色方框。
此时基本可以认定当前是一个定位点。但是由于发票存在很多定位点缺损的情况,即从左侧开始打印的时候,左侧的二维码部分存在缺失,导致定位点缺损,这种情况我们需要进行特殊处理,所以,我们还需要判定,当前这个定位点是一个完整的定位点还是一个残损的定位点。
完整的定位点还满足一个条件,即中圈白框外面必然存在一个黑色方框,所以,当我们以白框坐标为基准,扫描周边的时候,如果可以扫描到一个线框,平均颜色趋近于黑色,我们就可以认定为这是一个完整的定位点。
当扫描到完整定位点时,返回all;扫描到残损定位点时,返回half;未扫描到时,返回none
构造多个白框宽度进行扫描,避免由于图像打印不标准而扫描宽度一成不变导致的漏扫
# Scan for a locator pattern based on its white middle frame.
def scan_location_white_edge(image_cut):
    """Scan the corner crop for a locator using its white middle frame.

    Several window widths (48, 45, 42, 39, 36) are tried so that slightly
    off-scale prints are still caught; the first conclusive result wins.
    The original repeated the call/check pair five times verbatim -- folded
    into a loop, behavior unchanged.

    Returns "all" for a complete locator, "half" for a damaged one and
    "none" when no locator is found at any width.
    """
    for window_width in (48, 45, 42, 39, 36):
        scan_result = scan_location_white_edge_once(image_cut, window_width)
        if scan_result in ("all", "half"):
            return scan_result
    return "none"
单次扫描,每次扫描到白框后,都基于当前的白框宽度,加上5种间隔宽度进行扫描,避免漏扫
# Scan for a locator via its white middle frame, single window width.
def scan_location_white_edge_once(image_cut, window_width):
    """Single scan pass for one candidate white-frame width.

    Slides a window_width x window_width window over the 100x100 crop; a
    window whose 1px border ring is pure white marks the locator's white
    middle frame. Around a found white frame, larger windows (white width
    plus several candidate gaps) are probed for a mostly-black border ring,
    which indicates a complete locator.

    Returns "all" for a complete locator, "half" when only the white frame
    was found, "none" when no white frame exists.
    """
    def border_sum(top, left, size):
        # Sum of the 1px-wide border ring of the size x size window.
        whole = image_cut[top:top + size, left:left + size].sum()
        inner = image_cut[top + 1:top + size - 1, left + 1:left + size - 1].sum()
        return whole - inner

    # Darkness threshold derived from the crop's mean brightness.
    # (The original author notes this heuristic is not very reliable.)
    darkness_threshold = max(image_cut.mean() - 80, 15)
    # Sum a 1px all-white border ring of size window_width would have.
    all_white_sum = (window_width * window_width
                     - (window_width - 2) * (window_width - 2)) * 255
    for top in range(0, 100 - window_width):
        for left in range(0, 100 - window_width):
            if border_sum(top, left, window_width) != all_white_sum:
                continue
            # White frame found: probe for a black outer frame at several
            # white/black gap widths, in this specific preference order.
            for gap in (11, 8, 14, 5, 17):
                size = window_width + gap * 2
                for k in range(0, 100 - size):
                    for l in range(0, 100 - size):
                        # Border treated as black when its average is below
                        # the threshold (ring length approximated as 4*size).
                        if border_sum(k, l, size) / (size * 4) < darkness_threshold:
                            return "all"
            return "half"
    return "none"
当扫描出所有的定位点之后,根据定位点的关系进行图像旋转处理
根据发票二维码打印的实际情况,一般会出现以下几种情况:
三个all,即左上,左下,右上
两个half一个all,左上、左下的half,右上的all
一个all,右上的all
除了这几种情况,其他的情况都属于不确定,统一按照一个规则进行图像旋转
根据以上收集到的定位点情况,进行图像旋转,由于使用opencv的旋转api可能会导致出现1px的黑边或者白边,所以继续使用投影变换进行旋转
# Rotate the image so the locator patterns sit at the conventional corners.
def upright_image(self, image):
    """Rotate the 407x407 QR crop upright using the detected locators.

    The locator scan yields two lists of corner names ("left_top",
    "left_bottom", "right_top", "right_bottom"): complete locators
    (all_array) and damaged ones (half_array). The rotation angle is chosen
    so the locators end up at left-top, left-bottom and right-top. A
    perspective transform is used instead of cv2.warpAffine rotation, which
    can introduce a 1px black/white border.

    Returns:
        (uprighted_image, defect_flag) -- the rotated image plus a flag
        that is True when the locators indicate a damaged code area.
    """
    (all_array, half_array) = loc.scan_all_location(image)
    rotation_angle = 0
    defect_flag = False
    if len(all_array) + len(half_array) == 3:
        # Three locators found: rotate so the empty corner is right-bottom.
        if "left_top" not in all_array and "left_top" not in half_array:
            rotation_angle = 180
        elif "left_bottom" not in all_array and "left_bottom" not in half_array:
            rotation_angle = 90
        elif "right_top" not in all_array and "right_top" not in half_array:
            rotation_angle = 270
    elif len(all_array) == 1 or (len(all_array) == 0 and len(half_array) == 1):
        # Exactly one locator found: rotate it toward right-top.
        location = all_array[0] if len(all_array) == 1 else half_array[0]
        if location == "left_top":
            rotation_angle = 270
        elif location == "left_bottom":
            rotation_angle = 180
        elif location == "right_bottom":
            rotation_angle = 90
        else:
            # NOTE(review): the original also rotated 90 when the locator is
            # already at right_top, which looks like it should be 0 -- kept
            # as-is to preserve behavior; confirm intent with the author.
            rotation_angle = 90
    if len(half_array) != 0 or (len(half_array) == 0 and len(all_array) == 1):
        # Damaged or incomplete locators: flag the result as defective.
        defect_flag = True
    if self.trace_image:
        cv2.imwrite(self.trace_path + "401_rotation_source_" + self.image_name, image)
    # Corner order of source_position: TL, BL, BR, TR.
    source_position = np.float32([[0, 0], [0, 406], [406, 406], [406, 0]])
    if rotation_angle == 0:
        uprighted_image = image
    else:
        if rotation_angle == 90:
            target_position = np.float32([[0, 406], [406, 406], [406, 0], [0, 0]])
        elif rotation_angle == 180:
            target_position = np.float32([[406, 406], [406, 0], [0, 0], [0, 406]])
        else:  # 270
            # Bug fix: the original used the exact same corner mapping for
            # 270 as for 90, so both angles produced the same rotation.
            # 270 must be the inverse mapping of 90.
            target_position = np.float32([[406, 0], [0, 0], [0, 406], [406, 406]])
        transform = cv2.getPerspectiveTransform(source_position, target_position)
        uprighted_image = cv2.warpPerspective(image, transform, (407, 407))
    if self.trace_image:
        cv2.imwrite(self.trace_path + "401_rotation_target_" + self.image_name, uprighted_image)
    return uprighted_image, defect_flag
旋正后的图片如图所示:
至此,我们已经得到了一个旋正的正方形,可以基于这个图形进行一些识别算法的编写了