团队博客: CSDN AI小组
前言
1、本文基于上一篇文章:关于提高OCR识别准确率的一些优化(二)进行了一些优化,将图片方向识别准确率提升至96%。
2、在阅读这篇文章之前,建议先看上一篇,以便更好地理解
一、优化思路
1、在上一篇文章中,我们使用paddleocr的方向分类器直接判别图片方向,发现效果并不怎么好,而且效率也很低,识别一张图片平均耗时2s。
2、鉴于以上存在的问题,于是想出了一个新的优化方案:
- 使用paddleocr的文本矩形框检测得到所有文本矩形框坐标
- 取出长宽比在 5 - 25 或 0.04 - 0.2 之间的文本矩形框坐标
- 从中随机取出一个矩形或按长宽比大小排序,取出长宽比居中的矩形(这里为了简便,直接取出第0个矩形)
- 用取出来的矩形,从原图中抠图
- 将抠出来的图片,作为paddleocr方向分类器的输入
二、完整代码
import cv2
import os
import time
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
class GetImageRotation(object):
    """Estimate the rotation (0, 90, 180 or 270) of a text image with PaddleOCR.

    The text boxes found by the detector decide whether text lines run
    horizontally or vertically; the angle classifier, fed with a crop of
    one elongated text box, then decides between the two remaining
    candidates.
    """

    def __init__(self):
        # Two separate PaddleOCR instances are kept on purpose: with a single
        # instance the author observed the warning "the angle classifier is
        # not initialized" and the classifier was not applied.
        self.ocr = PaddleOCR(use_angle_cls=True)
        self.ocr_angle = PaddleOCR(use_angle_cls=True)

    def get_real_rotation_when_null_rect(self, rect_list):
        """Fallback orientation flag used when no elongated box is available.

        Args:
            rect_list: iterable of boxes, each a sequence of four (x, y)
                points. Presumably ordered top-left, top-right, bottom-right,
                bottom-left as returned by the detector -- TODO confirm.

        Returns:
            1 if the mean width/height ratio of the non-square boxes is at
            least 1.5 (boxes are wider than tall), otherwise 0.
        """
        ratio_sum = 0
        skipped = 0
        for rect in rect_list:
            p0, p1, p3 = rect[0], rect[1], rect[3]
            width = abs(p1[0] - p0[0])
            height = abs(p3[1] - p0[1])
            if height == 0:
                # Degenerate box: the ratio is undefined, ignore it instead
                # of raising ZeroDivisionError.
                skipped += 1
                continue
            ratio = width / height
            if abs(ratio - 1.0) < 0.5:
                # Roughly square boxes carry no orientation information.
                skipped += 1
                continue
            ratio_sum += ratio
        # Avoid dividing by zero when every box was skipped.
        remaining = (len(rect_list) - skipped) or 1
        return 1 if ratio_sum / remaining >= 1.5 else 0

    def get_real_rotation_flag(self, rect_list):
        """Select clearly elongated boxes and derive the orientation flag.

        A box is kept when its width/height ratio lies in [5, 25] (wide) or
        in [0.04, 0.2] (tall).

        Args:
            rect_list: iterable of boxes, each a sequence of four (x, y)
                points.

        Returns:
            A tuple ``(flag, kept_boxes)``. ``flag`` is 1 when the mean ratio
            of the kept boxes is at least 1.5, otherwise 0. When no box
            qualifies the result is ``(0, [])``.
        """
        ret_rect = []
        ratio_sum = 0
        for rect in rect_list:
            p0, p1, p3 = rect[0], rect[1], rect[3]
            width = abs(p1[0] - p0[0])
            height = abs(p3[1] - p0[1])
            if height == 0:
                continue
            ratio = width / height
            if 5 <= ratio <= 25 or 0.04 <= ratio <= 0.2:
                ret_rect.append(rect)
                ratio_sum += ratio
        if not ret_rect:
            return 0, ret_rect
        if ratio_sum / len(ret_rect) >= 1.5:
            return 1, ret_rect
        return 0, ret_rect

    def crop_image(self, rect, image):
        """Cut the region spanned by ``rect[0]`` and ``rect[2]`` out of ``image``.

        Args:
            rect: four (x, y) points; only the first and the third are used
                as opposite corners.
            image: array indexed as ``image[row, column]``.

        Returns:
            The slice ``image[y0:y2, x0:x2]`` with coordinates truncated to int.
        """
        p0 = rect[0]
        p2 = rect[2]
        return image[int(p0[1]):int(p2[1]), int(p0[0]):int(p2[0])]

    def get_img_real_angle(self, img_path):
        """Return the estimated rotation of the image stored at ``img_path``.

        Args:
            img_path: path handed to ``cv2.imread``.

        Returns:
            One of 0, 90, 180, 270. 0 is also returned when the detector
            finds no text box or the classifier label is neither '0' nor '180'.
        """
        image = cv2.imread(img_path)
        rect_list = self.ocr.ocr(image, rec=False)
        if rect_list == [[]]:
            return 0

        try:
            real_angle_flag, rect_good = self.get_real_rotation_flag(rect_list)
            # For simplicity the first qualifying box is used; an empty list
            # raises IndexError and triggers the whole-image fallback below.
            rect_crop = rect_good[0]
            image_crop = self.crop_image(rect_crop, image)
            angle_cls = self.ocr_angle.ocr(
                image_crop, det=False, rec=False, cls=True)
        except Exception:
            # Best-effort fallback: classify the whole image instead of a crop.
            real_angle_flag = self.get_real_rotation_when_null_rect(rect_list)
            angle_cls = self.ocr_angle.ocr(
                image, det=False, rec=False, cls=True)
        print(angle_cls)
        print('real_angle_flag: {}'.format(real_angle_flag))

        label = angle_cls[0][0]
        if label == '0':
            return 0 if real_angle_flag else 270
        if label == '180':
            return 180 if real_angle_flag else 90
        return 0
def get_files_path_2(file_dir, suffixes=None):
    """Collect the paths of the files found recursively under ``file_dir``.

    Args:
        file_dir: directory to walk. Returned paths are built by joining the
            walked directory with the file name, so they are absolute only
            when ``file_dir`` itself is absolute.
        suffixes: optional file-name suffix (``str``) or iterable of suffixes,
            e.g. ``('.jpg', '.png')``. Matching is case-insensitive. When
            ``None`` (the default) every file is returned.

    Returns:
        A list of file paths; empty when ``file_dir`` does not exist or
        contains no matching file.
    """
    if suffixes is not None:
        if isinstance(suffixes, str):
            suffixes = (suffixes,)
        # str.endswith accepts a tuple of candidates.
        suffixes = tuple(suffix.lower() for suffix in suffixes)

    files_path = []
    for root, _dirs, files in os.walk(file_dir):
        for file in files:
            if suffixes is not None and not file.lower().endswith(suffixes):
                continue
            files_path.append(os.path.join(root, file))
    return files_path
问:为什么要实例化两个PaddleOCR?
答:仅实例化一个PaddleOCR时,会出现如下警告,导致不能检测方向
[2021/07/03 12:51:32] root WARNING: Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process
应该是PaddleOCR内部的问题,有时间可以深究一下
三、测试
# Benchmark driver: runs the angle estimation on every file of the test
# folder, printing the per-image result and the average time per image.
from time import time

get_image_rotation = GetImageRotation()
image_path = get_files_path_2('/Users/zhangzc/Desktop/workplace/ocrtest/test')
# Number of images whose estimated angle is not 0.
count = 0
time_list = []
for path in image_path:
    # Skip the macOS folder metadata file. Matching on the base name works
    # wherever the folder lives; the previous hard-coded absolute path did
    # not match the scanned directory, so the file was never skipped.
    if os.path.basename(path) == '.DS_Store':
        continue
    t1 = time()
    angle = get_image_rotation.get_img_real_angle(path)
    t2 = time()
    print('----'*10)
    print(angle)
    print('cost time: {} s'.format(t2-t1))
    time_list.append(t2-t1)
    print('----'*10)
    if angle != 0:
        # Print the path of every image not classified as 0 degrees.
        print('****'*10)
        print(path)
        print('****'*10)
        count += 1
print('print average cost time : {} s'.format(np.mean(time_list)))
测试结果:
- 200张0度图片,96%准确率
- 200张90度图片,仅有13%准确率
- 200张180度图片,88%准确率
- 200张270度图片,85%准确率
平均耗时:1.25s
四、分析
1、从测试结果发现,90度的图片准确率太低
2、90度的图片,绝大多数都被检测为270度
3、于是拿出所有方向抠出来的图片比较:
4、仔细观察就会发现:90度的图片旋转90度后,成了180度,270度的图片,旋转90度后,变成了0度,而paddleocr的方向分类器,在这两个方向上的识别准确率也比较高。因此,在图片被识别为270度时,我们可以将图片顺时针旋转90度后再输入到方向分类器中识别,或许会有个更好的效果。于是,我们开始优化。
五、优化
直接看代码吧
def rotate_bound_white_bg(self, image, angle):
    """Rotate ``image`` without clipping, filling uncovered areas with white.

    Args:
        self: not used by the body.
        image: array whose first two dimensions are (height, width).
        angle: rotation in degrees. Its negation is handed to
            ``cv2.getRotationMatrix2D``, where positive values mean
            counter-clockwise, so a positive ``angle`` rotates clockwise.

    Returns:
        The rotated image on a canvas enlarged to hold the whole result,
        with the border filled with (255, 255, 255).
    """
    height, width = image.shape[:2]
    center_x = width // 2
    center_y = height // 2

    matrix = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Size of the bounding box of the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Shift the transform so the result is centred on the enlarged canvas.
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(
        image, matrix, (new_width, new_height), borderValue=(255, 255, 255))
class GetImageRotation(object):
    """Estimate the rotation (0, 90, 180 or 270) of a text image with PaddleOCR.

    Compared with a plain "classify one crop" approach, a result of 270 is
    double-checked by rotating the crop by 90 degrees and classifying it
    again.
    """

    def __init__(self):
        # Two separate PaddleOCR instances are kept on purpose: with a single
        # instance the author observed the warning "the angle classifier is
        # not initialized" and the classifier was not applied.
        self.ocr = PaddleOCR(use_angle_cls=True)
        self.ocr_angle = PaddleOCR(use_angle_cls=True)

    def get_real_rotation_when_null_rect(self, rect_list):
        """Fallback orientation flag used when no elongated box is available.

        Args:
            rect_list: iterable of boxes, each a sequence of four (x, y)
                points. Presumably ordered top-left, top-right, bottom-right,
                bottom-left as returned by the detector -- TODO confirm.

        Returns:
            1 if the mean width/height ratio of the non-square boxes is at
            least 1.5 (boxes are wider than tall), otherwise 0.
        """
        ratio_sum = 0
        skipped = 0
        for rect in rect_list:
            p0, p1, p3 = rect[0], rect[1], rect[3]
            width = abs(p1[0] - p0[0])
            height = abs(p3[1] - p0[1])
            if height == 0:
                # Degenerate box: the ratio is undefined, ignore it instead
                # of raising ZeroDivisionError.
                skipped += 1
                continue
            ratio = width / height
            if abs(ratio - 1.0) < 0.5:
                # Roughly square boxes carry no orientation information.
                skipped += 1
                continue
            ratio_sum += ratio
        # Avoid dividing by zero when every box was skipped.
        remaining = (len(rect_list) - skipped) or 1
        return 1 if ratio_sum / remaining >= 1.5 else 0

    def get_real_rotation_flag(self, rect_list):
        """Pick the dominant group of elongated boxes and derive the flag.

        Boxes with a width/height ratio in [5, 25] count as "wide", those
        with a ratio in [0.04, 0.2] as "tall". The group with more members
        wins; on a tie the tall group is used.

        Args:
            rect_list: iterable of boxes, each a sequence of four (x, y)
                points.

        Returns:
            A tuple ``(flag, boxes)`` where ``boxes`` is the winning group and
            ``flag`` is 1 when the mean ratio of that group is at least 1.5,
            otherwise 0. When the winning group is empty the result is
            ``(0, [])``.
        """
        wide_rects = []
        tall_rects = []
        wide_ratios = []
        tall_ratios = []
        for rect in rect_list:
            p0, p1, p3 = rect[0], rect[1], rect[3]
            width = abs(p1[0] - p0[0])
            height = abs(p3[1] - p0[1])
            if height == 0:
                continue
            ratio = width / height
            if 5 <= ratio <= 25:
                wide_rects.append(rect)
                wide_ratios.append(ratio)
            elif 0.04 <= ratio <= 0.2:
                tall_rects.append(rect)
                tall_ratios.append(ratio)

        if len(wide_rects) > len(tall_rects):
            ret_rect, ratios = wide_rects, wide_ratios
        else:
            ret_rect, ratios = tall_rects, tall_ratios

        if not ratios:
            # Nothing qualified: avoid averaging an empty list.
            return 0, ret_rect
        if sum(ratios) / len(ratios) >= 1.5:
            return 1, ret_rect
        return 0, ret_rect

    def crop_image(self, rect, image):
        """Cut the region spanned by ``rect[0]`` and ``rect[2]`` out of ``image``.

        Args:
            rect: four (x, y) points; only the first and the third are used
                as opposite corners.
            image: array indexed as ``image[row, column]``.

        Returns:
            The slice ``image[y0:y2, x0:x2]`` with coordinates truncated to int.
        """
        p0 = rect[0]
        p2 = rect[2]
        return image[int(p0[1]):int(p2[1]), int(p0[0]):int(p2[0])]

    def get_img_real_angle(self, img_path):
        """Return the estimated rotation of the image stored at ``img_path``.

        Args:
            img_path: path handed to ``cv2.imread``.

        Returns:
            One of 0, 90, 180, 270. 0 is also returned when the detector
            finds no text box or the classifier label is neither '0' nor '180'.
        """
        # Imported here so the method works even though the file's import
        # block does not provide ``choice``.
        import random

        image = cv2.imread(img_path)
        rect_list = self.ocr.ocr(image, rec=False)
        if rect_list == [[]]:
            return 0

        except_flag = False
        image_crop = None
        try:
            real_angle_flag, rect_good = self.get_real_rotation_flag(rect_list)
            # An empty candidate list raises IndexError and triggers the
            # whole-image fallback below.
            rect_crop = random.choice(rect_good)
            image_crop = self.crop_image(rect_crop, image)
            angle_cls = self.ocr_angle.ocr(
                image_crop, det=False, rec=False, cls=True)
        except Exception:
            # Best-effort fallback: classify the whole image instead of a crop.
            except_flag = True
            real_angle_flag = self.get_real_rotation_when_null_rect(rect_list)
            angle_cls = self.ocr_angle.ocr(
                image, det=False, rec=False, cls=True)

        ret_angle = 0
        label = angle_cls[0][0]
        if label == '0':
            if real_angle_flag:
                ret_angle = 0
            else:
                ret_angle = 270
                if not except_flag:
                    # Re-check the 270 guess on a crop rotated by 90 degrees.
                    # rotate_bound_white_bg is a module-level function whose
                    # first parameter is an unused ``self``, so one is passed
                    # explicitly.
                    rotated_crop = rotate_bound_white_bg(self, image_crop, 90)
                    angle_cls = self.ocr_angle.ocr(
                        rotated_crop, det=False, rec=False, cls=True)
                    if angle_cls[0][0] == '180':
                        ret_angle = 90
        elif label == '180':
            ret_angle = 180 if real_angle_flag else 90
        return ret_angle
与前面代码比较,在ret_angle=270时增加了一个顺时针旋转90度后再判断方向的操作,并在get_real_rotation_flag函数中增加了一个统计长宽比大于1和小于1计数的操作,为了确保如下这种情况也能正确识别:
总结
1、在90度方向上,准确率相比之前的13%提升至86.5%
2、在270度方向上,准确率为94%,较之前85%提升了9个百分点
3、在0度和180度方向上,准确率分别为93.5%、89%
4、在所有方向上的平均准确率为90.75%,效果还不错
5、如果你有更好的优化方案,欢迎随时私信,感激不尽
相关文章: