关键代码
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image
# Build the OCR engine once at module level; lang="ch" and show_log=False are
# forwarded to the PaddleOCR constructor.
ocr = PaddleOCR(lang="ch", show_log=False)
# Open the input image with PIL (path is relative to the working directory).
img = Image.open('only.jpg')
# Run OCR on the image converted to an array (`np` is expected to be numpy).
# NOTE(review): presumably `result` is a flat list of [box, (text, score)]
# entries, matching the sample output further down; confirm for the installed
# PaddleOCR version before passing it to sort_paddle_result.
result = ocr.ocr(np.array(img))
def sort_paddle_result(result, val=10):
    """Reorder OCR detections into reading order: rows top-down, then left-to-right.

    Each entry is indexed as ``entry[0][0]`` to get the first box point, read
    as ``[x, y]``. Entries are walked in ascending ``y``; an entry joins the
    current row when its ``y`` is within ``val`` of the mean ``y`` of the
    entries already in that row, otherwise it starts a new row. Every row is
    then ordered by ``x`` and the rows are concatenated.

    NOTE(review): assumes ``result`` is a flat list of ``[box, (text, score)]``
    entries whose first box point is the top-left corner, as in the sample
    output; confirm against the PaddleOCR version in use.

    Args:
        result: Iterable of OCR entries. ``None`` or an empty input yields
            an empty list. The input is not modified.
        val: Maximum distance (exclusive) between an entry's ``y`` and the
            running row mean for the entry to count as part of that row.

    Returns:
        A new flat list holding the same entries in reading order.
    """
    # sorted() works on a copy so the caller's list keeps its original order.
    by_top = sorted(result or [], key=lambda entry: entry[0][0][1])

    rows = []
    row_y_sum = 0.0  # sum of y over rows[-1]; avoids recomputing the mean from scratch
    for entry in by_top:
        y = entry[0][0][1]
        if rows and abs(y - row_y_sum / len(rows[-1])) < val:
            rows[-1].append(entry)
            row_y_sum += y
        else:
            # Too far below the current row (or the very first entry): open a new row.
            rows.append([entry])
            row_y_sum = y
    print('共有%s行' % len(rows))

    sort_result = []
    for row in rows:
        # Order by the first point's x, then y; never compares text or scores.
        row.sort(key=lambda entry: (entry[0][0][0], entry[0][0][1]))
        sort_result.extend(row)  # flatten back to the original entry format
    print(sort_result)
    return sort_result
识别结果
共有3行
[[[[24.0, 20.0], [109.0, 18.0], [110.0, 41.0], [25.0, 43.0]],
('你真的懂', 0.9879447221755981)],
[[[144.0, 16.0], [165.0, 16.0], [165.0, 32.0], [144.0, 32.0]],
('唯', 0.9866647720336914)],
[[[211.0, 22.0], [275.0, 22.0], [275.0, 44.0], [211.0, 44.0]],
('的定义', 0.8930945992469788)],
[[[25.0, 57.0], [144.0, 51.0], [144.0, 69.0], [26.0, 75.0]],
('并不简单如呼吸', 0.9770209193229675)],
[[[28.0, 97.0], [163.0, 94.0], [163.0, 113.0], [29.0, 116.0]],
('你真的希望你能重清', 0.9331009984016418)],
[[[180.0, 91.0], [298.0, 91.0], [298.0, 106.0], [180.0, 106.0]],
('闭上眼睛用心在听', 0.8978151082992554)]]