《ICDAR2013&2015数据解析》
文档分析与识别国际会议(International Conference on Document Analysis and Recognition,ICDAR)
ICDAR数据集是OCR流程常用的benchmark,包含文字目标检测的标注及OCR识别的标注结果。2013和2015的区别在于:2013给出的目标检测gt是水平框(4个值),而2015给出的是四边形(8个值),对歪斜的文本更为有效。本文给出解析数据的脚本,包括将目标框画到原图的功能和切出子图的功能。
Key Words:ICDAR数据解析
Beijing, 2022
直接上代码
import os
import cv2
import numpy as np
# Tasks covered by this script (debug view):
# 1. how to read an image
# 2. how to save an image
# 3. how to draw ground-truth boxes onto an image
img_2013_dir = "/Users/tianshu/Downloads/ICDAR2013"
gt_2013_dir = "/Users/tianshu/Downloads/ICDAR2013gt"
img_2015_dir = "/Users/tianshu/Downloads/ICDAR2015"
gt_2015_dir = "/Users/tianshu/Downloads/ICDAR2015gt"
# for 2013
# im_name_list = os.listdir(img_2013_dir)
# gt_name_list = ["gt_" + name.split('.')[0] + ".txt" for name in im_name_list]
# Image names and their matching ground-truth file names ("gt_<stem>.txt").
# Hidden entries returned by os.listdir (e.g. ".DS_Store") are skipped because
# their stem is empty and would map to a non-existent "gt_.txt"; the list is
# sorted so that runs are reproducible.
im_name_list = sorted(
    name for name in os.listdir(img_2015_dir) if not name.startswith('.')
)
gt_name_list = ["gt_" + name.split('.')[0] + ".txt" for name in im_name_list]
# debug view
def icdar_2013_debug_view():
    """Draw the ICDAR 2013 ground-truth boxes onto each image and save a copy.

    Every image in ``img_2013_dir`` is paired with ``gt_<stem>.txt`` in
    ``gt_2013_dir``. The first four comma-separated values of each
    ground-truth line are used as the two corners ``x1, y1, x2, y2`` of a
    rectangle, drawn in red. The annotated image is written under the same
    file name to the ``icdar2013_debug_view`` output directory.

    Raises:
        FileNotFoundError: if the ground-truth file of a readable image is
            missing.
        ValueError: if a non-blank ground-truth line does not start with
            four comma-separated integers.
    """
    out_dir = "/Users/tianshu/Downloads/icdar2013_debug_view"
    # cv2.imwrite does not create missing directories; make sure it exists.
    os.makedirs(out_dir, exist_ok=True)

    # List img_2013_dir directly rather than relying on the module-level
    # im_name_list, which is built from img_2015_dir.
    for im_name in sorted(os.listdir(img_2013_dir)):
        im = cv2.imread(os.path.join(img_2013_dir, im_name))
        if im is None:
            # Not a decodable image (hidden file, sub-directory, ...).
            continue

        gt_name = "gt_" + im_name.split('.')[0] + ".txt"
        # "utf-8-sig" drops a leading BOM if the annotation file has one.
        with open(os.path.join(gt_2013_dir, gt_name), encoding="utf-8-sig") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                x1, y1, x2, y2 = (int(v) for v in line.split(',')[:4])
                # draw
                im = cv2.rectangle(im, (x1, y1), (x2, y2), [0, 0, 255], 3)

        cv2.imwrite(os.path.join(out_dir, im_name), im)
# cut image patch for OCR
def icdar_2013_cut():
    """Crop every ICDAR 2013 ground-truth box into its own patch for OCR.

    Every image in ``img_2013_dir`` is paired with ``gt_<stem>.txt`` in
    ``gt_2013_dir``. Each ground-truth line is read as
    ``x1,y1,x2,y2,<transcription>``; the box is cut out of the image and
    saved to the ``icdar2013_cut`` directory as
    ``<stem>_<label>_<image index>_<box index>.jpg``.

    Raises:
        FileNotFoundError: if the ground-truth file of a readable image is
            missing.
        ValueError: if a non-blank ground-truth line does not start with
            four comma-separated integers.
    """
    out_dir = "/Users/tianshu/Downloads/icdar2013_cut"
    # cv2.imwrite does not create missing directories; make sure it exists.
    os.makedirs(out_dir, exist_ok=True)

    # List img_2013_dir directly rather than relying on the module-level
    # im_name_list, which is built from img_2015_dir.
    for idx, im_name in enumerate(sorted(os.listdir(img_2013_dir))):
        im = cv2.imread(os.path.join(img_2013_dir, im_name))
        if im is None:
            # Not a decodable image (hidden file, sub-directory, ...).
            continue

        stem = im_name.split('.')[0]
        gt_name = "gt_" + stem + ".txt"
        # "utf-8-sig" drops a leading BOM if the annotation file has one.
        with open(os.path.join(gt_2013_dir, gt_name), encoding="utf-8-sig") as f:
            gt_lines = [line.strip() for line in f]
        gt_lines = [line for line in gt_lines if line]

        for sub_idx, line in enumerate(gt_lines):
            # Split at most four times so that commas inside the
            # transcription stay part of the label.
            fields = line.split(',', 4)
            # The two corner points; clamp at 0 because a negative index
            # would wrap around when slicing the array.
            x1, y1, x2, y2 = (max(0, int(v)) for v in fields[:4])
            label = fields[4] if len(fields) > 4 else ''
            label = label.replace('"', '').strip()
            print(label)  # label

            patch = im[y1:y2, x1:x2, :]
            if patch.size == 0:
                # Degenerate box: nothing to write.
                continue

            # A path separator in the label would point into a directory
            # that does not exist, so the patch could not be written.
            safe_label = label.replace('/', '_')
            im_patch_name = stem + "_%s_%s_%s.jpg" % (safe_label, idx, sub_idx)
            cv2.imwrite(os.path.join(out_dir, im_patch_name), patch)
# PIL Image.open()
# for icdar 2015
# axis-aligned box (2013) -> quadrilateral (2015)
# 4 values per box -> 8 values per box
# advantage: a quadrilateral fits skewed text more tightly
def icdar_2015_debug_view():
    """Draw the ICDAR 2015 ground-truth quadrilaterals onto each image.

    Iterates over the module-level ``im_name_list`` / ``gt_name_list``
    pairs, reading images from ``img_2015_dir`` and annotations from
    ``gt_2015_dir``. The first eight comma-separated values of each
    ground-truth line are used as the four corner points
    ``x1,y1,...,x4,y4`` of a closed polygon, drawn in red. The annotated
    image is saved under the same file name in the debug-view directory.

    Raises:
        FileNotFoundError: if the ground-truth file of a readable image is
            missing.
        ValueError: if a non-blank ground-truth line does not start with
            eight comma-separated integers.
    """
    # NOTE(review): "idcar" looks like a misspelling of "icdar"; the path is
    # kept unchanged so existing outputs stay where they are.
    out_dir = "/Users/tianshu/Downloads/idcar2015_debug_view"
    # cv2.imwrite does not create missing directories; make sure it exists.
    os.makedirs(out_dir, exist_ok=True)

    for im_name, gt_name in zip(im_name_list, gt_name_list):
        im = cv2.imread(os.path.join(img_2015_dir, im_name))
        if im is None:
            # Not a decodable image (hidden file, sub-directory, ...).
            continue

        # "utf-8-sig" strips the byte-order mark regardless of the platform's
        # default encoding, so no manual "\ufeff" clean-up is needed.
        with open(os.path.join(gt_2015_dir, gt_name), encoding="utf-8-sig") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                fields = line.split(',')
                print(fields)
                x1, y1, x2, y2, x3, y3, x4, y4 = (int(v) for v in fields[:8])
                # One polygon with four vertices: shape (1, 4, 2).
                polygon = np.array(
                    [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]],
                    dtype=np.int32,
                )
                im = cv2.polylines(im, polygon, True, [0, 0, 255], 4)

        cv2.imwrite(os.path.join(out_dir, im_name), im)
def icdar_2015_polygon_cut():
    """Crop every ICDAR 2015 ground-truth quadrilateral into an upright patch.

    Steps for each text box:
      1. compute the minimum-area bounding rectangle of the quadrilateral,
         which also yields its rotation angle;
      2. rotate the whole image around the rectangle centre so that the
         rectangle becomes axis-aligned;
      3. cut the rectangle out of the rotated image.

    Iterates over the module-level ``im_name_list`` / ``gt_name_list``
    pairs, reading images from ``img_2015_dir`` and annotations from
    ``gt_2015_dir``. Each ground-truth line is read as
    ``x1,y1,...,x4,y4,<transcription>``. Patches are saved to the
    ``icdar2015_cut`` directory as ``<stem>_<label>_<box index>.jpg``.

    Raises:
        FileNotFoundError: if the ground-truth file of a readable image is
            missing.
        ValueError: if a non-blank ground-truth line does not start with
            eight comma-separated integers.
    """
    out_dir = "/Users/tianshu/Downloads/icdar2015_cut"
    # cv2.imwrite does not create missing directories; make sure it exists.
    os.makedirs(out_dir, exist_ok=True)

    for im_name, gt_name in zip(im_name_list, gt_name_list):
        im = cv2.imread(os.path.join(img_2015_dir, im_name))
        if im is None:
            # Not a decodable image (hidden file, sub-directory, ...).
            continue
        height, width = im.shape[:2]
        stem = im_name.split('.')[0]

        # "utf-8-sig" strips the byte-order mark regardless of the platform's
        # default encoding, so no manual "\ufeff" clean-up is needed.
        with open(os.path.join(gt_2015_dir, gt_name), encoding="utf-8-sig") as f:
            gt_lines = [line.strip() for line in f]
        gt_lines = [line for line in gt_lines if line]

        for sub_idx, line in enumerate(gt_lines):
            # Split at most eight times so that commas inside the
            # transcription stay part of the label.
            fields = line.split(',', 8)
            coords = [int(v) for v in fields[:8]]
            # One polygon with four vertices: shape (1, 4, 2).
            polygon = np.array(coords, dtype=np.int32).reshape(1, 4, 2)
            label = fields[8] if len(fields) > 8 else ''
            label = label.replace('"', '')

            # Minimum-area bounding rectangle.
            (x, y), (w, h), theta = cv2.minAreaRect(polygon)
            print(theta)

            # Bring the angle into [-45, 45] so the patch is rotated by the
            # smaller of the two possible corrections; width and height swap
            # whenever the angle is shifted by 90 degrees. The negative
            # branch covers OpenCV builds that report angles in [-90, 0).
            if theta > 45:
                theta -= 90
                w, h = h, w
            elif theta < -45:
                theta += 90
                w, h = h, w

            crop_w, crop_h = int(w), int(h)
            if crop_w < 1 or crop_h < 1:
                # Degenerate rectangle: there is no patch to extract.
                continue

            center = (int(x), int(y))
            # Rotate the full image around the rectangle centre ...
            M = cv2.getRotationMatrix2D(center, theta, 1)
            img_rot = cv2.warpAffine(im, M, (width, height))
            # ... then cut the now axis-aligned rectangle.
            img_crop = cv2.getRectSubPix(img_rot, (crop_w, crop_h), center)

            # A path separator in the label would point into a directory
            # that does not exist, so the patch could not be written.
            safe_label = label.replace('/', '_')
            im_patch_name = stem + "_%s_%s.jpg" % (safe_label, sub_idx)
            cv2.imwrite(os.path.join(out_dir, im_patch_name), img_crop)
if __name__ == '__main__':
    # Script entry point: only the ICDAR 2015 patch extraction runs.
    # Swap the call below to produce the debug overlay instead:
    # icdar_2015_debug_view()
    icdar_2015_polygon_cut()