ICDAR2013&2015数据解析

《ICDAR2013&2015数据解析》

  ICDAR 是文档分析与识别国际会议(International Conference on Document Analysis and Recognition)的缩写。
ICDAR 数据集是 OCR 流程常用的 benchmark,包含文字目标检测的标注及 OCR 识别的标注结果。2013 和 2015 的区别是:2013 给出的目标检测 gt 是水平框(4 个值),而 2015 给出的是四边形(8 个值),对歪斜的文本更为有效。本文给出解析数据的脚本,包括将目标框画到原图的功能和切出子图的功能。


Key Words:ICDAR数据解析


Beijing, 2022

直接上代码

import os
import cv2
import numpy as np

# Task: debug view

# 1. How to read an image
# 2. How to save an image
# 3. How to draw the ground-truth boxes on an image

# Dataset locations: each image folder has a matching ground-truth folder.
img_2013_dir = "/Users/tianshu/Downloads/ICDAR2013"
gt_2013_dir = "/Users/tianshu/Downloads/ICDAR2013gt"

img_2015_dir = "/Users/tianshu/Downloads/ICDAR2015"
gt_2015_dir = "/Users/tianshu/Downloads/ICDAR2015gt"

# File extensions treated as images when listing a dataset folder.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

# To work on ICDAR 2013 instead, list img_2013_dir below rather than
# img_2015_dir.

# os.listdir returns every directory entry (hidden files such as .DS_Store
# included) in arbitrary order, so keep only image files and sort them to get
# a reproducible processing order.
im_name_list = sorted(
    name for name in os.listdir(img_2015_dir)
    if name.lower().endswith(_IMAGE_EXTENSIONS)
)
# Ground-truth files are named "gt_<image name without extension>.txt".
gt_name_list = ["gt_" + os.path.splitext(name)[0] + ".txt" for name in im_name_list]


# debug view
def icdar_2013_debug_view():
    """Draw the ICDAR 2013 ground-truth boxes on each image and save a copy.

    Every readable image in ``img_2013_dir`` is paired with
    ``gt_<image stem>.txt`` in ``gt_2013_dir``. Each ground-truth line starts
    with ``x1, y1, x2, y2`` (two opposite corners of an axis-aligned box); the
    box is drawn in red and the annotated image is written, under its original
    file name, to the ``icdar2013_debug_view`` folder.

    Images that cannot be decoded and images without a ground-truth file are
    skipped.
    """
    out_dir = "/Users/tianshu/Downloads/icdar2013_debug_view"
    # cv2.imwrite does not create missing folders; it just fails to write.
    os.makedirs(out_dir, exist_ok=True)

    # List the 2013 folder directly: the module-level im_name_list may have
    # been built from another dataset folder.
    for im_name in sorted(os.listdir(img_2013_dir)):
        im = cv2.imread(os.path.join(img_2013_dir, im_name))
        if im is None:
            # Not a decodable image (hidden file, sub-folder, ...).
            continue

        gt_path = os.path.join(gt_2013_dir, "gt_" + im_name.split('.')[0] + ".txt")
        if not os.path.isfile(gt_path):
            print("skip %s: missing ground truth %s" % (im_name, gt_path))
            continue

        # "utf-8-sig" drops a leading byte-order mark, which would otherwise
        # end up glued to the first coordinate.
        with open(gt_path, encoding="utf-8-sig") as f:
            for line in f:
                # Only the four leading numbers matter here, so commas and
                # whitespace can both be treated as separators.
                # NOTE(review): some ICDAR 2013 ground-truth files appear to be
                # space-separated rather than comma-separated - confirm
                # against the data actually used.
                fields = line.replace(',', ' ').split()
                if len(fields) < 4:
                    # Blank or truncated line.
                    continue
                x1, y1, x2, y2 = (int(value) for value in fields[:4])
                im = cv2.rectangle(im, (x1, y1), (x2, y2), [0, 0, 255], 3)

        cv2.imwrite(os.path.join(out_dir, im_name), im)


# cut image patch for OCR
def icdar_2013_cut():
    """Crop every ICDAR 2013 ground-truth box into its own image patch.

    Every readable image in ``img_2013_dir`` is paired with
    ``gt_<image stem>.txt`` in ``gt_2013_dir``. Each ground-truth line holds
    ``x1, y1, x2, y2, "label"``. The box is cut out of the image and written
    to the ``icdar2013_cut`` folder as
    ``<image stem>_<label>_<image index>_<box index>.jpg``.

    Images that cannot be decoded, images without a ground-truth file, blank
    lines and boxes with an empty area are skipped.
    """
    out_dir = "/Users/tianshu/Downloads/icdar2013_cut"
    # cv2.imwrite does not create missing folders; it just fails to write.
    os.makedirs(out_dir, exist_ok=True)

    idx = 0
    # List the 2013 folder directly: the module-level im_name_list may have
    # been built from another dataset folder.
    for im_name in sorted(os.listdir(img_2013_dir)):
        im = cv2.imread(os.path.join(img_2013_dir, im_name))
        if im is None:
            # Not a decodable image (hidden file, sub-folder, ...).
            continue

        gt_path = os.path.join(gt_2013_dir, "gt_" + im_name.split('.')[0] + ".txt")
        if not os.path.isfile(gt_path):
            print("skip %s: missing ground truth %s" % (im_name, gt_path))
            continue

        # "utf-8-sig" drops a leading byte-order mark, which would otherwise
        # end up glued to the first coordinate.
        with open(gt_path, encoding="utf-8-sig") as f:
            gt_lines = [line.strip() for line in f]

        sub_idx = 0
        for line in gt_lines:
            if not line:
                continue

            # Pick the separator from the part before the quoted label so that
            # a comma inside the label cannot influence the choice.
            # NOTE(review): some ICDAR 2013 ground-truth files appear to be
            # space-separated rather than comma-separated - confirm against
            # the data actually used.
            separator = ',' if ',' in line.split('"')[0] else None
            # maxsplit=4 keeps the label in one piece, commas/spaces included.
            fields = line.split(separator, 4)
            if len(fields) < 4:
                continue

            x1, y1, x2, y2 = (int(value) for value in fields[:4])  # two corners
            label = fields[4].strip().replace('"', '') if len(fields) > 4 else ''
            print(label)

            # A negative index would wrap around to the other side of the
            # image when slicing, so clamp at the image border.
            x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
            patch = im[y1:y2, x1:x2, :]
            if patch.size == 0:
                # Degenerate box: cv2.imwrite rejects empty images.
                continue

            im_patch_name = im_name.split('.')[0] + "_%s_%s_%s.jpg" % (label, idx, sub_idx)
            if not cv2.imwrite(os.path.join(out_dir, im_patch_name), patch):
                # Typically a label with characters that are invalid in a file
                # name (e.g. a path separator).
                print("could not write patch %r" % im_patch_name)
            sub_idx += 1
        idx += 1


# PIL Image.open()

# for ICDAR 2015
# horizontal box -> quadrilateral
# 4 values per box -> 8 values per box
# Advantage: fits skewed text more tightly

def icdar_2015_debug_view():
    """Draw the ICDAR 2015 ground-truth quadrilaterals on each image and save a copy.

    Images and ground-truth files are taken pairwise from the module-level
    ``im_name_list`` / ``gt_name_list`` and read from ``img_2015_dir`` /
    ``gt_2015_dir``. Each ground-truth line starts with the eight values
    ``x1,y1,x2,y2,x3,y3,x4,y4`` (the four corners); the closed polygon is drawn
    in red and the annotated image is written under its original file name.

    Entries that cannot be decoded as an image, images without a ground-truth
    file, and blank or truncated lines are skipped.
    """
    # The folder name (spelled "idcar") is kept exactly as it was so results
    # keep landing in the same place.
    out_dir = "/Users/tianshu/Downloads/idcar2015_debug_view"
    # cv2.imwrite does not create missing folders; it just fails to write.
    os.makedirs(out_dir, exist_ok=True)

    for im_name, gt_name in zip(im_name_list, gt_name_list):
        im = cv2.imread(os.path.join(img_2015_dir, im_name))
        if im is None:
            # Not a decodable image (hidden file, sub-folder, ...).
            continue

        gt_path = os.path.join(gt_2015_dir, gt_name)
        if not os.path.isfile(gt_path):
            print("skip %s: missing ground truth %s" % (im_name, gt_path))
            continue

        # "utf-8-sig" drops the leading byte-order mark, which would otherwise
        # end up glued to the first coordinate.
        with open(gt_path, encoding="utf-8-sig") as f:
            gt_list = [line.strip().split(',') for line in f if line.strip()]

        for gt in gt_list:
            if len(gt) < 8:
                # Truncated line: not enough values for four corners.
                continue
            print(gt)
            corners = [int(value) for value in gt[:8]]
            polygon = np.array(corners, dtype=np.int32).reshape(4, 2)
            # polylines takes a list of polygons; True closes the outline.
            im = cv2.polylines(im, [polygon], True, [0, 0, 255], 4)

        cv2.imwrite(os.path.join(out_dir, im_name), im)


def icdar_2015_polygon_cut():
    """Cut every ICDAR 2015 text quadrilateral out as an upright image patch.

    For each ground-truth line (``x1,y1,...,x4,y4,label``):

    1. compute the minimum-area bounding rectangle of the four corners, which
       also yields the rotation angle of the text region;
    2. rotate the whole image around the rectangle centre so that the
       rectangle becomes axis-aligned;
    3. crop the rectangle from the rotated image.

    Patches are written to the ``icdar2015_cut`` folder as
    ``<image stem>_<label>_<box index>.jpg``. Images and ground-truth files
    are taken pairwise from the module-level ``im_name_list`` /
    ``gt_name_list``. Entries that cannot be decoded as an image, images
    without a ground-truth file, blank or truncated lines and rectangles
    smaller than one pixel are skipped.
    """
    out_dir = "/Users/tianshu/Downloads/icdar2015_cut"
    # cv2.imwrite does not create missing folders; it just fails to write.
    os.makedirs(out_dir, exist_ok=True)

    for im_name, gt_name in zip(im_name_list, gt_name_list):
        im = cv2.imread(os.path.join(img_2015_dir, im_name))
        if im is None:
            # Not a decodable image (hidden file, sub-folder, ...).
            continue
        height, width = im.shape[:2]

        gt_path = os.path.join(gt_2015_dir, gt_name)
        if not os.path.isfile(gt_path):
            print("skip %s: missing ground truth %s" % (im_name, gt_path))
            continue

        # "utf-8-sig" drops the leading byte-order mark, which would otherwise
        # end up glued to the first coordinate.
        with open(gt_path, encoding="utf-8-sig") as f:
            gt_lines = [line.strip() for line in f]

        sub_idx = 0
        for line in gt_lines:
            if not line:
                continue
            # maxsplit=8 keeps the label in one piece, so commas that are part
            # of the text are not lost.
            gt = line.split(',', 8)
            if len(gt) < 8:
                # Truncated line: not enough values for four corners.
                continue

            corners = [int(value) for value in gt[:8]]
            polygon = np.array(corners, dtype=np.int32).reshape(4, 2)
            label = gt[8].replace('"', '') if len(gt) > 8 else ''

            # Minimum-area bounding rectangle: centre, size and angle.
            (x, y), (w, h), theta = cv2.minAreaRect(polygon)
            print(theta)

            # Bring the angle into [-45, 45] so the smaller rotation is used,
            # swapping width and height whenever a quarter turn is folded
            # away. This covers both angle ranges minAreaRect may report,
            # (0, 90] and [-90, 0).
            if theta > 45:
                theta -= 90
                w, h = h, w
            elif theta < -45:
                theta += 90
                w, h = h, w

            crop_w, crop_h = int(w), int(h)
            if crop_w < 1 or crop_h < 1:
                # Degenerate rectangle: getRectSubPix needs a positive size.
                continue

            # Rotate the full image around the rectangle centre, then crop.
            center = (int(x), int(y))
            M = cv2.getRotationMatrix2D(center, theta, 1)
            img_rot = cv2.warpAffine(im, M, (width, height))
            img_crop = cv2.getRectSubPix(img_rot, (crop_w, crop_h), center)

            im_patch_name = im_name.split('.')[0] + "_%s_%s.jpg" % (label, sub_idx)
            if not cv2.imwrite(os.path.join(out_dir, im_patch_name), img_crop):
                # Typically a label with characters that are invalid in a file
                # name (e.g. a path separator).
                print("could not write patch %r" % im_patch_name)
            sub_idx += 1


if __name__ == '__main__':
    # icdar_2015_debug_view()
    icdar_2015_polygon_cut()
  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值