SynthText in the Wild Dataset 转 total_text数据集格式

最新推荐文章于 2024-08-10 08:18:44 发布

qq_39529154

最新推荐文章于 2024-08-10 08:18:44 发布

阅读量1.6k

点赞数

分类专栏：深度学习 | 文章标签：深度学习

本文链接：https://blog.csdn.net/qq_39529154/article/details/108689908

版权

深度学习专栏收录该内容

2 篇文章

订阅专栏

SynthText in the Wild Dataset 数据集下载地址

https://www.robots.ox.ac.uk/~vgg/data/scenetext/

需求

使用 https://github.com/MhLiao/DB 时，需要将 SynthText 转换为 total_text 数据集格式，用于训练预训练模型。

代码（目前会在保存的图片上画出红色的方框，不需要时可以将画框的那行代码注释掉）

# -*- coding: utf-8 -*-#

# -------------------------------------------------------------------------------
# Name:         synthText2TotalText
# Description:
# Author:       zx
# Date:         2020/9/18
# -------------------------------------------------------------------------------

import scipy.io as sio
import cv2
import os
from tqdm import tqdm

# Input: the SynthText annotation file and the folder that image names in
# gt.mat are resolved against.
dataFile = './SynthText/SynthText/gt.mat'
data_images_file = "./SynthText/SynthText"

# Output layout:
#   <saveFile>/train_images/   converted images
#   <saveFile>/train_gts/      one "<image name>.txt" annotation file per image
#   <saveFile>/train_list.txt  list of image file names
saveFile = "./total_text2/"
train_images_file = os.path.join(saveFile, "train_images")
train_images_gts = os.path.join(saveFile, "train_gts")
train_images_list_txt = "train_list.txt"

# exist_ok=True makes directory creation idempotent and avoids the
# check-then-create race of "if not exists: makedirs".
os.makedirs(saveFile, exist_ok=True)
os.makedirs(train_images_file, exist_ok=True)
os.makedirs(train_images_gts, exist_ok=True)


def _split_words(txt_entries):
    """Flatten the text instances of one image into a flat list of words.

    Each entry may hold several lines and several words per line; splitting
    on any whitespace run avoids the empty "words" that splitting on a single
    space produces when an entry contains repeated or padding spaces.
    """
    return [word for entry in txt_entries for word in entry.split()]


def _as_word_boxes(rec):
    """Return the word boxes of one image indexed as [xy][corner][word].

    An image with a single word is stored as a 2-D (2 x 4) array, so a
    trailing word axis is added to let the caller index every image the
    same way.
    """
    if rec.ndim == 2:
        rec = rec.reshape(rec.shape + (1,))
    return rec


def convert2txt(max_images=11, draw_boxes=True):
    """Convert SynthText ground truth into the total_text file layout.

    Reads ``dataFile`` with ``scipy.io.loadmat`` and, for each processed
    image, writes:

    * the image file name as one line of ``train_list.txt``;
    * ``<image file name>.txt`` in ``train_images_gts`` with one line per
      word: ``x1,y1,x2,y2,x3,y3,x4,y4,word`` (the four corners in the order
      they are stored in ``wordBB``, coordinates truncated to int);
    * the image itself into ``train_images_file``.

    Args:
        max_images: Number of images to convert, counted from the start of
            ``imnames``. The default of 11 matches the original script, which
            stopped after index 10. Pass ``None`` to convert every image.
        draw_boxes: When true (the default, as in the original script), a red
            rectangle is drawn on the saved image for every word. Disable it
            when the images are meant to be used as training data.

    Raises:
        FileNotFoundError: If an image listed in ``imnames`` cannot be read.
        ValueError: If the number of words does not match the number of word
            boxes for an image, which would mislabel the boxes.
    """
    data = sio.loadmat(dataFile)
    image_names = data["imnames"][0]
    if max_images is not None:
        image_names = image_names[:max_images]

    list_path = os.path.join(saveFile, train_images_list_txt)
    with open(list_path, 'w', encoding='utf-8') as list_file:
        for i, name_entry in enumerate(tqdm(image_names)):
            # Names are stored as "<folder>/<file>"; outputs use the file part.
            rel_name = "{}".format(name_entry[0])
            base_name = rel_name.split("/")[1]

            src_path = os.path.join(data_images_file, rel_name)
            image = cv2.imread(src_path)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise FileNotFoundError(
                    "cannot read image: {}".format(src_path))

            boxes = _as_word_boxes(data['wordBB'][0][i])
            words = _split_words(data["txt"][0][i])
            box_count = len(boxes[0][0])
            if len(words) != box_count:
                raise ValueError(
                    "{}: {} words but {} word boxes".format(
                        rel_name, len(words), box_count))

            list_file.write(base_name + '\n')

            gt_path = os.path.join(train_images_gts,
                                   "{}.txt".format(base_name))
            with open(gt_path, 'w', encoding='utf-8') as gt_file:
                for j, word in enumerate(words):
                    xs = [int(boxes[0][k][j]) for k in range(4)]
                    ys = [int(boxes[1][k][j]) for k in range(4)]
                    if draw_boxes:
                        # Axis-aligned rectangle for visual inspection only;
                        # the corner choice is unchanged from the original.
                        cv2.rectangle(image, (xs[0], ys[1]), (xs[2], ys[2]),
                                      (0, 0, 255), 4)
                    fields = []
                    for x, y in zip(xs, ys):
                        fields.append(str(x))
                        fields.append(str(y))
                    fields.append(word)
                    gt_file.write(",".join(fields) + '\n')

            cv2.imwrite(os.path.join(train_images_file, base_name), image)


# Run the conversion immediately. There is no __main__ guard, so this also
# executes when the file is imported as a module.
convert2txt()