最近在做自然场景的OCR检测识别算法。对于检测来说,需要对图片文件标注ground truth的坐标点label。刚开始使用的是精灵标注助手,标注好bounding box后导出的是json格式的文件信息,但是CTPN、PSE等文字检测算法的训练需要的是包含点坐标信息的txt格式文件,因此(重点来了):
当你准备做自然场景下的文字检测算法(类似于CTPN、PSE等),并且已经使用精灵标注助手标注好了文字位置的bounding box信息、导出了json格式的文件时,可以使用如下脚本(python3)将json文件批量转换成训练时方便使用的点坐标txt文件。
代码如下:
# -*- coding: utf-8 -*-
"""
@author:xxx
@file: read_json.py
@time: 2019/06/05
"""
# 根据标注精灵标好导出的json文件生成txt
import json
import os
import glob
from tqdm import tqdm
def get_files(path, _ends=('*.json',)):
    """Collect the files directly inside ``path`` that match any glob pattern.

    The search is not recursive: each pattern is joined onto ``path`` and
    handed to ``glob.glob``.

    Args:
        path: Directory to search.
        _ends: Iterable of glob patterns (e.g. ``['*.json']``). A single
            pattern string is also accepted.

    Returns:
        A ``(files, count)`` tuple, where ``files`` is the list of matching
        paths in the order the patterns produced them and ``count`` is
        ``len(files)``.
    """
    # A bare string would otherwise be iterated character by character,
    # turning '*.json' into the patterns '*', '.', 'j', ...
    if isinstance(_ends, str):
        _ends = (_ends,)

    all_files = []
    for pattern in _ends:
        all_files.extend(glob.glob(os.path.join(path, pattern)))
    return all_files, len(all_files)
# Coordinate keys read from a 'polygon' annotation, in output order.
_POLYGON_KEYS = ('x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4')
# Keys read from a 'bndbox' annotation so that the rectangle is emitted as
# four corners: (xmin,ymin), (xmax,ymin), (xmax,ymax), (xmin,ymax).
_BNDBOX_KEYS = ('xmin', 'ymin', 'xmax', 'ymin', 'xmax', 'ymax', 'xmin', 'ymax')


def get_text_mark(file_path):
    """Convert one exported annotation JSON file into label lines.

    The file is expected to contain ``{"outputs": {"object": [...]}}`` where
    every object has a ``name`` (the text) and either a ``polygon`` entry
    (keys ``x1``..``y4``) or a ``bndbox`` entry (``xmin``, ``ymin``,
    ``xmax``, ``ymax``). ``polygon`` takes precedence when both are present.

    Args:
        file_path: Path of the UTF-8 encoded JSON annotation file.

    Returns:
        A list of strings, one per usable object, formatted as
        ``x1,y1,x2,y2,x3,y3,x4,y4,text`` with coordinates truncated to int.
        Objects whose coordinates are missing or not numeric are skipped.

    Raises:
        KeyError: If ``outputs``/``object`` is missing, an object has no
            ``name``, or an object has neither ``polygon`` nor ``bndbox``.
        OSError, ValueError: If the file cannot be read or is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as fid:
        result_dict = json.load(fid)

    all_text_mark = []
    for obj_item in result_dict['outputs']['object']:
        text = obj_item['name']
        if 'polygon' in obj_item:
            coords, keys = obj_item['polygon'], _POLYGON_KEYS
        else:
            # A missing 'bndbox' raises KeyError to the caller on purpose:
            # the object has no shape this converter understands.
            coords, keys = obj_item['bndbox'], _BNDBOX_KEYS

        try:
            points = [int(float(coords[key])) for key in keys]
        except (KeyError, IndexError, TypeError, ValueError, OverflowError):
            # Malformed or incomplete coordinates: drop just this object.
            continue

        all_text_mark.append(','.join(map(str, points)) + ',' + text)
    return all_text_mark
def write_to_txt(out_txt_path, one_file_all_mark, json_file=None):
    """Append label lines to ``<out_txt_path>/<annotation stem>.txt``.

    Args:
        out_txt_path: Existing directory that receives the txt file.
        one_file_all_mark: Iterable of label strings; each is written on its
            own line.
        json_file: Path of the annotation file the labels came from; only its
            base name (minus the final extension) is used to name the output.
            When omitted, the module-level variable ``file`` is used, which
            is how the script's main loop has always driven this function.

    Raises:
        NameError: If ``json_file`` is omitted and no module-level ``file``
            exists.
    """
    if json_file is None:
        try:
            json_file = globals()['file']
        except KeyError:
            raise NameError("name 'file' is not defined") from None

    # os.path handles the platform's own separator(s), so no source edit is
    # needed when moving between Windows and Linux. splitext strips only the
    # last extension, so 'img.v2.json' maps to 'img.v2.txt' instead of being
    # truncated to 'img.txt' and colliding with other 'img.*' inputs.
    stem = os.path.splitext(os.path.basename(json_file))[0]
    txt_path = os.path.join(out_txt_path, stem + '.txt')

    # NOTE: append mode is kept from the original script, so re-running the
    # conversion into the same directory adds the lines again.
    with open(txt_path, 'a+', encoding='utf-8') as fid:
        fid.writelines(item + '\n' for item in one_file_all_mark)
if __name__=="__main__":
    # json_path: directory holding the exported *.json annotation files.
    # out_txt_path: directory that receives one .txt label file per input.
    json_path = r'xxxxxx'
    out_txt_path = r'xxxxxx'
    files,files_len=get_files(json_path)
    if files:
        # Writing fails if the target directory is missing, so create it
        # up front (only when there is something to convert).
        os.makedirs(out_txt_path, exist_ok=True)
    # NOTE: the loop variable must remain a module-level name called `file`;
    # write_to_txt() reads it to derive the output file name.
    for file in tqdm(files, total=files_len):
        try:
            one_file_all_mark = get_text_mark(file)
        except Exception as err:
            # Report which file was skipped and why; tqdm.write keeps the
            # message from corrupting the progress bar.
            tqdm.write('skipped {}: {!r}'.format(file, err))
            continue
        write_to_txt(out_txt_path,one_file_all_mark)
注:该脚本在windows下和linux下都可以正常运行,但是需要根据实际运行环境修改write_to_txt()方法里被注释掉的部分(即在windows与linux两种路径分隔符的写法之间切换)。