将 JSON 和 XML 标注文件转换为 txt 标签文件
本文仅作学习记录使用,若有侵权,请留言,我会立即删除。
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import cv2
import json
# Dataset splits to process; each one is read from data/ImageSets/<split>.txt.
# Add 'test' to this list to process a test split as well.
sets = ["train", "val"]
# Known class names. For XML annotations the position of a name in this list
# is the class id written to the label file.
classes = ["formula"]
def convert(size, box):
    """Convert a corner-format box into a normalized centre-format box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(x0, y0, x1, y1)``, two opposite corners of the box. The
            corners may be supplied in either order.

    Returns:
        A tuple ``(x, y, w, h)``: the box centre and the box width/height,
        each scaled by the reciprocal of the matching image dimension.

    Raises:
        ZeroDivisionError: if either entry of ``size`` is 0.
    """
    # Multiply by the reciprocal (rather than divide) so the numeric results
    # stay identical to what this function has always produced.
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[2]) / 2.0
    y = (box[1] + box[3]) / 2.0
    # abs() keeps the extent non-negative when the corners arrive reversed
    # (e.g. a rectangle drawn from bottom-right to top-left).
    w = abs(box[2] - box[0])
    h = abs(box[3] - box[1])
    return (x * dw, y * dh, w * dw, h * dh)
def convert_annotation(image_id, id=None, index_f="xml"):
    """Write the label file for one image from its JSON or XML annotation.

    Reads ``data/Annotations/<stem>.<index_f>`` and writes one line per
    object to ``data/labels/<stem>.txt`` in the form
    ``<class> <x> <y> <w> <h>``, where the four numbers come from
    ``convert``. ``<stem>`` is ``image_id`` without its file extension.

    Args:
        image_id: image file name including its extension (e.g. ``a.jpg``).
        id: optional list; in JSON mode every label encountered is appended
            to it. The name shadows the builtin but is kept so existing
            callers continue to work.
        index_f: annotation format, ``"xml"`` or ``"json"``. Any other value
            leaves the label file empty.

    Raises:
        OSError: if the annotation file cannot be opened or the label file
            cannot be created.
    """
    # A fresh list per call: a mutable default would be shared by all calls.
    if id is None:
        id = []

    # splitext handles extensions of any length (".jpg", ".jpeg", ...).
    stem = os.path.splitext(image_id)[0]
    in_path = 'data/Annotations/%s.%s' % (stem, index_f)
    out_path = 'data/labels/%s.txt' % (stem)

    # Context managers guarantee both files are closed, even on error.
    with open(in_path) as in_file, open(out_path, 'w') as out_file:
        if index_f == "json":
            json_data = json.load(in_file)
            shapes = json_data["shapes"]
            w, h = json_data['imageWidth'], json_data['imageHeight']
            for obj in shapes:
                # In JSON mode the label string itself is written as the class.
                cls_id = obj["label"]
                id.append(cls_id)
                if cls_id == "19":
                    # Debug aid: report which annotation file has label "19".
                    print(in_file)
                # Only the first two points are used (opposite box corners).
                points = obj['points']
                x0, y0 = float(points[0][0]), float(points[0][1])
                x1, y1 = float(points[1][0]), float(points[1][1])
                # Order the corners so the box is always (xmin, ymin, xmax, ymax).
                b = (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))
                bb = convert((w, h), b)
                out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
        elif index_f == "xml":
            xml_data = ET.parse(in_file)
            root = xml_data.getroot()
            width = int(root.findtext("size/width"))
            height = int(root.findtext("size/height"))
            for obj in root.findall("object"):
                difficult = obj.findtext("difficult")
                class_name = obj.findtext("name")
                # Skip unknown classes OR objects flagged as difficult; a
                # missing/empty <difficult> tag is treated as "not difficult".
                if class_name not in classes or int(difficult or 0) == 1:
                    continue
                cls_id = classes.index(class_name)
                bbox = (float(obj.findtext("bndbox/xmin")),
                        float(obj.findtext("bndbox/ymin")),
                        float(obj.findtext("bndbox/xmax")),
                        float(obj.findtext("bndbox/ymax")))
                bb = convert((width, height), bbox)
                out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
def xml_to_txt():
    """Convert the XML annotations of every image set into label files.

    For each name in ``sets`` this reads the image names from
    ``data/ImageSets/<set>.txt``, writes one image path per line to
    ``data/math_express_<set>.txt`` and calls ``convert_annotation`` with
    ``index_f="xml"`` for every image.
    """
    # Handed to convert_annotation; nothing reads it afterwards.
    label_ids = []
    # exist_ok avoids the check-then-create race and needs doing only once.
    os.makedirs('data/labels/', exist_ok=True)
    for image_set in sets:
        with open('data/ImageSets/%s.txt' % image_set) as ids_file:
            image_ids = ids_file.read().strip().split()
        with open('data/math_express_%s.txt' % (image_set), 'w') as list_file:
            for image_id in image_ids:
                # Image path; change the prefix here if the images live
                # elsewhere. The path is recorded in the list file.
                list_file.write('data/images//%s\n' % (image_id))
                convert_annotation(image_id, label_ids, index_f="xml")
def json_to_txt():
    """Convert the JSON annotations of every image set into label files.

    For each name in ``sets`` this reads the image names from
    ``data/ImageSets/<set>.txt``, writes one image path per line to
    ``data/math_express_<set>.txt`` and calls ``convert_annotation`` with
    ``index_f="json"`` for every image.
    """
    # Receives every label seen in the JSON files; nothing reads it afterwards.
    label_ids = []
    # exist_ok avoids the check-then-create race and needs doing only once.
    os.makedirs('data/labels/', exist_ok=True)
    for image_set in sets:
        with open('data/ImageSets/%s.txt' % image_set) as ids_file:
            image_ids = ids_file.read().strip().split()
        with open('data/math_express_%s.txt' % (image_set), 'w') as list_file:
            for image_id in image_ids:
                # Image path; change the prefix here if the images live
                # elsewhere. The path is recorded in the list file.
                list_file.write('data/images//%s\n' % (image_id))
                convert_annotation(image_id, label_ids, index_f="json")
# Script entry point: when run directly, convert the XML annotations.
if __name__ == "__main__":
    xml_to_txt()
文件路径结构如下: