训练目标检测模型时,一般使用 labelImg 标注图像,生成 .xml 格式的标注文件。但有时需要用到 COCO 格式的 .json 标注文件,这时就需要一个将 xml 转换为 json 的脚本。
执行该脚本会读取 Annotations 目录下的 .xml 文件,解析其中的类别及边界框(bounding box)坐标,最后生成 instances.json 文件。
#coding:utf-8
pip install lxml
import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET
# Base path used by the script. Its consumers are outside this excerpt --
# presumably the dataset root that contains the annotation folder; confirm
# against the code that reads it.
path2 = "."

# First id handed out to a bounding-box annotation; convert() seeds its
# running `bnd_id` counter from this value.
START_BOUNDING_BOX_ID = 1
def get(root, name):
    """Return every sub-element of *root* that matches *name*.

    Thin wrapper around ``Element.findall``.

    Args:
        root: The XML element to search under.
        name: Tag name (or ElementTree path expression) to look for.

    Returns:
        A list of matching elements; empty when nothing matches.
    """
    return root.findall(name)
def get_and_check(root, name, length):
    """Find the sub-elements of *root* named *name* and validate their count.

    Args:
        root: The XML element to search under.
        name: Tag name (or ElementTree path expression) to look for.
        length: Expected number of matches. A value <= 0 disables the
            count check. A value of 1 additionally unwraps the result so
            the single element is returned instead of a one-item list.

    Returns:
        The single matching element when ``length == 1``; otherwise the
        list of all matching elements.

    Raises:
        NotImplementedError: If no element matches, or if ``length > 0``
            and the number of matches differs from ``length``.
    """
    matches = root.findall(name)
    if not matches:
        raise NotImplementedError(
            'Can not find %s in %s.' % (name, root.tag))
    if length > 0 and len(matches) != length:
        raise NotImplementedError(
            'The size of %s is supposed to be %d, but is %d.'
            % (name, length, len(matches)))
    if length == 1:
        # Callers asking for exactly one element get the element itself.
        return matches[0]
    return matches
def convert(xml_list, json_file):
json_dict = {“images”: [], “type”: “instances”, “annotations”: [], “categories”: []}
categories = pre_define_categories.copy()
bnd_id = START_BOUNDING_BOX_ID
all_categories = {}
for index, line in enumerate(xml_list):
# print(“Processing %s”%(line))
xml_f = line
tree = ET.parse(xml_f)
root = tree.g