数据转换:将 VOC 格式的 xml 文件转换成 COCO 格式的 json 文件
import tqdm
import os
import sys
import json
import cv2
import argparse
import numpy as np
import xml.etree.ElementTree as ET
def get_img_path(path, extend=".jpg"):
    """Walk ``path`` recursively and return the files whose extension is ``extend``.

    The extension comparison is exact (case-sensitive). Files located
    directly in the current working directory are left out.
    """
    matches = []
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            candidate = os.path.join(folder, name)
            sits_in_cwd = os.path.dirname(candidate) == os.getcwd()
            if sits_in_cwd or not os.path.isfile(candidate):
                continue
            suffix = os.path.splitext(os.path.basename(candidate))[1]
            if suffix == extend:
                matches.append(candidate)
    return matches
def get(root, name):
    """Return every direct child of ``root`` matching ``name`` (possibly an empty list)."""
    children = root.findall(name)
    return children
def get_and_check(root, name, length):
    """Find the children of ``root`` named ``name`` and validate how many there are.

    Raises ``NotImplementedError`` when nothing is found, or when ``length``
    is positive and the number of matches differs from it. Returns the single
    element when ``length`` is 1, otherwise the list of matches.
    """
    found = root.findall(name)
    count = len(found)
    if count == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length > 0 and count != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, count))
    return found[0] if length == 1 else found
# Id given to the first annotation; every further box gets the next integer.
START_BOUNDING_BOX_ID = 1


def convert(img_list, json_file, message="converting"):
    """Convert Pascal VOC xml annotations into one COCO-style json file.

    For each image path the annotation is expected next to the image, with
    the same base name and an ``.xml`` extension. An image without an xml is
    recorded as a background image (no annotations) and appended to the
    module-level ``background_img`` list.

    Module-level settings read by this function: ``pre_define_categories``,
    ``classes``, ``data_path``, ``args.one_cls`` and
    ``only_care_pre_define_categories``.

    Args:
        img_list: image file paths to convert.
        json_file: path of the json file to write.
        message: label shown on the progress bar and in the empty-list notice.

    Returns:
        None. Nothing is written when ``img_list`` is empty.

    Raises:
        NotImplementedError: a required xml element is missing or duplicated.
        AssertionError: a bounding box has ``xmax <= xmin`` or ``ymax <= ymin``.
    """
    if not img_list:
        print("empty img list, cannot {}".format(message))
        return
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    categories = pre_define_categories.copy()
    bnd_id = START_BOUNDING_BOX_ID
    all_categories = {}
    for index, img_p in enumerate(tqdm.tqdm(img_list, message)):
        img_f = img_p.replace("\\", "/")
        # Path relative to the dataset root. relpath works whether or not
        # data_path ends with a separator (basename-splitting did not).
        filename = os.path.relpath(img_f, data_path).replace("\\", "/")
        image_id = 20190000001 + index
        # Swap only the extension, so directory names containing ".jpg" are untouched.
        xml_f = os.path.splitext(img_f)[0] + ".xml"
        if not os.path.isfile(xml_f):
            print("cannot find xml {} consider it as a background".format(xml_f))
            img = cv2.imread(img_f)
            if img is None:
                # imread signals an unreadable/corrupt file by returning None.
                print("[warning] cannot read image {} skip it".format(img_f))
                continue
            height, width = img.shape[:2]
            json_dict['images'].append(
                {'file_name': filename, 'height': height, 'width': width, 'id': image_id})
            background_img.append(img_f)
            continue

        root = ET.parse(xml_f).getroot()
        objects = get(root, 'object')
        # Resolve each object's category once; reused by the filter and the loop below.
        names = [classes[0] if args.one_cls else get_and_check(obj, 'name', 1).text
                 for obj in objects]
        if only_care_pre_define_categories and not any(n in classes for n in names):
            print("[warning] {} some of category {} not in 'pre_define_categories'({}) skip it"
                  "".format(xml_f, names, pre_define_categories))
            continue

        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        json_dict['images'].append(
            {'file_name': filename, 'height': height, 'width': width, 'id': image_id})

        for obj, category in zip(objects, names):
            all_categories[category] = all_categories.get(category, 0) + 1
            if category not in categories:
                if only_care_pre_define_categories:
                    print("[warning] category '{}' not in 'pre_define_categories'({}) skip it"
                          "".format(category, pre_define_categories))
                    continue
                new_id = len(categories) + 1
                print("[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically"
                      "".format(category, pre_define_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]
            bndbox = get_and_check(obj, 'bndbox', 1)
            # Coordinates may be written as floats ("12.0"); go through float first.
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            assert (xmax > xmin), "xmax <= xmin, {}".format(img_p)
            assert (ymax > ymin), "ymax <= ymin, {}".format(img_p)
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id': image_id,
                   'bbox': [xmin, ymin, o_width, o_height],
                   'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                   'segmentation': []}
            json_dict['annotations'].append(ann)
            bnd_id += 1

    for cate, cid in categories.items():
        json_dict['categories'].append({'supercategory': 'none', 'id': cid, 'name': cate})

    # Context manager guarantees the file is closed even if serialization fails.
    with open(json_file, 'w') as json_fp:
        json_fp.write(json.dumps(json_dict, indent=2))

    found_names = set(all_categories)
    expected_names = set(pre_define_categories)
    print("------------create {} done--------------".format(json_file))
    print("-->>> find {} categories: {} \n-->>> your pre_define_categories {}: {}".format(
        len(all_categories), found_names, len(pre_define_categories), expected_names))
    if found_names == expected_names:
        print("they are same")
    else:
        print("they are different")
    print("category: id --> {}".format(categories))
    print("available images number: {}".format(len(json_dict["images"])))
    print("save annotation to: {}".format(json_file))
if __name__ == '__main__':
    sys.path.append(".")
    # Dataset root; its sub-folders whose names contain "train"/"test" are scanned.
    data_path = "/data1/voc_2_coco/"
    # Category names; COCO ids are assigned from 1 in this order.
    classes = ['label1', 'label2']
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--save_ann", type=str, default='')
    parser.add_argument("--one_cls", action='store_true', help="convert all labels to one class")
    args = parser.parse_args()
    img_format = [".jpeg", ".jpg"]
    # When True, objects whose category is not listed in `classes` are skipped.
    only_care_pre_define_categories = True
    if args.one_cls:
        assert len(classes) == 1, "length of 'classes' should be 1 not {}({})".format(len(classes), classes)
        print("[WARNING] ==>> all annotation labels will be convert to one class: {}".format(classes))

    # Annotations go next to the data unless --save_ann names another folder.
    if args.save_ann == "":
        ann_dir = data_path
    else:
        os.makedirs(args.save_ann, exist_ok=True)
        ann_dir = args.save_ann
    # os.path.join avoids the doubled "/" that plain concatenation produced.
    save_json_train = os.path.join(ann_dir, 'instances_train2014.json')
    save_json_val = os.path.join(ann_dir, 'instances_val2014.json')

    background_img = []
    train_img_dirs, test_img_dirs = [], []
    for folder_name in os.listdir(data_path):
        folder_path = os.path.join(data_path, folder_name)
        if os.path.isfile(folder_path):
            continue
        if "test" in folder_name:
            assert "train" not in folder_name, "'train' and 'test' should not be in folder '{}'".format(folder_name)
            test_img_dirs.append(folder_path)
        elif "train" in folder_name:
            # "test" is known to be absent here, so no further check is needed.
            train_img_dirs.append(folder_path)
        else:
            print("[WARNING] ==>> ignore folder {}, 'train' or 'test' should be in folder name".format(folder_name))
    print("train path: {}, test path: {}".format(train_img_dirs, test_img_dirs))

    pre_define_categories = {}
    for i, cls in enumerate(classes):
        pre_define_categories[cls] = i + 1

    img_list_train, img_list_val = [], []
    # Progress counters start at 1 so the last folder reads "N/N".
    for idx, train_dir in enumerate(train_img_dirs, start=1):
        for im_format in img_format:
            img_list_train += get_img_path(train_dir, im_format)
        print("{}/{} reading train images in: {}, total num: {}".format(idx, len(train_img_dirs), train_dir,
                                                                        len(img_list_train)))
    print("-" * 50)
    for idx, test_dir in enumerate(test_img_dirs, start=1):
        for im_format in img_format:
            img_list_val += get_img_path(test_dir, im_format)
        print("{}/{} reading test images in: {}, total num: {}".format(idx, len(test_img_dirs), test_dir,
                                                                       len(img_list_val)))

    print("voc to coco ...")
    convert(img_list_train, save_json_train, "convert train")
    print("-" * 100)
    convert(img_list_val, save_json_val, "convert val")
    if len(background_img):
        print("background img number {}".format(len(background_img)))
将 VOC 格式的 xml 文件转换成 YOLOv4/v5 适用的 txt 文件
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
def convert(size, box):
    """Normalize a pixel box to ``(x_center, y_center, width, height)`` fractions.

    ``size`` is indexed as ``(width, height)`` and ``box`` as
    ``(xmin, xmax, ymin, ymax)``; each output value is divided by the
    matching image dimension.
    """
    img_w, img_h = size[0], size[1]
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
    center_x = (x_lo + x_hi) / 2.0
    center_y = (y_lo + y_hi) / 2.0
    return (center_x / img_w, center_y / img_h, (x_hi - x_lo) / img_w, (y_hi - y_lo) / img_h)
def convert_annotation(xml_files_path, save_txt_files_path, classes):
    """Write one txt label file per Pascal VOC xml file in ``xml_files_path``.

    Each kept object becomes a line ``"<class index> <x> <y> <w> <h>"`` with
    the four numbers produced by ``convert``. Objects whose name is not in
    ``classes`` or that are flagged ``difficult`` are skipped.

    Args:
        xml_files_path: folder holding the xml annotations.
        save_txt_files_path: folder receiving the txt files (created if missing).
        classes: list of class names; the list index is the written class id.
    """
    os.makedirs(save_txt_files_path, exist_ok=True)
    # Only xml files are annotations; other entries would fail to parse.
    xml_files = [name for name in os.listdir(xml_files_path) if name.lower().endswith('.xml')]
    print(xml_files)
    for xml_name in xml_files:
        print(xml_name)
        xml_file = os.path.join(xml_files_path, xml_name)
        # splitext keeps dotted base names intact ("img.001.xml" -> "img.001.txt").
        out_txt_path = os.path.join(save_txt_files_path, os.path.splitext(xml_name)[0] + '.txt')
        root = ET.parse(xml_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)
        lines = []
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue
            # A missing or empty <difficult> element counts as "not difficult".
            difficult_node = obj.find('difficult')
            has_flag = difficult_node is not None and difficult_node.text
            if has_flag and int(difficult_node.text) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            print(w, h, b)
            bb = convert((w, h), b)
            lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
        # Written in one go inside a context manager so the handle is always closed.
        with open(out_txt_path, 'w') as out_txt_f:
            out_txt_f.writelines(lines)
if __name__ == "__main__":
    # Class names in the order that defines their numeric ids in the txt files.
    classes1 = ['hand_with_goods', 'hand_without_goods']
    # Input folder with the xml annotations and output folder for the txt labels.
    xml_files1 = r'E:\hand_data\xml\xml'
    save_txt_files1 = r'E:\hand_data\txt_label'
    convert_annotation(
        xml_files_path=xml_files1,
        save_txt_files_path=save_txt_files1,
        classes=classes1,
    )
将 json 格式的标注文件转换成 VOC 格式的 xml 文件
某些标注软件在拉框时,左上角和右下角坐标可能颠倒(从左上角开始拉框与从右下角开始拉框得到的两点顺序不一致),因此在转换成 xml 文件时,需要对从右下角开始拉框的标注进行校正。
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from lxml import etree
import codecs
import json
import os
import sys
import tqdm
# Add the current working directory to the module search path.
sys.path.append(".")
# Extension appended to written annotation files and required of files being read.
XML_EXT = '.xml'
# Encoding used for the xml parser and for the written annotation files.
ENCODE_METHOD = 'utf-8'
class PascalVocWriter:
    """Collect bounding boxes for one image and write them as a Pascal VOC xml file."""

    def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None, usrname=None):
        """Store the image metadata.

        Args:
            foldername: text for the ``<folder>`` element.
            filename: text for the ``<filename>`` element; also the default
                output base name used by ``save``.
            imgSize: ``(height, width)`` or ``(height, width, depth)``.
            databaseSrc: text for ``<source><database>``.
            localImgPath: optional text for the ``<path>`` element.
            usrname: value written (via ``str``) to the ``<usr>`` element.
        """
        self.foldername = foldername
        self.filename = filename
        self.databaseSrc = databaseSrc
        self.imgSize = imgSize
        self.boxlist = []
        self.localImgPath = localImgPath
        self.verified = False
        self.usr = usrname

    def prettify(self, elem):
        """Return ``elem`` as pretty-printed, tab-indented xml bytes."""
        rough_string = ElementTree.tostring(elem, 'utf8')
        root = etree.fromstring(rough_string)
        pretty = etree.tostring(root, pretty_print=True, encoding=ENCODE_METHOD)
        # Convert only the leading two-space indentation to tabs. A blanket
        # space->tab replace would also rewrite spaces inside text content
        # (e.g. a label such as "hand with goods").
        tabbed = []
        for line in pretty.split(b"\n"):
            content = line.lstrip(b" ")
            indent = len(line) - len(content)
            tabbed.append(b"\t" * (indent // 2) + b" " * (indent % 2) + content)
        return b"\n".join(tabbed)

    def genXML(self):
        """Build the ``<annotation>`` root element without any objects.

        Returns ``None`` when filename, foldername or imgSize is missing.
        """
        if self.filename is None or \
                self.foldername is None or \
                self.imgSize is None:
            return None

        top = Element('annotation')
        if self.verified:
            top.set('verified', 'yes')

        user = SubElement(top, 'usr')
        user.text = str(self.usr)

        folder = SubElement(top, 'folder')
        folder.text = self.foldername

        filename = SubElement(top, 'filename')
        filename.text = self.filename

        if self.localImgPath is not None:
            localImgPath = SubElement(top, 'path')
            localImgPath.text = self.localImgPath

        source = SubElement(top, 'source')
        database = SubElement(source, 'database')
        database.text = self.databaseSrc

        size_part = SubElement(top, 'size')
        width = SubElement(size_part, 'width')
        height = SubElement(size_part, 'height')
        depth = SubElement(size_part, 'depth')
        # imgSize is ordered (height, width[, depth]).
        width.text = str(self.imgSize[1])
        height.text = str(self.imgSize[0])
        if len(self.imgSize) == 3:
            depth.text = str(self.imgSize[2])
        else:
            depth.text = '1'

        segmented = SubElement(top, 'segmented')
        segmented.text = '0'
        return top

    def addBndBox(self, xmin, ymin, xmax, ymax, name, difficult):
        """Queue one labelled box to be written by ``appendObjects``."""
        bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
        bndbox['name'] = name
        bndbox['difficult'] = difficult
        self.boxlist.append(bndbox)

    def appendObjects(self, top):
        """Append one ``<object>`` element per queued box to ``top``."""
        for each_object in self.boxlist:
            object_item = SubElement(top, 'object')
            name = SubElement(object_item, 'name')
            try:
                # Python 2 path; `unicode` does not exist on Python 3.
                name.text = unicode(each_object['name'])
            except NameError:
                name.text = each_object['name']
            pose = SubElement(object_item, 'pose')
            pose.text = "Unspecified"
            truncated = SubElement(object_item, 'truncated')
            # A box touching the image border is flagged as truncated.
            if int(each_object['ymax']) == int(self.imgSize[0]) or (int(each_object['ymin']) == 1):
                truncated.text = "1"
            elif (int(each_object['xmax']) == int(self.imgSize[1])) or (int(each_object['xmin']) == 1):
                truncated.text = "1"
            else:
                truncated.text = "0"
            difficult = SubElement(object_item, 'difficult')
            difficult.text = str(bool(each_object['difficult']) & 1)
            bndbox = SubElement(object_item, 'bndbox')
            xmin = SubElement(bndbox, 'xmin')
            xmin.text = str(each_object['xmin'])
            ymin = SubElement(bndbox, 'ymin')
            ymin.text = str(each_object['ymin'])
            xmax = SubElement(bndbox, 'xmax')
            xmax.text = str(each_object['xmax'])
            ymax = SubElement(bndbox, 'ymax')
            ymax.text = str(each_object['ymax'])

    def save(self, targetFile=None):
        """Write the annotation to ``targetFile`` (default: ``filename`` + ``XML_EXT``).

        Raises:
            ValueError: filename, foldername or imgSize is missing.
        """
        root = self.genXML()
        if root is None:
            raise ValueError("filename, foldername and imgSize are required to write the annotation")
        self.appendObjects(root)
        # Serialize before opening the file so a failure leaves no empty file behind.
        prettifyResult = self.prettify(root)
        out_path = self.filename + XML_EXT if targetFile is None else targetFile
        with codecs.open(out_path, 'w', encoding=ENCODE_METHOD) as out_file:
            out_file.write(prettifyResult.decode('utf8'))
class PascalVocReader:
    """Read the labelled boxes of one Pascal VOC xml file.

    Parsing is best-effort: a file that cannot be parsed leaves ``shapes``
    with whatever was read before the failure and prints a warning.
    """

    def __init__(self, filepath):
        """Parse ``filepath`` immediately; parse errors are reported, not raised."""
        self.shapes = []
        self.filepath = filepath
        self.verified = False
        try:
            self.parseXML()
        except Exception as err:
            # Stay best-effort, but report the failure instead of hiding it.
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
            print("[warning] cannot parse {}: {}".format(filepath, err))

    def getShapes(self):
        """Return the list of ``(label, points, None, None, difficult)`` tuples."""
        return self.shapes

    def addShape(self, label, bndbox, difficult):
        """Append the four corner points of ``bndbox`` as one shape."""
        # Go through float so coordinates written as "12.0" are accepted.
        xmin = int(float(bndbox.find('xmin').text))
        ymin = int(float(bndbox.find('ymin').text))
        xmax = int(float(bndbox.find('xmax').text))
        ymax = int(float(bndbox.find('ymax').text))
        points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
        self.shapes.append((label, points, None, None, difficult))

    def parseXML(self):
        """Load every ``<object>`` of the file into ``shapes``; returns True."""
        assert self.filepath.endswith(XML_EXT), "Unsupport file format"
        parser = etree.XMLParser(encoding=ENCODE_METHOD)
        xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
        # Only the literal value 'yes' marks the file as verified.
        if xmltree.attrib.get('verified') == 'yes':
            self.verified = True

        for object_iter in xmltree.findall('object'):
            bndbox = object_iter.find("bndbox")
            label = object_iter.find('name').text
            difficult_node = object_iter.find('difficult')
            difficult = False
            if difficult_node is not None:
                difficult = bool(int(difficult_node.text))
            self.addShape(label, bndbox, difficult)
        return True
# Root folder; main() searches each of its sub-folders for ".json" files.
data_path = r"E:\hand_data\json"
# Known label names; convert_to_xml prints a message for any label not listed here.
classes = ['label1', 'label2']
def get_img_path(path, extend=".jpg"):
    """Return all files below ``path`` (recursively) whose extension equals ``extend``.

    The match is exact and case-sensitive; files placed directly in the
    current working directory are ignored.
    """
    collected = []
    for current_dir, _dirs, file_names in os.walk(path):
        for file_name in file_names:
            full_path = os.path.join(current_dir, file_name)
            if os.path.dirname(full_path) == os.getcwd():
                continue
            if os.path.isfile(full_path):
                _, ext = os.path.splitext(os.path.basename(full_path))
                if ext == extend:
                    collected.append(full_path)
    return collected
def correct_box(points, img_h, img_w, json_fie):
    """Order two corner points as top-left/bottom-right and clamp them to the image.

    ``points`` holds two ``(x, y)`` pairs in either order. Coordinates are
    truncated to int, checked to span a non-empty box, then limited to
    ``[0, img_w - 1]`` and ``[0, img_h - 1]``. ``json_fie`` is only used in
    the assertion messages. Returns ``(x1, y1, x2, y2)``.
    """
    (ax, ay), (bx, by) = points
    left, right = int(min(ax, bx)), int(max(ax, bx))
    top, bottom = int(min(ay, by)), int(max(ay, by))
    ordered = [(left, top), (right, bottom)]
    assert left < right, "error x1 < x2 {} {} {}".format(points, ordered, json_fie)
    assert top < bottom, "error y1 < y2 {} {} {}".format(points, ordered, json_fie)

    def clamp(value, upper):
        return max(0, min(value, upper))

    last_col, last_row = img_w - 1, img_h - 1
    return clamp(left, last_col), clamp(top, last_row), clamp(right, last_col), clamp(bottom, last_row)
def convert_to_xml(json_file):
    """Convert one json annotation file to a Pascal VOC xml file beside it.

    The json is read for ``imagePath``, ``imageHeight``, ``imageWidth`` and
    ``shapes``; each shape must carry exactly two points, which are
    normalized by ``correct_box``. The xml is written to the same path with
    the extension changed to ``.xml``.

    Args:
        json_file: path of the json annotation file.

    Returns:
        The list of labels found in the file, one entry per shape.

    Raises:
        AssertionError: a shape does not have exactly two points.
    """
    with open(json_file, "r") as f:
        setting = json.load(f)

    image_name = setting['imagePath']
    img_h = setting["imageHeight"]
    img_w = setting["imageWidth"]
    print(image_name)

    labels = []
    result = []
    for shape in setting['shapes']:
        points = shape['points']
        # The message now names the file too; the second placeholder was missing.
        assert len(points) == 2, "error points {} {}".format(points, json_file)
        label = shape["label"]
        if label not in classes:
            # Reported only; the shape is still converted.
            print("error label '{}' {}".format(label, json_file))
        x1, y1, x2, y2 = correct_box(points, img_h, img_w, json_file)
        labels.append(label)
        result.append([label, [x1, y1, x2, y2]])

    # Change only the extension; str.replace would also hit ".json" inside folder names.
    xml_path = os.path.splitext(json_file)[0] + ".xml"
    save_xml(result, xml_path, os.path.basename(image_name), img_h, img_w)
    return labels
def save_xml(results, save_p, file_name, height, width):
    """Write ``results`` (a list of ``[label, [x1, y1, x2, y2]]``) to the xml file ``save_p``.

    The image is described as ``height`` x ``width`` with 3 channels and
    every box is stored with ``difficult`` set to 0.
    """
    image_shape = (height, width, 3)
    voc_writer = PascalVocWriter("./", file_name, image_shape, localImgPath="./", usrname="MingZhang")
    for label, (x1, y1, x2, y2) in results:
        voc_writer.addBndBox(x1, y1, x2, y2, label, 0)
    voc_writer.save(targetFile=save_p)
def main():
    """Convert every json file found in the sub-folders of ``data_path`` and print label statistics."""
    json_files = []
    for entry in os.listdir(data_path):
        entry_path = data_path + "/" + entry
        if os.path.isfile(entry_path):
            continue
        print(entry)
        json_files.extend(get_img_path(path=entry_path, extend=".json"))
    print("json number: {} in {}".format(len(json_files), data_path))

    seen_labels = []
    for json_file in tqdm.tqdm(json_files):
        found = convert_to_xml(json_file)
        if len(found) == 0:
            print("not annotation {}".format(json_file))
            continue
        seen_labels.extend(found)

    unique_labels = sorted(set(seen_labels))
    print("-" * 50)
    print("number classes:", len(unique_labels))
    print(unique_labels)
    for label in unique_labels:
        print("==>> label: '{}', number: {}".format(label, seen_labels.count(label)))


if __name__ == "__main__":
    main()
以上先记录几种常用的数据格式转换方法,如有问题欢迎指正。