json和xml标注文件相互转换

最新推荐文章于 2024-08-09 19:41:01 发布
迟钝皮纳德
最新推荐文章于 2024-08-09 19:41:01 发布
阅读量519
点赞数 10
文章标签： json xml YOLO 目标检测
本文链接：https://blog.csdn.net/m0_53253879/article/details/136695737
版权
文章介绍了如何使用Python脚本将json和xml格式的标注文件在Bubbliiiing的YOLO网络训练中互相转换，提供了json2xml和xml2json的函数实现，便于适应不同标注软件的数据处理。
摘要由CSDN通过智能技术生成
为了适配Bubbliiiing的yolo网络代码训练自己的数据集，使用不同的标注软件会生成json或xml两种标注文件，为了方便我整理了一份相互转化的代码，不废话直接贴：
import os
import numpy as np
import json
from shutil import copyfile
from tqdm import tqdm
from xml.etree.ElementTree import parse
# ##################################################################
# 获取地址文件名，例如输入 "./data/test1.jpg" 返回 "test1"            
# ##################################################################
def get_root_file_name(root1):
    """Return the file name of a path without directory and extension.

    Example: ``"./data/test1.jpg"`` -> ``"test1"``.

    Args:
        root1: Path string. "/" is always treated as a separator; the
            platform separator ``os.sep`` is honoured as well.

    Returns:
        The last path component with its final ".suffix" removed. A
        component that contains no "." is returned unchanged.
    """
    base = root1.rsplit("/", 1)[-1]
    if os.sep != "/":
        # Paths written with the native separator (e.g. backslashes).
        base = base.rsplit(os.sep, 1)[-1]

    # Only a dot inside the last component marks an extension; a dot in a
    # directory name (e.g. "./data/test1") must not truncate the result.
    stem, dot, _ext = base.rpartition(".")
    return stem if dot else base

# ##################################################################
# json转xml文件，
# json_file: json文件地址
# xml_root: 转换完xml文件存储地址
# ##################################################################
def json2xml(json_file, xml_root):
    """Convert one json annotation file into a VOC-style xml file.

    The json is expected to provide ``imageWidth``, ``imageHeight`` and a
    ``shapes`` list whose items carry ``label`` and ``points``. Each shape
    is reduced to the axis-aligned bounding box of its points; shapes
    without points or with a zero-width/zero-height box are skipped.

    Args:
        json_file: Path of the json file. Nothing happens when the path
            does not end with ".json" or does not exist.
        xml_root: Directory the xml file is written to. The xml keeps the
            json file's base name.

    Returns:
        None.
    """
    # Local import: the file's import block does not provide ``escape``.
    from xml.sax.saxutils import escape

    if not json_file.endswith(".json") or not os.path.exists(json_file):
        return

    with open(json_file, "r", encoding="utf-8") as json_f:
        json_data = json.load(json_f)

    file_name = os.path.splitext(os.path.basename(json_file))[0]

    lines = [
        '<annotation>\n',
        # Training images are expected to live under JPEGImages.
        ' <folder>JPEGImages</folder>\n',
        ' <filename>' + escape(file_name) + '.jpg</filename>\n',
        ' <source>\n',
        '  <database>The Defect Detection</database>\n',
        '  <annotation>Defect Detection</annotation>\n',
        '  <image>flickr</image>\n',
        '  <flickrid>NULL</flickrid>\n',
        ' </source>\n',
        ' <size>\n',
        '  <width>' + str(json_data["imageWidth"]) + '</width>\n',
        '  <height>' + str(json_data["imageHeight"]) + '</height>\n',
        '  <depth>3</depth>\n',
        ' </size>\n',
        '  <segmented>0</segmented>\n',
    ]

    for shape in json_data["shapes"]:
        points = np.array(shape["points"])
        # A shape without usable (x, y) points cannot produce a box.
        if points.ndim != 2 or points.shape[0] == 0 or points.shape[1] < 2:
            continue
        xmin, xmax = points[:, 0].min(), points[:, 0].max()
        ymin, ymax = points[:, 1].min(), points[:, 1].max()
        if xmax <= xmin or ymax <= ymin:
            continue
        lines += [
            ' <object>\n',
            # Escape the label so "&", "<" and ">" keep the xml well-formed.
            '  <name>' + escape(str(shape["label"])) + '</name>\n',
            '  <pose>Unspecified</pose>\n',
            '  <truncated>1</truncated>\n',
            '  <difficult>0</difficult>\n',
            '  <bndbox>\n',
            '   <xmin>' + str(xmin) + '</xmin>\n',
            '   <ymin>' + str(ymin) + '</ymin>\n',
            '   <xmax>' + str(xmax) + '</xmax>\n',
            '   <ymax>' + str(ymax) + '</ymax>\n',
            '  </bndbox>\n',
            ' </object>\n',
        ]
    lines.append('</annotation>')

    # os.path.join works with or without a trailing separator on xml_root.
    xml_path = os.path.join(xml_root, file_name + ".xml")
    with open(xml_path, 'w', encoding='utf-8') as xml_f:
        xml_f.writelines(lines)

# ##################################################################
# xml转json文件
# xml_file: xml文件地址
# json_root: 转换完json文件存储地址
# ##################################################################
def xml2json(xml_file, json_root):
    """Convert one VOC-style xml annotation file into a json file.

    Every ``<object>`` that has a ``<bndbox>`` child becomes one
    "rectangle" shape with the two corner points (xmin, ymin) and
    (xmax, ymax); objects without a ``<bndbox>`` are skipped.

    Args:
        xml_file: Path of the xml file. Nothing happens when the path does
            not end with ".xml" or does not exist.
        json_root: Directory the json file is written to. The json keeps
            the xml file's base name.

    Returns:
        None.

    Raises:
        ValueError: If the xml has no ``<size>`` element with ``<width>``
            and ``<height>``.
    """
    if not xml_file.endswith(".xml") or not os.path.exists(xml_file):
        return

    def _number(text):
        # Keep integer text as int so values are written as in the xml.
        try:
            return int(text)
        except ValueError:
            return float(text)

    file_name = os.path.splitext(os.path.basename(xml_file))[0]
    root = parse(xml_file).getroot()

    size = next(root.iter('size'), None)
    if size is None or size.find('width') is None or size.find('height') is None:
        raise ValueError("%s has no <size> with <width> and <height>" % xml_file)
    image_width = _number(size.find('width').text)
    image_height = _number(size.find('height').text)

    shapes = []
    for obj in root.iter('object'):
        # Read label and box from the same element so they cannot get out
        # of step when an object has no box.
        box = obj.find('bndbox')
        if box is None:
            continue
        shapes.append({
            "label": obj.find('name').text,
            "text": "",
            "points": [
                [_number(box.find('xmin').text), _number(box.find('ymin').text)],
                [_number(box.find('xmax').text), _number(box.find('ymax').text)],
            ],
            "group_id": None,
            "shape_type": "rectangle",
            "flags": {},
        })

    annotation = {
        "version": "0.3.3",
        "flags": {},
        "shapes": shapes,
        "imagePath": file_name + ".jpg",
        "imageData": None,
        "imageHeight": image_height,
        "imageWidth": image_width,
    }

    # json.dump handles quoting/escaping of labels and file names.
    json_path = os.path.join(json_root, file_name + ".json")
    with open(json_path, 'w', encoding='utf-8') as json_f:
        json.dump(annotation, json_f, indent=2, ensure_ascii=False)
        json_f.write("\n")


# ##################################################################
# 批量json2xml转换，默认删除文件名空格并将图像文件重新复制一份
# json_root: json文件存储地址，只会检索json文件
# jpg_root: 源图像存储地址
# new_xml_root: 生成xml文件存储地址
# new_jpg_root: 复制图像地址，若为""则不复制
# isDelSpace: 是否删除源文件空格，默认为删除，适配VOC数据集标注模式
# ##################################################################
def json2xml_batch(json_root, jpg_root, new_xml_root, new_jpg_root, isDelSpace=True):
    """Convert every json annotation under a folder to VOC-style xml.

    Optionally copies the ".jpg" images to a new folder as well. All output
    files are written directly into the target folders, also when the
    input was found in a sub-folder.

    Args:
        json_root: Folder searched (recursively) for ".json" files.
        jpg_root: Folder searched (recursively) for ".jpg" files.
        new_xml_root: Folder the xml files are written to; created if
            missing.
        new_jpg_root: Folder the images are copied to; created if missing.
            Pass "" to skip copying.
        isDelSpace: When True (default), spaces are removed from the names
            of the generated xml files and of the copied images.

    Returns:
        None.
    """
    # Guard against missing input folders.
    if not os.path.exists(json_root) or not os.path.exists(jpg_root):
        print("No such folder!")
        return
    os.makedirs(new_xml_root, exist_ok=True)
    copy_images = new_jpg_root != ""
    if copy_images:
        os.makedirs(new_jpg_root, exist_ok=True)

    # Convert the annotations.
    print("Start conterting annotations...")
    for root, _dirs, files in os.walk(json_root):
        for file in tqdm(files):
            if not file.endswith(".json"):
                continue

            out_name = file.replace(" ", "") if isDelSpace else file
            file_name = os.path.splitext(out_name)[0]

            # os.walk descends into sub-folders, so the file has to be
            # opened relative to the folder it was found in.
            with open(os.path.join(root, file), "r", encoding="utf-8") as json_f:
                json_data = json.load(json_f)

            xml_path = os.path.join(new_xml_root, file_name + ".xml")
            with open(xml_path, 'w', encoding='utf-8') as xml_f:
                xml_f.write(_voc_xml_text(json_data, file_name))
    print("Conterting annotations done!")

    if not copy_images:
        return

    # Copy the images.
    print("Copy images...")
    for root, _dirs, files in os.walk(jpg_root):
        for file in tqdm(files):
            if not file.endswith(".jpg"):
                continue

            src = os.path.join(root, file)
            dst_name = file.replace(" ", "") if isDelSpace else file
            dst = os.path.join(new_jpg_root, dst_name)
            # copyfile refuses to copy a file onto itself.
            if os.path.abspath(src) == os.path.abspath(dst):
                continue
            copyfile(src, dst)
    print("Images copied!")


def _voc_xml_text(json_data, file_name):
    """Render one annotation dict as the text of a VOC-style xml file."""
    # Local import: the file's import block does not provide ``escape``.
    from xml.sax.saxutils import escape

    lines = [
        '<annotation>\n',
        # Training images are expected to live under JPEGImages.
        ' <folder>JPEGImages</folder>\n',
        ' <filename>' + escape(file_name) + '.jpg</filename>\n',
        ' <source>\n',
        '  <database>The Defect Detection</database>\n',
        '  <annotation>Defect Detection</annotation>\n',
        '  <image>flickr</image>\n',
        '  <flickrid>NULL</flickrid>\n',
        ' </source>\n',
        ' <size>\n',
        '  <width>' + str(json_data["imageWidth"]) + '</width>\n',
        '  <height>' + str(json_data["imageHeight"]) + '</height>\n',
        '  <depth>3</depth>\n',
        ' </size>\n',
        '  <segmented>0</segmented>\n',
    ]

    for shape in json_data["shapes"]:
        points = np.array(shape["points"])
        # A shape without usable (x, y) points cannot produce a box.
        if points.ndim != 2 or points.shape[0] == 0 or points.shape[1] < 2:
            continue
        xmin, xmax = points[:, 0].min(), points[:, 0].max()
        ymin, ymax = points[:, 1].min(), points[:, 1].max()
        if xmax <= xmin or ymax <= ymin:
            continue
        lines += [
            ' <object>\n',
            # Escape the label so "&", "<" and ">" keep the xml well-formed.
            '  <name>' + escape(str(shape["label"])) + '</name>\n',
            '  <pose>Unspecified</pose>\n',
            '  <truncated>1</truncated>\n',
            '  <difficult>0</difficult>\n',
            '  <bndbox>\n',
            '   <xmin>' + str(xmin) + '</xmin>\n',
            '   <ymin>' + str(ymin) + '</ymin>\n',
            '   <xmax>' + str(xmax) + '</xmax>\n',
            '   <ymax>' + str(ymax) + '</ymax>\n',
            '  </bndbox>\n',
            ' </object>\n',
        ]
    lines.append('</annotation>')
    return "".join(lines)
# ##################################################################
# 批量xml2json转换，并将图像文件重新复制一份
# xml_root: xml文件存储地址，只会检索xml文件
# jpg_root: 源图像存储地址
# new_json_root: 生成json文件存储地址
# new_jpg_root: 复制图像地址，若为""则不复制
# ##################################################################
def xml2json_batch(xml_root, jpg_root, new_json_root, new_jpg_root):
    """Convert every xml annotation under a folder to json via xml2json.

    Optionally copies the ".jpg" images to a new folder as well. All output
    files are written directly into the target folders, also when the
    input was found in a sub-folder.

    Args:
        xml_root: Folder searched (recursively) for ".xml" files.
        jpg_root: Folder searched (recursively) for ".jpg" files.
        new_json_root: Folder the json files are written to; created if
            missing.
        new_jpg_root: Folder the images are copied to; created if missing.
            Pass "" to skip copying.

    Returns:
        None.
    """
    # Guard against missing input folders.
    if not os.path.exists(xml_root) or not os.path.exists(jpg_root):
        print("No such folder!")
        return
    os.makedirs(new_json_root, exist_ok=True)
    copy_images = new_jpg_root != ""
    if copy_images:
        os.makedirs(new_jpg_root, exist_ok=True)

    # A trailing separator keeps the call correct whether xml2json joins or
    # simply concatenates the directory and the file name.
    json_dir = os.path.join(new_json_root, "")

    # Convert the annotations.
    print("Start conterting annotations...")
    for root, _dirs, files in os.walk(xml_root):
        for file in tqdm(files):
            if not file.endswith(".xml"):
                continue
            # os.walk descends into sub-folders, so build the path from the
            # folder the file was actually found in.
            xml2json(os.path.join(root, file), json_dir)
    print("Conterting annotations done!")

    if not copy_images:
        return

    # Copy the images.
    print("Copy images...")
    for root, _dirs, files in os.walk(jpg_root):
        for file in tqdm(files):
            if not file.endswith(".jpg"):
                continue
            src = os.path.join(root, file)
            dst = os.path.join(new_jpg_root, file)
            # copyfile refuses to copy a file onto itself.
            if os.path.abspath(src) == os.path.abspath(dst):
                continue
            copyfile(src, dst)
    print("Images copied!")


if __name__ == "__main__":
    # Batch json -> xml. Annotations and images share one folder on each
    # side, so the same path is passed for both.
    source_dir = "./json_file/"
    target_dir = "./xml_file/"
    json2xml_batch(source_dir, source_dir, target_dir, target_dir)

    # Batch xml -> json (disabled; swap the folders and enable to use):
    # xml2json_batch("./xml_file/", "./xml_file/", "./json_file/", "./json_file/")