目标检测框标注格式转换，支持json（createML格式），xml(VOC格式)，txt(yolo格式)三种格式的互相转换，含转换工具和python源码

最新推荐文章于 2024-01-17 17:32:54 发布

子非鱼_tan

最新推荐文章于 2024-01-17 17:32:54 发布

阅读量1.4k

点赞数 1

文章标签： python 目标检测 json xml YOLO

本文链接：https://blog.csdn.net/tangxiangyu0313/article/details/132357483

版权

1 前言

目标检测框的标注格式目前常用的有3种，分别是json（createML格式）、xml（VOC格式）和txt（yolo格式）。这里用python实现这3种格式之间的互相转换，并在最后提供了用python打包的exe小工具。（注：下文代码读写的json文件使用 shapes、imagePath、imageHeight、imageWidth 等字段。）

2 实现方式

2.1 解码json（createML格式）标注文件

下面是解码json格式的函数，读取json文件，将json文件的标注信息转换为一个字典形式，便于后面的转换工作：

def get_json_info(src_path, src_image_path, classes_name):
    """Load a JSON annotation file and keep only the wanted classes.

    Args:
        src_path: Path of the JSON annotation file (read as UTF-8).
        src_image_path: Not used by this reader; the parameter mirrors the
            signature of the other ``get_*_info`` readers.
        classes_name: Container of labels to keep.

    Returns:
        The parsed annotation dict, with ``"shapes"`` reduced to the entries
        whose ``"label"`` is in ``classes_name``.
    """
    # A context manager closes the file handle deterministically.
    with open(src_path, "r", encoding="utf-8") as f:
        json_info = json.load(f)

    # Build a new list instead of deleting while enumerating: deleting index
    # i moves the next shape into slot i, which the loop then never visits,
    # so two unwanted shapes in a row left the second one in place.
    json_info["shapes"] = [
        bbox for bbox in json_info["shapes"] if bbox["label"] in classes_name
    ]
    return json_info

2.2 解码xml(VOC格式)标注文件

下面是解码xml格式的函数，读取xml文件，将xml文件的标注信息转换为一个字典形式，便于后面的转换工作：

def get_xml_info(src_path, src_image_path, classes_name):
    """Read an XML annotation file into the common annotation dict.

    Args:
        src_path: Path of the XML annotation file (read as UTF-8).
        src_image_path: Path of the matching image. It is only decoded when
            the XML reports a height or width of 0.
        classes_name: Container of labels to keep; objects whose ``<name>``
            is not in it are skipped.

    Returns:
        A dict with the keys ``"shapes"``, ``"imagePath"``, ``"imageHeight"``
        and ``"imageWidth"``. Each shape is a dict with ``"label"`` and
        ``"points"`` laid out as ``[[xmax, ymax], [xmin, ymin]]``.
    """
    # The context manager closes the handle once parsing is done.
    with open(src_path, "r", encoding="utf-8") as in_file:
        root = ET.parse(in_file).getroot()

    xml_info = dict()
    xml_info["shapes"] = list()
    xml_info["imagePath"] = root.find("filename").text

    size = root.find("size")
    height = int(size.find("height").text)
    width = int(size.find("width").text)
    if height == 0 or width == 0:
        # The XML has no usable size, so take it from the image itself.
        img = cv2.imdecode(np.fromfile(src_image_path, dtype=np.uint8), -1)
        # Slice rather than unpack three values: an image decoded into a
        # 2-D array (no channel axis) would make a 3-way unpack fail.
        height, width = img.shape[:2]
    xml_info["imageHeight"] = height
    xml_info["imageWidth"] = width
    if int(height) == 0 or int(width) == 0:
        # Still no valid size: print the annotation path so it can be found.
        print(src_path)

    for obj in root.iter("object"):
        label = obj.find('name').text
        if label not in classes_name:
            continue
        bndbox = obj.find("bndbox")
        xmin = float(bndbox.find("xmin").text)
        xmax = float(bndbox.find("xmax").text)
        ymin = float(bndbox.find("ymin").text)
        ymax = float(bndbox.find("ymax").text)

        bbox_shape = dict()
        bbox_shape["label"] = label
        bbox_shape["points"] = [[xmax, ymax], [xmin, ymin]]
        xml_info["shapes"].append(bbox_shape)
    return xml_info

2.3 解码txt(yolo格式)标注文件

下面是解码txt格式的函数，读取txt文件，将txt文件的标注信息转换为一个字典形式，便于后面的转换工作：

def get_txt_info(src_path, src_image_path, classes_name):
    """Read a txt annotation file into the common annotation dict.

    Each non-blank line is read as ``class_id cx cy w h``, where the four
    coordinates are fractions that get multiplied by the image width/height.

    Args:
        src_path: Path of the txt annotation file (read as UTF-8).
        src_image_path: Path of the matching image; it is decoded to obtain
            the pixel height and width.
        classes_name: Sequence of labels indexed by the class id stored in
            the txt file.

    Returns:
        A dict with ``"shapes"`` and ``"imagePath"``, plus ``"imageHeight"``
        and ``"imageWidth"`` when the image exists. Each shape holds
        ``"label"`` and ``"points"`` as ``[[xmax, ymax], [xmin, ymin]]``.
        When the image is missing a message is printed and the dict is
        returned with an empty ``"shapes"`` list.
    """
    with open(src_path, "r", encoding="utf-8") as f:
        # split() without arguments copes with a final line that has no
        # trailing newline (slicing off the last character would cut a
        # digit), with "\r\n" endings and with repeated spaces. Blank lines
        # are dropped.
        lines = [fields for fields in (raw.split() for raw in f) if fields]

    txt_info = dict()
    txt_info["shapes"] = list()
    txt_info["imagePath"] = os.path.split(src_image_path)[-1]

    if not os.path.isfile(src_image_path):
        print("图片{}不存在，需要根据图片获取图片高宽".format(src_image_path))
        # Without the image size the fractions cannot be scaled to pixels;
        # return no shapes instead of failing later on a missing key.
        return txt_info

    img = cv2.imdecode(np.fromfile(src_image_path, dtype=np.uint8), -1)
    # Slice rather than unpack three values so a 2-D array also works.
    txt_info["imageHeight"], txt_info["imageWidth"] = img.shape[:2]

    for line in lines:
        bbox_shape = dict()
        bbox_shape["label"] = classes_name[int(line[0])]

        cx = float(line[1]) * txt_info["imageWidth"]
        cy = float(line[2]) * txt_info["imageHeight"]
        w = float(line[3]) * txt_info["imageWidth"]
        h = float(line[4]) * txt_info["imageHeight"]

        xmin = cx - w / 2
        xmax = cx + w / 2
        ymin = cy - h / 2
        ymax = cy + h / 2
        bbox_shape["points"] = [[xmax, ymax], [xmin, ymin]]

        txt_info["shapes"].append(bbox_shape)
    return txt_info

2.4 编码json（createML格式）标注文件

下面是编码json格式的函数，将上述解码成字典形式的标注信息编码成json（createML格式）标注文件。

def to_json(target_path, info, classes_name=[]):
    """Write the annotation dict ``info`` to ``target_path`` as JSON.

    Args:
        target_path: Path of the JSON file to create or overwrite.
        info: Annotation dict providing ``"shapes"`` (each with ``"label"``
            and ``"points"``), ``"imagePath"``, ``"imageHeight"`` and
            ``"imageWidth"``.
        classes_name: Not used by this writer; the parameter mirrors the
            signature of the other ``to_*`` writers.
    """
    # Every box is written as a rectangle with empty flags and no group.
    shapes = [
        {
            "label": shape["label"],
            "points": shape["points"],
            "group_id": None,
            "shape_type": "rectangle",
            "flags": {},
        }
        for shape in info["shapes"]
    ]
    document = {
        "version": "4.5.10",
        "flags": {},
        "shapes": shapes,
        "imagePath": info["imagePath"],
        "imageData": None,
        "imageHeight": info["imageHeight"],
        "imageWidth": info["imageWidth"],
    }
    with open(target_path, "w") as out:
        out.write(json.dumps(document, indent=2, separators=(',', ': ')))

2.5 编码xml(VOC格式)标注文件

下面是编码xml格式的函数，将上述解码成字典形式的标注信息编码成xml(VOC格式)标注文件。

def to_xml(target_path, info, classes_name=[]):
    """Write the annotation dict ``info`` to ``target_path`` as XML.

    Args:
        target_path: Path of the XML file to create or overwrite.
        info: Annotation dict providing ``"imagePath"``, ``"imageWidth"``,
            ``"imageHeight"`` and ``"shapes"`` (each with ``"label"`` and
            ``"points"``).
        classes_name: Not used by this writer; the parameter mirrors the
            signature of the other ``to_*`` writers.
    """
    def add_child(parent, tag, text=None):
        # Append <tag> under parent, optionally setting its text.
        node = ET.SubElement(parent, tag)
        if text is not None:
            node.text = text
        return node

    annotation = ET.Element("annotation")
    add_child(annotation, 'folder', 'images')
    add_child(annotation, 'filename', info["imagePath"])

    size_node = add_child(annotation, 'size')
    add_child(size_node, "width", str(info["imageWidth"]))
    add_child(size_node, "height", str(info["imageHeight"]))
    add_child(size_node, "depth", str(3))

    for shape in info["shapes"]:  # there may be several boxes
        corners = np.array(shape["points"])
        label = shape["label"]
        xs, ys = corners[:, 0], corners[:, 1]
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)

        # Boxes with no width or no height are left out of the file.
        if right <= left or bottom <= top:
            continue

        object_node = add_child(annotation, "object")
        add_child(object_node, "name", label)
        add_child(object_node, "pose", "Unspecified")
        add_child(object_node, "truncated", str(0))
        add_child(object_node, "difficult", str(0))

        box_node = add_child(object_node, "bndbox")
        # int() truncates the coordinates toward zero before writing.
        add_child(box_node, "xmin", str(int(left)))
        add_child(box_node, "xmax", str(int(right)))
        add_child(box_node, "ymin", str(int(top)))
        add_child(box_node, "ymax", str(int(bottom)))

    pretty_xml(annotation, '\t', '\n')
    ET.ElementTree(annotation).write(
        target_path, encoding="utf-8", xml_declaration=True
    )

def pretty_xml(element, indent, newline, level=0):
    """Indent an ElementTree element and its descendants in place.

    Args:
        element: The element whose ``text``/``tail`` whitespace is rewritten.
        indent: String used for one indentation level (e.g. ``"\\t"``).
        newline: String used as the line break (e.g. ``"\\n"``).
        level: Nesting depth of ``element``; 0 for the root.
    """
    children = list(element)
    # Check the child list instead of truth-testing the element itself:
    # testing an Element's truth value is deprecated.
    if children:
        if (element.text is None) or element.text.isspace():
            element.text = newline + indent * (level + 1)
        else:
            element.text = (
                newline + indent * (level + 1) + element.text.strip()
                + newline + indent * (level + 1)
            )

    # enumerate() gives each child's position directly, avoiding a linear
    # list.index() search per child.
    last = len(children) - 1
    for position, child in enumerate(children):
        if position < last:
            # A sibling follows: it starts at the same depth as this child.
            child.tail = newline + indent * (level + 1)
        else:
            # Last child: the parent's closing tag sits one level shallower.
            child.tail = newline + indent * level
        pretty_xml(child, indent, newline, level=level + 1)

2.6 编码txt(yolo格式)标注文件

下面是编码txt格式的函数，将上述解码成字典形式的标注信息编码成txt(yolo格式)标注文件。

def to_txt(target_path, info, classes_name=[]):
    """Write the annotation dict ``info`` to ``target_path`` as txt lines.

    One line ``class_id cx cy w h`` is written per shape, where the class id
    is the label's index in ``classes_name`` and the coordinates are the box
    centre and size scaled by ``1 / imageWidth`` and ``1 / imageHeight``.

    Args:
        target_path: Path of the txt file to create or overwrite.
        info: Annotation dict providing ``"shapes"``, ``"imageWidth"`` and
            ``"imageHeight"``.
        classes_name: List of labels. When it is an empty list a message is
            printed and nothing is written.
    """
    if classes_name == []:
        print("classes_name不能为空，需要传入classes_name参数")
        return
    with open(target_path, "w") as out:
        for shape in info["shapes"]:
            label_index = str(classes_name.index(shape["label"]))

            corners = np.array(shape["points"])
            xs, ys = corners[:, 0], corners[:, 1]
            left, right = min(xs), max(xs)
            top, bottom = min(ys), max(ys)

            inv_width = 1.0 / info["imageWidth"]
            inv_height = 1.0 / info["imageHeight"]

            centre_x = (right + left) / 2.0
            centre_y = (bottom + top) / 2.0
            box_w = right - left
            box_h = bottom - top

            fields = (
                label_index,
                str(centre_x * inv_width),
                str(centre_y * inv_height),
                str(box_w * inv_width),
                str(box_h * inv_height),
            )
            out.write(" ".join(fields) + "\n")

3 总代码

上述编码、解码函数写好后，剩下的就是调用这些函数完成转换。下面是使用这些函数的完整代码。

import os


import math
import numpy as np


import cv2


import shutil


import json
from xml.etree import ElementTree as ET




def pretty_xml(element, indent, newline, level=0):
    """Indent an ElementTree element and its descendants in place.

    Args:
        element: The element whose ``text``/``tail`` whitespace is rewritten.
        indent: String used for one indentation level (e.g. ``"\\t"``).
        newline: String used as the line break (e.g. ``"\\n"``).
        level: Nesting depth of ``element``; 0 for the root.
    """
    children = list(element)
    # Check the child list instead of truth-testing the element itself:
    # testing an Element's truth value is deprecated.
    if children:
        if (element.text is None) or element.text.isspace():
            element.text = newline + indent * (level + 1)
        else:
            element.text = (
                newline + indent * (level + 1) + element.text.strip()
                + newline + indent * (level + 1)
            )

    # enumerate() gives each child's position directly, avoiding a linear
    # list.index() search per child.
    last = len(children) - 1
    for position, child in enumerate(children):
        if position < last:
            # A sibling follows: it starts at the same depth as this child.
            child.tail = newline + indent * (level + 1)
        else:
            # Last child: the parent's closing tag sits one level shallower.
            child.tail = newline + indent * level
        pretty_xml(child, indent, newline, level=level + 1)




def get_json_info(src_path, src_image_path, classes_name):
    """Load a JSON annotation file and keep only the wanted classes.

    Args:
        src_path: Path of the JSON annotation file (read as UTF-8).
        src_image_path: Not used by this reader; the parameter mirrors the
            signature of the other ``get_*_info`` readers.
        classes_name: Container of labels to keep.

    Returns:
        The parsed annotation dict, with ``"shapes"`` reduced to the entries
        whose ``"label"`` is in ``classes_name``.
    """
    # A context manager closes the file handle deterministically.
    with open(src_path, "r", encoding="utf-8") as f:
        json_info = json.load(f)

    # Build a new list instead of deleting while enumerating: deleting index
    # i moves the next shape into slot i, which the loop then never visits,
    # so two unwanted shapes in a row left the second one in place.
    json_info["shapes"] = [
        bbox for bbox in json_info["shapes"] if bbox["label"] in classes_name
    ]
    return json_info




def get_txt_info(src_path, src_image_path, classes_name):
    """Read a txt annotation file into the common annotation dict.

    Each non-blank line is read as ``class_id cx cy w h``, where the four
    coordinates are fractions that get multiplied by the image width/height.

    Args:
        src_path: Path of the txt annotation file (read as UTF-8).
        src_image_path: Path of the matching image; it is decoded to obtain
            the pixel height and width.
        classes_name: Sequence of labels indexed by the class id stored in
            the txt file.

    Returns:
        A dict with ``"shapes"`` and ``"imagePath"``, plus ``"imageHeight"``
        and ``"imageWidth"`` when the image exists. Each shape holds
        ``"label"`` and ``"points"`` as ``[[xmax, ymax], [xmin, ymin]]``.
        When the image is missing a message is printed and the dict is
        returned with an empty ``"shapes"`` list.
    """
    with open(src_path, "r", encoding="utf-8") as f:
        # split() without arguments copes with a final line that has no
        # trailing newline (slicing off the last character would cut a
        # digit), with "\r\n" endings and with repeated spaces. Blank lines
        # are dropped.
        lines = [fields for fields in (raw.split() for raw in f) if fields]

    txt_info = dict()
    txt_info["shapes"] = list()
    txt_info["imagePath"] = os.path.split(src_image_path)[-1]

    if not os.path.isfile(src_image_path):
        print("图片{}不存在，需要根据图片获取图片高宽".format(src_image_path))
        # Without the image size the fractions cannot be scaled to pixels;
        # return no shapes instead of failing later on a missing key.
        return txt_info

    img = cv2.imdecode(np.fromfile(src_image_path, dtype=np.uint8), -1)
    # Slice rather than unpack three values so a 2-D array also works.
    txt_info["imageHeight"], txt_info["imageWidth"] = img.shape[:2]

    for line in lines:
        bbox_shape = dict()
        bbox_shape["label"] = classes_name[int(line[0])]

        cx = float(line[1]) * txt_info["imageWidth"]
        cy = float(line[2]) * txt_info["imageHeight"]
        w = float(line[3]) * txt_info["imageWidth"]
        h = float(line[4]) * txt_info["imageHeight"]

        xmin = cx - w / 2
        xmax = cx + w / 2
        ymin = cy - h / 2
        ymax = cy + h / 2
        bbox_shape["points"] = [[xmax, ymax], [xmin, ymin]]

        txt_info["shapes"].append(bbox_shape)
    return txt_info




def get_xml_info(src_path, src_image_path, classes_name):
    """Read an XML annotation file into the common annotation dict.

    Args:
        src_path: Path of the XML annotation file (read as UTF-8).
        src_image_path: Path of the matching image. It is only decoded when
            the XML reports a height or width of 0.
        classes_name: Container of labels to keep; objects whose ``<name>``
            is not in it are skipped.

    Returns:
        A dict with the keys ``"shapes"``, ``"imagePath"``, ``"imageHeight"``
        and ``"imageWidth"``. Each shape is a dict with ``"label"`` and
        ``"points"`` laid out as ``[[xmax, ymax], [xmin, ymin]]``.
    """
    # The context manager closes the handle once parsing is done.
    with open(src_path, "r", encoding="utf-8") as in_file:
        root = ET.parse(in_file).getroot()

    xml_info = dict()
    xml_info["shapes"] = list()
    xml_info["imagePath"] = root.find("filename").text

    size = root.find("size")
    height = int(size.find("height").text)
    width = int(size.find("width").text)
    if height == 0 or width == 0:
        # The XML has no usable size, so take it from the image itself.
        img = cv2.imdecode(np.fromfile(src_image_path, dtype=np.uint8), -1)
        # Slice rather than unpack three values: an image decoded into a
        # 2-D array (no channel axis) would make a 3-way unpack fail.
        height, width = img.shape[:2]
    xml_info["imageHeight"] = height
    xml_info["imageWidth"] = width
    if int(height) == 0 or int(width) == 0:
        # Still no valid size: print the annotation path so it can be found.
        print(src_path)

    for obj in root.iter("object"):
        label = obj.find('name').text
        if label not in classes_name:
            continue
        bndbox = obj.find("bndbox")
        xmin = float(bndbox.find("xmin").text)
        xmax = float(bndbox.find("xmax").text)
        ymin = float(bndbox.find("ymin").text)
        ymax = float(bndbox.find("ymax").text)

        bbox_shape = dict()
        bbox_shape["label"] = label
        bbox_shape["points"] = [[xmax, ymax], [xmin, ymin]]
        xml_info["shapes"].append(bbox_shape)
    return xml_info




def to_json(target_path, info, classes_name=[]):
    """Write the annotation dict ``info`` to ``target_path`` as JSON.

    Args:
        target_path: Path of the JSON file to create or overwrite.
        info: Annotation dict providing ``"shapes"`` (each with ``"label"``
            and ``"points"``), ``"imagePath"``, ``"imageHeight"`` and
            ``"imageWidth"``.
        classes_name: Not used by this writer; the parameter mirrors the
            signature of the other ``to_*`` writers.
    """
    # Every box is written as a rectangle with empty flags and no group.
    shapes = [
        {
            "label": shape["label"],
            "points": shape["points"],
            "group_id": None,
            "shape_type": "rectangle",
            "flags": {},
        }
        for shape in info["shapes"]
    ]
    document = {
        "version": "4.5.10",
        "flags": {},
        "shapes": shapes,
        "imagePath": info["imagePath"],
        "imageData": None,
        "imageHeight": info["imageHeight"],
        "imageWidth": info["imageWidth"],
    }
    with open(target_path, "w") as out:
        out.write(json.dumps(document, indent=2, separators=(',', ': ')))




def to_xml(target_path, info, classes_name=[]):
    """Write the annotation dict ``info`` to ``target_path`` as XML.

    Args:
        target_path: Path of the XML file to create or overwrite.
        info: Annotation dict providing ``"imagePath"``, ``"imageWidth"``,
            ``"imageHeight"`` and ``"shapes"`` (each with ``"label"`` and
            ``"points"``).
        classes_name: Not used by this writer; the parameter mirrors the
            signature of the other ``to_*`` writers.
    """
    def add_child(parent, tag, text=None):
        # Append <tag> under parent, optionally setting its text.
        node = ET.SubElement(parent, tag)
        if text is not None:
            node.text = text
        return node

    annotation = ET.Element("annotation")
    add_child(annotation, 'folder', 'images')
    add_child(annotation, 'filename', info["imagePath"])

    size_node = add_child(annotation, 'size')
    add_child(size_node, "width", str(info["imageWidth"]))
    add_child(size_node, "height", str(info["imageHeight"]))
    add_child(size_node, "depth", str(3))

    for shape in info["shapes"]:  # there may be several boxes
        corners = np.array(shape["points"])
        label = shape["label"]
        xs, ys = corners[:, 0], corners[:, 1]
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)

        # Boxes with no width or no height are left out of the file.
        if right <= left or bottom <= top:
            continue

        object_node = add_child(annotation, "object")
        add_child(object_node, "name", label)
        add_child(object_node, "pose", "Unspecified")
        add_child(object_node, "truncated", str(0))
        add_child(object_node, "difficult", str(0))

        box_node = add_child(object_node, "bndbox")
        # int() truncates the coordinates toward zero before writing.
        add_child(box_node, "xmin", str(int(left)))
        add_child(box_node, "xmax", str(int(right)))
        add_child(box_node, "ymin", str(int(top)))
        add_child(box_node, "ymax", str(int(bottom)))

    pretty_xml(annotation, '\t', '\n')
    ET.ElementTree(annotation).write(
        target_path, encoding="utf-8", xml_declaration=True
    )




def to_txt(target_path, info, classes_name=[]):
    """Write the annotation dict ``info`` to ``target_path`` as txt lines.

    One line ``class_id cx cy w h`` is written per shape, where the class id
    is the label's index in ``classes_name`` and the coordinates are the box
    centre and size scaled by ``1 / imageWidth`` and ``1 / imageHeight``.

    Args:
        target_path: Path of the txt file to create or overwrite.
        info: Annotation dict providing ``"shapes"``, ``"imageWidth"`` and
            ``"imageHeight"``.
        classes_name: List of labels. When it is an empty list a message is
            printed and nothing is written.
    """
    if classes_name == []:
        print("classes_name不能为空，需要传入classes_name参数")
        return
    with open(target_path, "w") as out:
        for shape in info["shapes"]:
            label_index = str(classes_name.index(shape["label"]))

            corners = np.array(shape["points"])
            xs, ys = corners[:, 0], corners[:, 1]
            left, right = min(xs), max(xs)
            top, bottom = min(ys), max(ys)

            inv_width = 1.0 / info["imageWidth"]
            inv_height = 1.0 / info["imageHeight"]

            centre_x = (right + left) / 2.0
            centre_y = (bottom + top) / 2.0
            box_w = right - left
            box_h = bottom - top

            fields = (
                label_index,
                str(centre_x * inv_width),
                str(centre_y * inv_height),
                str(box_w * inv_width),
                str(box_h * inv_height),
            )
            out.write(" ".join(fields) + "\n")

def makedir(path):
    """Create directory ``path`` (with missing parents) if it is absent.

    Args:
        path: Directory path to create.
    """
    # exist_ok=True removes the gap between checking for the directory and
    # creating it, during which another process could create it first.
    os.makedirs(path, exist_ok=True)
    

if __name__ == "__main__":
    # Labels to convert. get_txt_info/to_txt use the position in this list
    # as the class id.
    # The list was previously appended to itself here, which only added a
    # self-referencing element that is not a valid label; that is removed.
    classes_name = ["smoke", "wire"]
    src = r""
    tar = src
    src_label_format = "txt"
    tar_label_format = "xml"

    is_copyimg = False
    is_getnolabelimg = False

    # Explicit dispatch tables instead of eval() on a formatted name.
    readers = {"json": get_json_info, "xml": get_xml_info, "txt": get_txt_info}
    writers = {"json": to_json, "xml": to_xml, "txt": to_txt}
    read_labels = readers[src_label_format]
    write_labels = writers[tar_label_format]

    for phase in ["train", "val"]:
        tar_labels_dir = os.path.join(tar, "labels_" + tar_label_format, phase)
        makedir(tar_labels_dir)
        if is_copyimg:
            tar_images_dir = os.path.join(tar, "images", phase)
            makedir(tar_images_dir)
        if is_getnolabelimg:
            noLableDir = os.path.join(tar, "noLabelDir", phase)
            makedir(noLableDir)

        for image_name in os.listdir(os.path.join(src, "images", phase)):
            image_path = os.path.join(src, "images", phase, image_name)

            stem = os.path.splitext(image_name)[0]
            src_label_name = stem + "." + src_label_format
            tar_label_name = stem + "." + tar_label_format

            src_label_path = os.path.join(
                src, "labels_" + src_label_format, phase, src_label_name
            )

            print(src_label_path)
            # An empty list means "no usable annotation for this image".
            info = []
            # Only call the reader when the label file exists; before, the
            # reader ran regardless and relied on the exception handler.
            if os.path.exists(src_label_path):
                try:
                    info = read_labels(src_label_path, image_path, classes_name)
                    print(info)
                except Exception as exc:
                    # Best effort: report the failure and treat the image as
                    # unlabelled rather than hiding the error entirely.
                    print("failed to read {}: {}".format(src_label_path, exc))
                    info = []
            if info != [] and info["shapes"] == []:
                info = []
            print(info)
            if info != []:
                labels = [shape["label"] for shape in info["shapes"]]
                # NOTE(review): presumably "del" marks samples to set aside;
                # it can only occur if "del" is listed in classes_name --
                # confirm the intended workflow.
                if "del" in labels:
                    # Make sure the destinations are directories; otherwise
                    # shutil.move would rename the file to "res" itself.
                    os.makedirs("./res", exist_ok=True)
                    os.makedirs("./res_label", exist_ok=True)
                    shutil.move(image_path, "./res")
                    shutil.move(src_label_path, "./res_label")
                    continue

            if info:
                tar_label_path = os.path.join(tar_labels_dir, tar_label_name)
                write_labels(tar_label_path, info, classes_name)

            if is_copyimg:
                # Unlabelled images go to their own folder only when
                # is_getnolabelimg is set; everything else is copied to the
                # image folder.
                if is_getnolabelimg and not info:
                    shutil.copy(image_path, noLableDir)
                else:
                    shutil.copy(image_path, tar_images_dir)