自学python（3）:python处理各种标注文件的格式转换(json，txt，xml相互转化)

vitalgirl

已于 2022-04-30 15:42:30 修改

阅读量2.2k

点赞数 1

文章标签：深度学习神经网络

于 2021-04-22 14:38:56 首次发布

本文链接：https://blog.csdn.net/qq_34885993/article/details/116013225

版权

#txt转xml

# coding: utf-8
# author: HXY
# 2020-4-17

"""
该脚本用于visdrone数据处理；
将annotations文件夹中的txt标签文件转换为XML文件；
txt标签内容为：
<bbox_left>,<bbox_top>,<bbox_width>,<bbox_height>,<score>,<object_category>,<truncation>,<occlusion>
类别：
ignored regions(0), pedestrian(1),
people(2), bicycle(3), car(4), van(5),
truck(6), tricycle(7), awning-tricycle(8),
bus(9), motor(10), others(11)
"""

import os
import cv2
import time
from xml.dom import minidom

name_dict = {'0': 'ignored regions', '1': 'pedestrian', '2': 'people',
             '3': 'bicycle', '4': 'car', '5': 'van', '6': 'truck',
             '7': 'tricycle', '8': 'awning-tricycle', '9': 'bus',
             '10': 'motor', '11': 'others'}


def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>``, append it to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def transfer_to_xml(pic, txt, file_name,
                    xml_save_path='F:/bling/data/VisDrone2019-DET-train/Annotations_XML'):
    """Convert one comma-separated txt label file into an XML annotation file.

    Each non-empty line of *txt* is split on ``,``; fields 0-3 are read as
    ``left, top, width, height`` and field 5 as the category id, which is
    looked up in the module-level ``name_dict``.

    Args:
        pic: Path of the image the labels belong to (read with ``cv2.imread``
            to obtain width, height and channel count).
        txt: Path of the txt label file.
        file_name: Base name (no extension) used for the ``<filename>`` node
            and for the output file ``<file_name>.xml``.
        xml_save_path: Folder that receives the XML file; created if missing.

    Raises:
        OSError: If the image cannot be read.
        KeyError: If a line carries a category id that is not in ``name_dict``.
        ValueError, IndexError: If a line is malformed.

    The XML file is written once, after all lines were parsed, so a label
    file without any box still yields an annotation (with no ``<object>``).
    """
    os.makedirs(xml_save_path, exist_ok=True)

    img = cv2.imread(pic)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cannot read image: {}'.format(pic))
    img_h, img_w = img.shape[0], img.shape[1]
    img_d = img.shape[2] if len(img.shape) > 2 else 1

    doc = minidom.Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'visdrone')
    _append_text_element(doc, annotation, 'filename', file_name)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'Unknown')

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', img_w)
    _append_text_element(doc, size, 'height', img_h)
    _append_text_element(doc, size, 'depth', img_d)

    _append_text_element(doc, annotation, 'segmented', '0')

    with open(txt, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue  # tolerate blank lines
            box = line.split(',')
            x_min = int(box[0])
            y_min = int(box[1])
            x_max = x_min + int(box[2])
            y_max = y_min + int(box[3])
            object_name = name_dict[box[5].strip()]

            obj = doc.createElement('object')
            annotation.appendChild(obj)
            _append_text_element(doc, obj, 'name', object_name)
            _append_text_element(doc, obj, 'pose', 'Unspecified')
            # NOTE(review): 'truncated' is always written as "1"; the txt
            # truncation field (index 6) is not used — confirm this is intended.
            _append_text_element(doc, obj, 'truncated', '1')
            _append_text_element(doc, obj, 'difficult', '0')

            bndbox = doc.createElement('bndbox')
            obj.appendChild(bndbox)
            _append_text_element(doc, bndbox, 'xmin', x_min)
            _append_text_element(doc, bndbox, 'ymin', y_min)
            _append_text_element(doc, bndbox, 'xmax', x_max)
            _append_text_element(doc, bndbox, 'ymax', y_max)

    # toprettyxml() emits a declaration without an encoding, which XML
    # parsers interpret as UTF-8, so write the file as UTF-8 explicitly.
    out_file = os.path.join(xml_save_path, file_name + '.xml')
    with open(out_file, 'w', encoding='utf-8') as x:
        x.write(doc.toprettyxml())


if __name__ == '__main__':
    # Convert every .txt label file of the dataset into an XML annotation.
    t = time.time()
    print('Transfer .txt to .xml...ing....')
    txt_folder = 'F:/bling/data/VisDrone2019-DET-train/annotations'  # folder with the visdrone txt labels
    txt_file = os.listdir(txt_folder)
    img_folder = 'F:/bling/data/VisDrone2019-DET-train/images'  # folder with the visdrone images

    for txt in txt_file:
        # splitext keeps dots inside the base name intact, unlike split('.')[0].
        stem, ext = os.path.splitext(txt)
        if ext.lower() != '.txt':
            continue  # ignore anything that is not a label file

        txt_full_path = os.path.join(txt_folder, txt)
        img_full_path = os.path.join(img_folder, stem + '.jpg')

        try:
            transfer_to_xml(img_full_path, txt_full_path, stem)
        except Exception as e:
            # Best effort: report which file failed and carry on with the rest.
            print('{}: {}'.format(txt, e))

    print("Transfer .txt to .XML sucessed. costed: {:.3f}s...".format(time.time() - t))

#xml转txt

import os
import xml.etree.ElementTree as ET

# Convert every xml annotation in `dirpath` into a txt label file in `newdir`.
# Each output line is "<label> <x_center> <y_center> <w> <h>", with the box
# values normalised by the image width/height read from the <size> node.
# NOTE(review): the label written is the <name> text, not a class index, and a
# name containing spaces would break the space-separated layout — confirm.
dirpath = 'F:/bling/data/xml/'     # directory holding the source xml files
newdir = 'F:/bling/data/txt/'  # directory receiving the generated txt label files

os.makedirs(newdir, exist_ok=True)

for fp in os.listdir(dirpath):
    stem, ext = os.path.splitext(fp)
    if ext.lower() != '.xml':
        continue  # skip stray non-xml entries instead of crashing the parser

    root = ET.parse(os.path.join(dirpath, fp)).getroot()

    sz = root.find('size')
    # Look the nodes up by tag so the result does not depend on child order.
    width = float(sz.find('width').text)
    height = float(sz.find('height').text)
    filename = root.find('filename').text

    if width == 0 or height == 0:
        # Normalisation is impossible; report and skip the file rather than
        # writing stale or undefined values.
        print(filename,'的 width有问题')
        continue

    rows = []
    for child in root.findall('object'):         # every annotated box of the image
        sub = child.find('bndbox')               # coordinates of this box
        label = child.find('name').text
        xmin = float(sub.find('xmin').text)
        ymin = float(sub.find('ymin').text)
        xmax = float(sub.find('xmax').text)
        ymax = float(sub.find('ymax').text)

        # Normalise centre and size to the 0-1 range.
        x_center = (xmin + xmax) / (2 * width)
        y_center = (ymin + ymax) / (2 * height)
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height
        rows.append(' '.join([str(label), str(x_center), str(y_center), str(w), str(h) + '\n']))

    if rows:
        # Write once per image in 'w' mode so re-running the script does not
        # append duplicate labels to an existing file.
        with open(os.path.join(newdir, stem + '.txt'), 'w') as f:
            f.writelines(rows)

print('ok')

#json转xml

import xmltodict
import json
import os
# json to xml
def jsonToXml(json_str):
    """Serialise an already-parsed JSON object to an XML string.

    The object is first handed to ``xmltodict.unparse`` as is.  If that
    fails, it is wrapped as ``{'request': json_str}`` and serialised again.

    Args:
        json_str: The parsed JSON data (despite the name, not a string).

    Returns:
        The XML document produced by ``xmltodict.unparse``.

    Raises:
        Exception: Whatever ``xmltodict.unparse`` raises if the wrapped
            fallback fails too.  (Previously a ``return`` inside ``finally``
            hid every error, including ``KeyboardInterrupt``, and an empty
            string was returned instead.)
    """
    try:
        return xmltodict.unparse(json_str, encoding='utf-8')
    except Exception:
        # Retry under a synthetic <request> root element.
        return xmltodict.unparse({'request': json_str}, encoding='utf-8')

def json_to_xml(json_path, xml_path):
    """Convert every file in *json_path* to an XML file in *xml_path*.

    Args:
        json_path: Folder whose entries are all loaded with ``json.load``;
            it must therefore contain only JSON files.
        xml_path: Output folder, created if it does not exist.  Each result
            is stored as ``<input base name>.xml`` encoded as UTF-8.
    """
    os.makedirs(xml_path, exist_ok=True)
    for file in os.listdir(json_path):
        # splitext keeps dots inside the base name ("a.b.json" -> "a.b").
        stem = os.path.splitext(file)[0]
        # Binary mode lets json.load handle the decoding itself instead of
        # relying on the platform's locale encoding.
        with open(os.path.join(json_path, file), 'rb') as load_f:
            load_dict = json.load(load_f)
        json_result = jsonToXml(load_dict)
        with open(os.path.join(xml_path, stem + ".xml"), 'w', encoding="UTF-8") as f:
            f.write(json_result)
if __name__ == '__main__':
    # Input folder with the json files (ps: it may contain json files only).
    json_path = r"F:/bling/data/json"
    # Output folder for the generated xml files.
    xml_path = r"F:/bling/data/train"
    json_to_xml(json_path=json_path, xml_path=xml_path)

#xml转json

import cv2
import  xml.etree.ElementTree as ET 
import numpy as np
import os
import json
import shutil
import base64
'''
该脚本实现将xml类型标签(或者yolo格式标签)转为json格式标签
需要的数据：原始图像 原始xml标签（原始txt标签）

'''

# 解析数据集，输入单张图片路径，图片路径不能出现中文，因为是cv2读取的。和对应xml文件的路径
# 返回图片 该图所有的目标框[[x1,y1,x2,y2],....]  每个框的类别[label1, label2, label3,.....]  注意是label而不是索引
def parse_img_label(img_path, xml_path):  # absolute paths
    """Load an image together with the boxes stored in its xml annotation.

    Args:
        img_path: Path of the image, read with ``cv2.imread``.
        xml_path: Path of the xml file describing that image.

    Returns:
        ``(img, bboxes)`` where ``img`` is the array returned by
        ``cv2.imread`` and ``bboxes`` is a list of
        ``[xmin, ymin, xmax, ymax, label]`` entries, one per ``<object>``.
        ``label`` is the ``<name>`` text (a name, not an index).

    Raises:
        OSError: If the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None on failure; fail here with a clear message.
        raise OSError(f"cannot read image: {img_path}")

    root = ET.parse(xml_path).getroot()
    bboxes = []  # one [x1, y1, x2, y2, label] entry per object
    for obj in root.findall('object'):
        # Look children up by tag rather than by position so that optional
        # or reordered nodes inside <object>/<bndbox> cannot shift the values.
        label = obj.find('name').text.strip(' ')
        bndbox = obj.find('bndbox')
        box = [int(float(bndbox.find(tag).text))
               for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        box.append(label)
        bboxes.append(box)
    return img, bboxes

# 该函数用于将yolo的标签转回xml需要的标签。。即将归一化后的坐标转为原始的像素坐标
def convert_yolo_xml(box,img):
    """Turn a normalised ``[x, y, w, h]`` box into pixel corner coordinates.

    ``box`` holds the centre (x, y) and the size (w, h) as fractions of the
    image size; ``img.shape[0]`` / ``img.shape[1]`` supply the image height
    and width.  Returns ``[x1, y1, x2, y2]`` with every value passed
    through ``int``.
    """
    cx, cy, bw, bh = box[0], box[1], box[2], box[3]
    img_h = img.shape[0]
    img_w = img.shape[1]

    # Far edge first, then step back by the full box extent.
    right = (2*cx + bw)*img_w / 2
    left = right - bw*img_w
    bottom = (2*cy + bh)*img_h / 2
    top = bottom - bh*img_h

    return [int(value) for value in (left, top, right, bottom)]

# 该函数用于解析yolo格式的数据集，即txt格式的标注 返回图像 边框坐标 真实标签名（不是索引，因此需要预先定义标签）
def parse_img_txt(img_path, txt_path, name_label=None):
    """Load an image together with the boxes stored in its yolo-style txt file.

    Every line that splits into exactly five whitespace-separated fields is
    read as ``class_index x y w h``; other lines are ignored.  The box is
    converted to pixel corners with ``convert_yolo_xml``.

    Args:
        img_path: Path of the image, read with ``cv2.imread``.
        txt_path: Path of the txt label file.
        name_label: Class names indexed by the class index found in the txt
            file.  Defaults to ``['class0', 'class1', 'class2']``; supply
            your own list in the order used by the labels.

    Returns:
        ``(img, bboxes)`` where ``bboxes`` is a list of
        ``[x1, y1, x2, y2, label]`` entries.

    Raises:
        OSError: If the image cannot be read.
        IndexError: If a class index is outside ``name_label``.
    """
    if name_label is None:
        name_label = ['class0','class1','class2']

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None on failure; fail here with a clear message.
        raise OSError(f"cannot read image: {img_path}")

    bboxes = []
    with open(txt_path) as f:  # closed even if a line fails to parse
        for line in f:
            # split() without an argument tolerates trailing or repeated
            # spaces, which split(" ") turned into extra fields.
            fields = line.split()
            if len(fields) != 5:
                continue
            obj_label = name_label[int(fields[0])]  # class index -> class name
            x, y, w, h = (float(value) for value in fields[1:])
            box = convert_yolo_xml([x, y, w, h], img)
            box.append(obj_label)
            bboxes.append(box)
    return img, bboxes



# 制作labelme格式的标签
# 参数说明 img_name： 图像文件名称 
# txt_name: 标签文件的绝对路径，注意是绝对路径
# prefix： 图像文件的上级目录名。即形如/home/xjzh/data/ 而img_name是其下的文件名，如00001.jpg
# prefix+img_name即为图像的绝对路径。该路径不能出现中文，否则cv2读取会有问题
# 
def get_json(img_name, txt_name, prefix, yolo=False):
    """Build a labelme-style label dictionary for one image.

    Args:
        img_name: File name of the image inside *prefix*.
        txt_name: Path of the label file: an xml annotation by default, or a
            yolo-style txt file when *yolo* is true.
        prefix: Folder containing the image.  It is joined with *img_name*
            via ``os.path.join``, so a trailing separator is optional.
        yolo: Parse *txt_name* with ``parse_img_txt`` instead of
            ``parse_img_label``.

    Returns:
        A dict with the keys ``imagePath``, ``fillColor``, ``lineColor``,
        ``flag``, ``version``, ``imageData`` (base64 of the image file),
        ``imageHeight``, ``imageWidth`` and ``shapes`` (one rectangle entry
        per box).
    """
    img_file = os.path.join(prefix, img_name)

    label_dict = {}  # filled key by key; insertion order is the output order
    label_dict["imagePath"] = img_file
    label_dict["fillColor"] = [255,0,0,128]  # fill colour of the regions, RGBA
    label_dict["lineColor"] = [0,255,0,128]  # outline colour
    # NOTE(review): labelme files usually carry a top-level "flags" key;
    # "flag" is kept unchanged here — confirm which one is wanted.
    label_dict["flag"] = {}
    label_dict["version"] = "3.16.7"  # arbitrary version string

    # Embed the raw image bytes as base64 text.
    with open(img_file, "rb") as f:
        label_dict["imageData"] = base64.b64encode(f.read()).decode('utf-8')

    if yolo:
        img, gt_box = parse_img_txt(img_file, txt_name)
    else:
        img, gt_box = parse_img_label(img_file, txt_name)

    label_dict["imageHeight"] = img.shape[0]
    label_dict["imageWidth"] = img.shape[1]

    shape_list = []  # value of the "shapes" key: one dict per box
    for box in gt_box:  # box is [x1, y1, x2, y2, label]
        shape_dict = {}
        shape_dict["shape_type"] = "rectangle"  # xml / yolo labels are rectangles
        shape_dict["fill_color"] = None
        shape_dict["line_color"] = None
        shape_dict["flags"] = {}
        shape_dict["label"] = box[-1]
        shape_dict["points"] = [[box[0],box[1]], [box[2], box[3]]]
        shape_list.append(shape_dict)

    label_dict["shapes"] = shape_list
    return label_dict

# Build one labelme-style json file per image found in `imgs_path`, using the
# xml annotation with the same base name from `xmls_path`.
imgs_path = "F:/bling/data/images/"  # image folder
xmls_path ="F:/bling/data/xml/" # folder with the xml annotations

img_path = os.listdir(imgs_path)
out_json = 'F:/bling/data/json/'  # folder receiving the json files
os.makedirs(out_json, exist_ok=True)

for nums, path in enumerate(img_path):
    if nums %200==0:
        print(f"processed {nums} images")
    # Derive sibling file names from the base name; str.replace('jpg', ...)
    # also rewrote "jpg" inside the name and ignored other extensions.
    stem = os.path.splitext(path)[0]
    xml_path = os.path.join(xmls_path, stem + '.xml')  # xml file of this image
    label_dict = get_json(path, xml_path,prefix=imgs_path)
    # ensure_ascii=False may emit non-ASCII text, so fix the encoding to UTF-8.
    with open(os.path.join(out_json, stem + '.json'), 'w', encoding='utf-8') as f:
        f.write(json.dumps(label_dict, ensure_ascii=False, indent=4, separators=(',', ':')))

vitalgirl

关注

1
点赞
踩
21

收藏

觉得还不错? 一键收藏
4
评论
自学python（3）:python处理各种标注文件的格式转换(json，txt，xml相互转化)

#txt转xml# coding: utf-8# author: HXY# 2020-4-17"""该脚本用于visdrone数据处理；将annatations文件夹中的txt标签文件转换为XML文件；txt标签内容为：<bbox_left>,<bbox_top>,<bbox_width>,<bbox_height>,<score>,<object_category>,<truncation>,<
复制链接

扫一扫