python将FLIR数据集中json标签转换成xml文件--仅限目标检测

1.新建一个json文件,从已有的json文件中提取需要用到的类别。比如在下文代码中,提取了person、car、bus、truck四个类别,代码如下所示:

import json

'''
    挑选出想要的类别
'''

# Category ids to keep, mapped to the class name written to the new file.
className = {1: 'person', 3: 'car', 6: 'bus', 8: 'truck'}

# Path of the original COCO-style annotation file.
coco_json_path = 'C:\\Users\\Administrator\\Desktop\\FLIR_ADAS_1_3\\train\\thermal_annotations.json'
# Path of the new annotation file (contains only the wanted categories).
save_path = "C:\\Users\\Administrator\\Desktop\\FLIR_ADAS_1_3\\train\\COCO_train.json"

# The ids to keep are exactly the keys of the mapping above, in the same order.
classNum = list(className)

def writeNum(Num):
    """Write ``str(Num)`` to the file named by the module-level ``save_path``.

    Any previous content of the file is replaced.

    Args:
        Num: Value to store; it is converted with ``str`` before writing.
    """
    # "w" instead of "a+": appending on a second run would leave two JSON
    # documents back to back in the file, which json.load cannot parse.
    with open(save_path, "w", encoding="utf-8") as f:
        f.write(str(Num))

# Load the original annotation file; read-only access is sufficient.
with open(coco_json_path, "r", encoding="utf-8") as f:
    allData = json.load(f)
    data = allData["annotations"]
    # Show one sample entry, but only when the second entry actually exists.
    if len(data) > 1:
        print(data[1])
    print("read ready")

# Keep only the annotations whose category is in classNum and reduce each one
# to the three fields used by the later steps.
inputfile = [
    {
        # Zero-padded to 5 digits, e.g. 01265. The number is image_id + 1
        # (presumably ids are 0-based while file names start at 1 - confirm).
        "filename": str(int(ann["image_id"]) + 1).zfill(5),
        "name": className[ann["category_id"]],
        "bndbox": ann["bbox"],
    }
    for ann in data
    if ann['category_id'] in classNum
]
inputfile = json.dumps(inputfile)
writeNum(inputfile)

2.将训练图片中包含了四种类别的图片筛选出来:先把训练图片复制到新的文件夹(即备份),再运行下面的代码,删除该文件夹中不包含这四种类别的图片

import json
import os

# Back up the image folder before running this script: it deletes, in place,
# every image that has no annotation of the categories selected in step 1 and
# keeps only the wanted images.

# COCO_train.json is the filtered annotation file saved by step 1.
with open("C:\\Users\\Administrator\\Desktop\\FLIR_ADAS_1_3\\train\\COCO_train.json", "r", encoding="utf-8") as f:
    data = json.load(f)
    print("read ready")

# File names of all images that own at least one kept annotation.
nameStr = {"FLIR_" + str(i["filename"]) + ".jpeg" for i in data}

# Folder that holds the image files.
path = "C:\\Users\\Administrator\\Desktop\\FLIR_ADAS_1_3\\train\\train_jpg_file\\"

for file in os.listdir(path):
    if file in nameStr:
        continue
    target = os.path.join(path, file)
    # os.remove fails on directories, so only regular files are deleted.
    if os.path.isfile(target):
        os.remove(target)

3.将图片和json文件一一对应生成xml文件

# -*- coding:utf-8-*-

import xml.dom
import xml.dom.minidom
import os
# from PIL import Image
import cv2
import json

# xml文件规范定义


# Root folder of the training split.
_IMAGE_PATH = 'C:\\Users\\Administrator\\Desktop\\FLIR_ADAS_1_3\\train'

# Four spaces per nesting level (the original `'' * 4` evaluated to an empty
# string, i.e. no indentation at all).
_INDENT = ' ' * 4
_NEW_LINE = '\n'
_FOLDER_NODE = 'traindata'
_ROOT_NODE = 'annotation'
_DATABASE_NAME = 'traindata'
_ANNOTATION = 'traindata'
# _AUTHOR = 'SyGoing_CSDN'  (disabled; presumably the name of this constant - confirm)
_SEGMENTED = '0'
_DIFFICULT = '0'
_TRUNCATED = '0'
_POSE = 'Unspecified'

# _IMAGE_COPY_PATH= 'JPEGImages'
# Same value as before; the backslash in front of "Annotations" is now escaped
# so the literal no longer contains the invalid escape sequence '\A'.
_ANNOTATION_SAVE_PATH = 'C:\\Users\\Administrator\\Desktop\\FLIR_ADAS_1_3\\train\\Annotations\\'


# _IMAGE_CHANNEL= 3

# 封装创建节点的过程
def createElementNode(doc, tag, attr):
    """Build a ``<tag>`` element whose only child is a text node.

    Args:
        doc: DOM document used as the node factory.
        tag: Name of the element to create.
        attr: Text placed inside the element.

    Returns:
        The new element; it is not attached to any parent.
    """
    node = doc.createElement(tag)
    # Wrap the text in its own node and hang it under the element.
    node.appendChild(doc.createTextNode(attr))
    return node


# 封装添加一个子节点
def createChildNode(doc, tag, attr, parent_node):
    """Create a ``<tag>`` element holding the text ``attr`` under ``parent_node``.

    Args:
        doc: DOM document used as the node factory.
        tag: Name of the element to create.
        attr: Text placed inside the new element.
        parent_node: Node the new element is appended to.
    """
    parent_node.appendChild(createElementNode(doc, tag, attr))


# object节点比较特殊
def createObjectNode(doc, attrs):
    """Build the ``<object>`` element for one annotation entry.

    Args:
        doc: DOM document used as the node factory.
        attrs: Mapping with a ``'name'`` entry (class label) and a
            ``'bndbox'`` entry that is indexed as ``[x, y, width, height]``.

    Returns:
        The ``<object>`` element with ``name``, ``pose``, ``truncated``,
        ``difficult`` and ``bndbox`` children; it is not attached to a parent.
    """
    object_node = doc.createElement('object')

    # Only two labels are emitted: anything that is not 'person' becomes 'car'.
    raw_name = attrs['name']
    label = 'car' if raw_name != 'person' else raw_name

    createChildNode(doc, 'name', label, object_node)
    createChildNode(doc, 'pose', _POSE, object_node)
    createChildNode(doc, 'truncated', _TRUNCATED, object_node)
    createChildNode(doc, 'difficult', _DIFFICULT, object_node)

    bndbox_node = doc.createElement('bndbox')

    # (tag, index of the start coordinate, index of the extent to add or None).
    # The max corners are start + extent; every value is truncated to an int.
    corner_specs = (
        ('xmin', 0, None),
        ('ymin', 1, None),
        ('xmax', 0, 2),
        ('ymax', 1, 3),
    )
    for tag, start, extent in corner_specs:
        if extent is None:
            value = attrs['bndbox'][start]
        else:
            value = attrs['bndbox'][start] + attrs['bndbox'][extent]
        createChildNode(doc, tag, str(int(value)), bndbox_node)

    object_node.appendChild(bndbox_node)
    return object_node


# 将documentElement写入XML文件
def writeXMLFile(doc, filename):
    """Serialize ``doc`` to ``filename`` without the XML declaration line.

    The document is pretty-printed with one node per line and four spaces of
    indentation per nesting level (the original ``'' * 4`` was an empty
    string, so nothing was indented). The first line, which holds the
    ``<?xml ...?>`` declaration, and all whitespace-only lines are dropped.
    The output file is written as UTF-8.

    Args:
        doc: DOM document to serialize.
        filename: Path of the XML file to create or overwrite.
    """
    # Serialize in memory: no shared 'tmp.xml' is left behind in the working
    # directory and no file handle can leak when serialization fails.
    pretty = doc.toprettyxml(indent=' ' * 4, newl='\n')

    # Element 0 is the XML declaration; skip it together with blank lines.
    kept_lines = [line for line in pretty.split('\n')[1:] if line.strip()]

    with open(filename, 'w', encoding='utf-8') as fout:
        fout.writelines(line + '\n' for line in kept_lines)


if __name__ == "__main__":
    # Directory that holds the images to annotate.
    img_path = "C:\\Users\\Administrator\\Desktop\\FLIR_ADAS_1_3\\train\\traindata\\"
    fileList = os.listdir(img_path)
    # os.listdir returns a list, so test for emptiness; the original
    # comparison `fileList == 0` could never be true.
    if not fileList:
        raise SystemExit("no files found in " + img_path)

    # Annotation list; every entry is read through its "filename", "name"
    # and "bndbox" keys.
    # NOTE(review): "annptations.json" looks like a misspelling of
    # "annotations.json"; the path is kept unchanged - confirm.
    with open("C:\\Users\\Administrator\\Desktop\\FLIR_ADAS_1_3\\train\\annptations.json", "r",
              encoding="utf-8") as f:
        ann_data = json.load(f)

    # Group the annotations by image stem once instead of rescanning the
    # whole list for every image; the original order is preserved per image.
    anns_by_image = {}
    for ann in ann_data:
        anns_by_image.setdefault("FLIR_" + str(ann["filename"]), []).append(ann)

    # Make sure the output folder exists before the first file is written.
    os.makedirs(_ANNOTATION_SAVE_PATH, exist_ok=True)

    my_dom = xml.dom.getDOMImplementation()

    for imageName in fileList:
        print(imageName)
        # splitext removes only the extension; str.strip(".jpeg") removes any
        # of the characters ". j p e g" from both ends of the name.
        saveName = os.path.splitext(imageName)[0]
        print(saveName)

        xml_file_name = os.path.join(_ANNOTATION_SAVE_PATH, saveName + '.xml')

        image_file = os.path.join(img_path, imageName)
        img = cv2.imread(image_file)
        print(image_file)
        # cv2.imread returns None for files it cannot decode.
        if img is None:
            print("skipping unreadable image: " + image_file)
            continue
        height, width, channel = img.shape
        print(height, width, channel)

        doc = my_dom.createDocument(None, _ROOT_NODE, None)

        # Root node
        root_node = doc.documentElement

        # folder node
        createChildNode(doc, 'folder', _FOLDER_NODE, root_node)

        # filename node
        # NOTE(review): '.jpg' is recorded here while step 2 of this file
        # names the images '.jpeg'; kept unchanged - confirm.
        createChildNode(doc, 'filename', saveName + '.jpg', root_node)

        # source node and its children
        source_node = doc.createElement('source')
        createChildNode(doc, 'database', _DATABASE_NAME, source_node)
        createChildNode(doc, 'annotation', _ANNOTATION, source_node)
        createChildNode(doc, 'image', 'flickr', source_node)
        createChildNode(doc, 'flickrid', 'NULL', source_node)
        root_node.appendChild(source_node)

        # owner node and its children
        owner_node = doc.createElement('owner')
        createChildNode(doc, 'flickrid', 'NULL', owner_node)
        root_node.appendChild(owner_node)

        # size node
        size_node = doc.createElement('size')
        createChildNode(doc, 'width', str(width), size_node)
        createChildNode(doc, 'height', str(height), size_node)
        createChildNode(doc, 'depth', str(channel), size_node)
        root_node.appendChild(size_node)

        # segmented node
        createChildNode(doc, 'segmented', _SEGMENTED, root_node)

        # One object node per annotation that belongs to this image.
        for ann in anns_by_image.get(saveName, []):
            root_node.appendChild(createObjectNode(doc, ann))

        print(xml_file_name)
        writeXMLFile(doc, xml_file_name)

# 创建XML文件

# createXMLFile(attrs, width,height, xml_file_name)

# # 写入文件
#

 

  • 1
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 11
    评论
评论 11
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值