python实现json格式转voc数据集格式

首先来了解一下voc数据集格式:

<annotation>
	<folder>VOC2007</folder>   #图片所在文件夹(数据集名称)
	<filename>000001.jpg</filename> #图片名称
	<source>                   #图片来源相关信息(不重要)
		<database>The VOC2007 Database</database>
		<annotation>PASCAL VOC2007</annotation>
		<image>flickr</image>
		<flickrid>341012865</flickrid>
	</source>
	<owner>                    #所有者信息
		<flickrid>Fried Camels</flickrid>
		<name>Jinky the Fruit Bat</name>
	</owner>
	<size>    #图像尺寸
		<width>353</width>
		<height>500</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented> #是否用于分割(做目标检测时取0或1均可)
	<object>   #标注个体
		<name>dog</name>  #物体类别
		<pose>Unspecified</pose>  #拍摄角度
		<truncated>1</truncated>  #是否被截断(0表示完整)
		<difficult>0</difficult>  #目标是否难以识别(0表示容易识别)
		<bndbox>  #物体bbox位置,以坐标表示左上角(xmin, ymin)及右下角(xmax, ymax),详情请查看本栏目另一篇标注文章
			<xmin>48</xmin>
			<ymin>240</ymin>
			<xmax>195</xmax>
			<ymax>371</ymax>
		</bndbox>
	</object>
</annotation>

再来看一下json的格式:其中label为物体类别,boxes中的四个数值依次为xmin、ymin、xmax、ymax,即左上角及右下角的坐标


{
    "shape": [
        {
            "label": "xxx",
            "boxes": [
                xxx,
                xxx,
                xxx,
                xxx
            ],
            "points": null
        }
    ],
    "imagePath": "xxxxxxxxxxxx.jpg"
}

代码:

import json
import os
import cv2

# Pascal VOC annotation templates, filled in with the ``%`` operator.
# Each template is spelled out line by line so the indentation and the
# trailing newline of every XML line are explicit.

# Header: expects (filename, width, height, depth).
headstr = (
    "<annotation>\n"
    "    <folder>VOC</folder>\n"
    "    <filename>%s</filename>\n"
    "    <source>\n"
    "        <database>Database</database>\n"
    "    </source>\n"
    "    <size>\n"
    "        <width>%d</width>\n"
    "        <height>%d</height>\n"
    "        <depth>%d</depth>\n"
    "    </size>\n"
    "    <segmented>0</segmented>\n"
)

# One labelled object: expects (name, xmin, ymin, xmax, ymax).
objstr = (
    "    <object>\n"
    "        <name>%s</name>\n"
    "        <pose>Unspecified</pose>\n"
    "        <truncated>0</truncated>\n"
    "        <difficult>0</difficult>\n"
    "        <bndbox>\n"
    "            <xmin>%d</xmin>\n"
    "            <ymin>%d</ymin>\n"
    "            <xmax>%d</xmax>\n"
    "            <ymax>%d</ymax>\n"
    "        </bndbox>\n"
    "    </object>\n"
)

# Closing tag of the document.
tailstr = "</annotation>\n"


def write_xml(anno_path, head, objs, tail):
    """Write one Pascal VOC annotation file.

    Args:
        anno_path: Path of the XML file to create; an existing file is
            overwritten.
        head: Already-formatted text written before the objects.
        objs: Iterable of ``[name, xmin, ymin, xmax, ymax]`` records. Each
            record is rendered through the module-level ``objstr`` template.
        tail: Text written after the last object.
    """
    from xml.sax.saxutils import escape

    # The output carries no XML declaration, so parsers assume UTF-8; write
    # UTF-8 explicitly instead of relying on the platform default encoding.
    # The ``with`` block guarantees the file is flushed and closed.
    with open(anno_path, "w", encoding="utf-8") as f:
        f.write(head)
        for obj in objs:
            name, xmin, ymin, xmax, ymax = obj[:5]
            # Escape the label so characters such as "&" or "<" cannot
            # produce malformed XML.
            f.write(objstr % (escape(str(name)), xmin, ymin, xmax, ymax))
        f.write(tail)


def json_to_xml(jpg_path, json_path, xml_path):
    """Convert every JSON label file in a directory to a Pascal VOC XML file.

    Each ``*.json`` file is expected to hold a ``"shape"`` list whose items
    carry a ``"label"`` and a ``"boxes"`` list of four numbers read as
    ``xmin, ymin, xmax, ymax``. The image with the same base name is looked
    up in ``jpg_path`` to obtain width, height and channel count.

    Label files without a matching or readable image are reported and
    skipped, so one bad sample does not abort the whole batch.

    Args:
        jpg_path: Directory containing the images.
        json_path: Directory containing the JSON label files.
        xml_path: Output directory for the XML files; created if missing.
    """
    from xml.sax.saxutils import escape

    os.makedirs(xml_path, exist_ok=True)

    # Index the images by base name once instead of rescanning the directory
    # for every label file. JSON files are ignored so the labels themselves
    # are never mistaken for images when both live in the same directory.
    images_by_stem = {}
    for image_name in os.listdir(jpg_path):
        image_stem, image_ext = os.path.splitext(image_name)
        if image_ext.lower() == ".json":
            continue
        images_by_stem.setdefault(image_stem, image_name)

    for file in os.listdir(json_path):
        # Split on the real extension: replacing the text "json" anywhere in
        # the name would also rewrite names such as "json_001.json".
        stem, ext = os.path.splitext(file)
        if ext.lower() != ".json":
            continue

        image_name = images_by_stem.get(stem)
        if image_name is None:
            print("skip %s: no image named %s.* in %s" % (file, stem, jpg_path))
            continue

        # Read from the directory passed by the caller, not a fixed location.
        img = cv2.imread(os.path.join(jpg_path, image_name))
        if img is None:  # cv2.imread signals failure by returning None
            print("skip %s: cannot read image %s" % (file, image_name))
            continue
        height, width = img.shape[:2]
        depth = img.shape[2] if len(img.shape) > 2 else 1

        with open(os.path.join(json_path, file), "r", encoding="utf-8") as load_f:
            load_dict = json.load(load_f)

        # Use the real image file name (and extension) in the header.
        head = headstr % (escape(image_name), width, height, depth)

        # Collect one [name, xmin, ymin, xmax, ymax] record per labelled box.
        dataset = []
        for region in load_dict["shape"]:
            name = region["label"]
            xmin, ymin, xmax, ymax = region["boxes"][:4]
            print("xmin:", xmin, "\tymin:", ymin, "\txmax:", xmax, "\tymax:", ymax)
            dataset.append([name, xmin, ymin, xmax, ymax])

        write_xml(os.path.join(xml_path, stem + ".xml"), head, dataset, tailstr)


if __name__ == "__main__":
    # Script entry point: convert the labels under E:/data.
    json_to_xml(
        jpg_path="E:/data/img",  # directory holding the jpg images
        json_path="E:/data/json",  # directory holding the json label files
        xml_path="E:/data/xml",  # directory that receives the xml files
    )

参考内容:

https://blog.csdn.net/u013832707/article/details/80060327

http://blog.sina.com.cn/s/blog_154a715de0102zrfh.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值