训练数据转换为PASCAL VOC2007

数据

我们已有的标注数据是以json格式保存的,而现在训练代码使用的标注格式为PASCAL VOC2007。为了不修改训练代码,将数据转换为PASCAL VOC2007的xml格式。

转换代码

import os
import json
from lxml.etree import Element, SubElement, tostring, ElementTree
from xml.dom import minidom
# Collect the names of the JSON files found in a directory
def ReadFileDir(path):
    """Return the names of the entries in *path* whose extension is ``.json``.

    Only bare entry names are returned (they are not joined with *path*), in
    the order ``os.listdir`` yields them. The extension comparison is
    case-sensitive.
    """
    return [
        entry
        for entry in os.listdir(path)
        if os.path.splitext(entry)[1] == ".json"
    ]
# Load a JSON file
def ReadJson(path):
    """Parse the JSON file at *path* and return the decoded object.

    The file is opened explicitly as UTF-8 so that non-ASCII text in the
    annotations is decoded the same way regardless of the platform's
    default locale encoding.
    """
    with open(path, "r", encoding="utf-8") as load_f:
        return json.load(load_f)
# Create one child element in the XML tree
def subElement(root, tag, text):
    """Append a child named *tag* to *root* and return the new element.

    The child's text is set to *text* unless *text* is the empty string, in
    which case the text is left unset (used for container nodes).
    """
    child = SubElement(root, tag)
    if text == "":
        return child
    child.text = text
    return child
# Save the tree as a pretty-printed XML file
def saveXML(root, filename, indent="\t", newl="\n", encoding="utf-8"):
    """Serialize *root* to *filename* as indented XML.

    The element is serialized with ``tostring`` and re-parsed with
    ``minidom`` so that ``writexml`` can apply *indent* per nesting level and
    *newl* after each node. *encoding* is written into the XML declaration.
    """
    rawText = tostring(root)
    dom = minidom.parseString(rawText)
    # Open the file with the same encoding that the XML declaration
    # advertises; otherwise the bytes on disk follow the locale default and
    # can disagree with the declaration for non-ASCII content.
    with open(filename, 'w', encoding=encoding) as f:
        dom.writexml(f, "", indent, newl, encoding)
# Build the base annotation tree that every image shares
def make_xml(image_name, width, height):
    """Create the ``annotation`` root element with its fixed header nodes.

    The tree contains, in order: ``folder``, ``filename`` (*image_name*),
    ``source``, ``owner``, ``segmented`` and ``size`` (*width*, *height* and
    a depth of 3). Per-object nodes are not added here. Returns the root
    element.
    """
    annotation = Element('annotation')
    subElement(annotation, "folder", "widerface")
    subElement(annotation, "filename", image_name)

    source = subElement(annotation, "source", "")
    for tag, text in (
        ("database", "wider face Database"),
        ("annotation", "PASCAL VOC2007"),
        ("image", "flickr"),
        ("flickrid", "-1"),
    ):
        subElement(source, tag, text)

    owner = subElement(annotation, "owner", "")
    for tag in ("flickrid", "name"):
        subElement(owner, tag, "yanyu")

    subElement(annotation, "segmented", "0")

    size = subElement(annotation, "size", "")
    for tag, text in (
        ("width", str(width)),
        ("height", str(height)),
        ("depth", "3"),
    ):
        subElement(size, tag, text)
    return annotation
if __name__ == "__main__":
    # Convert every JSON annotation found under `path` into a PASCAL VOC2007
    # XML file written to `save_xml_dir`.
    path = '/home/q/train/Data/images/hxlx'
    save_xml_dir = 'xml'
    # Make sure the output directory exists before any file is written.
    os.makedirs(save_xml_dir, exist_ok=True)
    jsonfile = ReadFileDir(path)

    for json_name in jsonfile:
        annotation = ReadJson(os.path.join(path, json_name))

        # Skip records that lack any field needed for the XML header.
        if not all(key in annotation for key in ('photo_id', 'width', 'height')):
            continue
        image_id = annotation['photo_id']
        image_name = image_id + ".jpg"
        width = annotation['width']
        height = annotation['height']

        node_root = make_xml(image_name, width, height)

        if 'croppers' in annotation:
            for crop_dict in annotation['croppers']:
                print (crop_dict)

                # The crop's x/y/width/height are scaled by the image size
                # to obtain integer pixel coordinates.
                xmin = int(crop_dict['x'] * width)
                ymin = int(crop_dict['y'] * height)
                xmax = int((crop_dict['x'] + crop_dict['width']) * width)
                # The bottom edge is the top edge (y) plus the crop height;
                # the original code mistakenly started from x here.
                ymax = int((crop_dict['y'] + crop_dict['height']) * height)

                node_object = subElement(node_root, "object", "")
                subElement(node_object, "name", crop_dict["cropper_type"])
                subElement(node_object, "pose", 'Unspecified')
                subElement(node_object, "truncated", '1')
                subElement(node_object, "difficult", '0')
                node_bndbox = subElement(node_object, "bndbox", "")
                subElement(node_bndbox, "xmin", str(xmin))
                subElement(node_bndbox, "ymin", str(ymin))
                subElement(node_bndbox, "xmax", str(xmax))
                subElement(node_bndbox, "ymax", str(ymax))
                subElement(node_object, "has_lm", '0')

        # Save the XML file
        saveXML(node_root, os.path.join(save_xml_dir, image_id + ".xml"))

具体使用时,请根据自己的数据格式调整或添加相应的字段。

创建image_list.txt

import os

def ReadFileDir(path, pattern=".json"):
    """List the entries of *path* whose extension equals *pattern*.

    Returns a pair ``(names, full_paths)``: the matching entry names and the
    same entries joined with *path*, both in ``os.listdir`` order. *pattern*
    is compared against the extension including its leading dot.
    """
    names = [
        entry
        for entry in os.listdir(path)
        if os.path.splitext(entry)[1] == pattern
    ]
    full_paths = [os.path.join(path, entry) for entry in names]
    return names, full_paths

def RemoveFile(path):
    """Delete the file at *path*, or print a notice when it does not exist."""
    if not os.path.exists(path):
        print('no such file:%s' % path)
        return
    os.remove(path)

if __name__ == '__main__':
    # Write one "<image path> <annotation path>" line per XML file found
    # under the annotation directory.
    path = 'hxlx'
    _, pfile = ReadFileDir(path, '.xml')

    list_path = 'image_list.txt'
    # Remove any previous list so the file opened below starts out empty.
    RemoveFile(list_path)
    # The context manager closes the file even if a write fails.
    with open(list_path, 'a') as f:
        for file in pfile:
            # splitext strips only the final extension; splitting on the
            # first '.' would truncate any path that contains another dot.
            line = os.path.splitext(file)[0] + '.jpg ' + file
            f.write("{}\n".format(line))
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值