如何将自己的数据转换为Pascal VOC2007数据集标注格式

最新推荐文章于 2024-07-23 00:03:14 发布

Cowry5

最新推荐文章于 2024-07-23 00:03:14 发布

阅读量3.7k

点赞数

分类专栏：杂文章标签： voc

本文链接：https://blog.csdn.net/Cowry5/article/details/80773058

版权

杂专栏收录该内容

5 篇文章 0 订阅

订阅专栏

首先，我自己的数据集标注格式为 filename label xmin ymin xmax ymax。如果一张图片有多个 bbox，每个 bbox 都按这个格式另起一行，而不是接着写在同一行。

方法1 通过别的模板转换

VOC数据集的xml格式为：

<annotation>
  <folder>VOC2007</folder>
  <filename>000002.jpg</filename>
  <size>
    <width>335</width>
    <height>500</height>
    <depth>3</depth>
  </size>
  <object>
    <name>cat</name>
    <pose>Unspecified</pose>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>139</xmin>
      <ymin>200</ymin>
      <xmax>207</xmax>
      <ymax>301</ymax>
    </bndbox>
  </object>
</annotation>

所以我们把它保存为模板文件（即下面代码中的 anno.xml），然后按自己的数据集修改其中的字段。注意一张图片的 object 可能不止一个。

转换代码：

"""conver to xml"""
import copy
import os
from lxml.etree import Element, SubElement, tostring, ElementTree
import cv2

# Convert a plain-text annotation list into one Pascal VOC XML file per image.
#
# Each non-blank line of `train_file` must read
#     filename label xmin ymin xmax ymax
# and an image with several boxes simply appears on several lines.
# `template_file` is a VOC annotation XML containing exactly the fields that
# are overwritten below (filename, size/*, and one object with name + bndbox).

# Change these to your own paths.
template_file = 'anno.xml'
target_dir = 'Annotations/'
image_dir = 'train/'  # image folder
train_file = 'train.txt'  # text file holding the per-box annotations

# Collect every box of an image up front.  Grouping first means the lines of
# one image do not have to be adjacent in the file, and each XML is written
# exactly once instead of once per box.
boxes_by_image = {}
with open(train_file) as f:
    for line_no, line in enumerate(f, start=1):
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines (e.g. a trailing newline)
        if len(fields) < 6:
            raise ValueError(
                '%s:%d: expected "filename label xmin ymin xmax ymax", got %r'
                % (train_file, line_no, line.rstrip('\n')))
        boxes_by_image.setdefault(fields[0], []).append(tuple(fields[1:6]))

for file_name, boxes in boxes_by_image.items():
    # Start from a fresh copy of the template for every image.
    tree = ElementTree()
    tree.parse(template_file)
    root = tree.getroot()

    # filename
    root.find('filename').text = file_name

    # size -- read from the actual image
    image_path = os.path.join(image_dir, file_name)
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise IOError('cannot read image: %s' % image_path)
    sz = root.find('size')
    sz.find('height').text = str(im.shape[0])
    sz.find('width').text = str(im.shape[1])
    sz.find('depth').text = str(im.shape[2])

    # objects -- the template's own <object> is reused for the first box and
    # deep-copied for every further one, so all template sub-fields
    # (pose, truncated, difficult, ...) carry over.
    template_obj = root.find('object')
    for index, (lable, xmin, ymin, xmax, ymax) in enumerate(boxes):
        if index == 0:
            obj = template_obj
        else:
            obj = copy.deepcopy(template_obj)  # must be a deep copy
            root.append(obj)

        obj.find('name').text = lable
        bb = obj.find('bndbox')
        bb.find('xmin').text = xmin
        bb.find('ymin').text = ymin
        bb.find('xmax').text = xmax
        bb.find('ymax').text = ymax

    # Swap only the extension; str.replace('jpg', 'xml') would also rewrite a
    # "jpg" inside the base name and would leave other extensions untouched.
    xml_file = os.path.splitext(file_name)[0] + '.xml'

    tree.write(os.path.join(target_dir, xml_file), encoding='utf-8')

方法2 直接生成

def save_xml(image_name, bbox, save_dir='./VOC2007/Annotations', width=1609, height=500, channel=3, label='car'):
    """Write a Pascal VOC style annotation XML for a single image.

    The file is saved in ``save_dir`` under the image's base name with the
    extension replaced by ``.xml``.

    :param image_name: image file name, stored in ``<filename>``
    :param bbox: iterable of ``(x, y, w, h)`` boxes; each is written as
        ``xmin=x, ymin=y, xmax=x+w, ymax=y+h``
        (assumes numeric values -- TODO confirm against the CSV reader)
    :param save_dir: directory the XML file is written to; it must already exist
    :param width: image width; defaults to the fixed size of the author's dataset
    :param height: image height; defaults to the fixed size of the author's dataset
    :param channel: number of image channels, stored in ``<depth>``
    :param label: class name written to every ``<object>``; defaults to the
        previously hard-coded ``'car'``
    :return: None
    """
    import os
    from lxml.etree import Element, SubElement, tostring

    def _add_text(parent, tag, value):
        # Append <tag>value</tag> to `parent` and return the new element.
        node = SubElement(parent, tag)
        node.text = '%s' % value
        return node

    node_root = Element('annotation')
    _add_text(node_root, 'folder', 'JPEGImages')
    _add_text(node_root, 'filename', image_name)

    node_size = SubElement(node_root, 'size')
    _add_text(node_size, 'width', width)
    _add_text(node_size, 'height', height)
    _add_text(node_size, 'depth', channel)

    for x, y, w, h in bbox:
        # Convert (x, y, w, h) to VOC corner coordinates.
        left, top, right, bottom = x, y, x + w, y + h
        node_object = SubElement(node_root, 'object')
        _add_text(node_object, 'name', label)
        _add_text(node_object, 'difficult', '0')
        node_bndbox = SubElement(node_object, 'bndbox')
        _add_text(node_bndbox, 'xmin', left)
        _add_text(node_bndbox, 'ymin', top)
        _add_text(node_bndbox, 'xmax', right)
        _add_text(node_bndbox, 'ymax', bottom)

    xml = tostring(node_root, pretty_print=True)

    # Swap only the extension; str.replace('jpg', 'xml') would also rewrite a
    # "jpg" inside the base name and would leave other extensions untouched.
    xml_path = os.path.join(save_dir, os.path.splitext(image_name)[0] + '.xml')
    with open(xml_path, 'wb') as f:  # tostring() returns bytes
        f.write(xml)

    return


def change2xml(label_dict=None):
    """Write one VOC annotation XML per entry of ``label_dict``.

    :param label_dict: mapping of image path -> list of boxes for that image;
        the boxes are passed through to ``save_xml`` unchanged. ``None`` or an
        empty mapping does nothing.
    :return: None
    """
    import os

    if not label_dict:
        return
    for image, bbox in label_dict.items():
        # save_xml expects the bare file name, not the full path.
        image_name = os.path.basename(image)
        save_xml(image_name, bbox)
    return


if __name__ == '__main__':
    # Earlier pipeline stages, kept for reference but disabled:
    #   make_voc_dir()
    #   label_dict = utils.read_csv(csv_path=r'./train_b.csv',
    #                               pre_dir=r'/home/matthew/dataset')
    #   rename_image(label_dict)

    # Final stage: load the labels of the renamed images and emit the XML files.
    # NOTE(review): `utils` is not imported in the visible part of this file --
    # confirm where read_csv comes from.
    csv_path = r'./rename_train_b.csv'
    image_root = r'/home/matthew/VOC2007/JPEGImages'
    label_dict = utils.read_csv(csv_path=csv_path, pre_dir=image_root)
    change2xml(label_dict)

参考链接：