首先,我自己的数据集标注格式为:filename label xmin ymin xmax ymax。
如果一张图片有多个bbox,则每个bbox都按上面的格式另起一行,而不是接着写在同一行。
方法1 通过别的模板转换
VOC数据集的xml格式为:
<annotation>
<folder>VOC2007</folder>
<filename>000002.jpg</filename>
<size>
<width>335</width>
<height>500</height>
<depth>3</depth>
</size>
<object>
<name>cat</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>139</xmin>
<ymin>200</ymin>
<xmax>207</xmax>
<ymax>301</ymax>
</bndbox>
</object>
</annotation>
所以我们可以把它当作模板,再按自己的数据集修改其中的内容。
注意:一张图片中的object可能不止一个。
转换代码:
"""conver to xml"""
import copy
from lxml.etree import Element, SubElement, tostring, ElementTree
import cv2
import os  # needed below for path joining and extension handling

# Change these to your own paths.
template_file = 'anno.xml'
target_dir = 'Annotations/'
image_dir = 'train/'  # image folder
train_file = 'train.txt'  # annotation list: filename label x_min y_min x_max y_max

# Group every box under its image first.  The annotation file stores one box
# per line, so an image with several boxes appears on several lines, and those
# lines are not guaranteed to be adjacent.  Grouping up front means each XML
# file is built from exactly its own boxes and written exactly once.
# (dict preserves insertion order, so images are processed in first-seen order.)
boxes_by_image = {}
with open(train_file) as f:
    for line_no, line in enumerate(f, start=1):
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines, e.g. a trailing newline
        if len(fields) < 6:
            raise ValueError(
                '%s line %d: expected "filename label xmin ymin xmax ymax", got %r'
                % (train_file, line_no, line))
        # Only the first six fields are used; anything after them is ignored.
        boxes_by_image.setdefault(fields[0], []).append((fields[1], fields[2:6]))

for file_name, boxes in boxes_by_image.items():
    # Start from a fresh copy of the template for every image.
    tree = ElementTree()
    tree.parse(template_file)
    root = tree.getroot()

    # filename
    root.find('filename').text = file_name

    # size, taken from the actual image
    image_path = os.path.join(image_dir, file_name)
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread signals a missing/undecodable file by returning None.
        raise IOError('cannot read image: %s' % image_path)
    sz = root.find('size')
    sz.find('height').text = str(im.shape[0])
    sz.find('width').text = str(im.shape[1])
    sz.find('depth').text = str(im.shape[2] if im.ndim == 3 else 1)

    # objects: the template's own <object> holds the first box; every further
    # box gets a deep copy of it (so pose/truncated/difficult are inherited).
    template_obj = root.find('object')
    for index, (label, (xmin, ymin, xmax, ymax)) in enumerate(boxes):
        if index == 0:
            obj = template_obj
        else:
            obj = copy.deepcopy(template_obj)  # deep copy: children must not be shared
            root.append(obj)
        obj.find('name').text = label
        bb = obj.find('bndbox')
        bb.find('xmin').text = xmin
        bb.find('ymin').text = ymin
        bb.find('xmax').text = xmax
        bb.find('ymax').text = ymax

    # Swap only the real extension for ".xml" (a plain str.replace would also
    # rewrite "jpg" inside the base name and ignore other extensions).
    xml_file = os.path.splitext(file_name)[0] + '.xml'
    tree.write(os.path.join(target_dir, xml_file), encoding='utf-8')
方法2 直接生成
def save_xml(image_name, bbox, save_dir='./VOC2007/Annotations', width=1609, height=500, channel=3,
             label='car'):
    """Write a Pascal VOC style XML annotation file for one image.

    :param image_name: image file name; stored in ``<filename>`` and, with its
        extension replaced by ``.xml``, used as the output file name
    :param bbox: iterable of ``(x, y, w, h)`` boxes; each one becomes an
        ``<object>`` with xmin=x, ymin=y, xmax=x+w, ymax=y+h
    :param save_dir: directory the XML file is written into (this function
        does not create it)
    :param width: image width written to ``<size>``; the default is the fixed
        image size of the original author's dataset
    :param height: image height written to ``<size>``; fixed-size default as above
    :param channel: channel count written to ``<depth>``; fixed default as above
    :param label: class name written to every ``<object>``; defaults to
        ``'car'``, the value that was previously hard-coded
    :return: None
    """
    # NOTE(review): no module-level ``import os`` is visible in this file, so
    # it is imported locally, next to the other function-scope imports.
    import os
    from lxml.etree import Element, SubElement, tostring

    def add_text_node(parent, tag, value):
        # Append <tag>value</tag> under parent and return the new node.
        node = SubElement(parent, tag)
        node.text = '%s' % value
        return node

    node_root = Element('annotation')
    add_text_node(node_root, 'folder', 'JPEGImages')
    add_text_node(node_root, 'filename', image_name)

    node_size = SubElement(node_root, 'size')
    add_text_node(node_size, 'width', width)
    add_text_node(node_size, 'height', height)
    add_text_node(node_size, 'depth', channel)

    for x, y, w, h in bbox:
        # Input boxes are corner + size; VOC wants the two corners.
        left, top, right, bottom = x, y, x + w, y + h
        node_object = SubElement(node_root, 'object')
        add_text_node(node_object, 'name', label)
        add_text_node(node_object, 'difficult', '0')
        node_bndbox = SubElement(node_object, 'bndbox')
        add_text_node(node_bndbox, 'xmin', left)
        add_text_node(node_bndbox, 'ymin', top)
        add_text_node(node_bndbox, 'xmax', right)
        add_text_node(node_bndbox, 'ymax', bottom)

    xml = tostring(node_root, pretty_print=True)  # bytes, hence the 'wb' mode below

    # Replace only the real extension; str.replace('jpg', 'xml') would also
    # rewrite "jpg" inside the base name and leave e.g. ".png" untouched.
    xml_path = os.path.join(save_dir, os.path.splitext(image_name)[0] + '.xml')
    with open(xml_path, 'wb') as f:
        f.write(xml)
    return
def change2xml(label_dict=None):
    """Write one XML annotation file per image listed in *label_dict*.

    :param label_dict: mapping from image path to that image's box list; the
        base name of each path and its box list are handed to ``save_xml``.
        ``None`` (the default) or an empty mapping does nothing.
    :return: None
    """
    # NOTE(review): no module-level ``import os`` is visible in this file, so
    # it is imported locally here.
    import os

    if not label_dict:
        return
    for image, bbox in label_dict.items():
        save_xml(os.path.basename(image), bbox)
    return
if __name__ == '__main__':
    # Earlier pipeline steps, left disabled by the author and kept here only
    # as a record of the order they were meant to run in:
    #   step 2: make_voc_dir()
    #   step 3: label_dict = utils.read_csv(csv_path=r'./train_b.csv',
    #                                       pre_dir=r'/home/matthew/dataset')
    #           rename_image(label_dict)
    #
    # Final step: read the renamed CSV and emit the XML annotations.
    # NOTE(review): `utils` is not imported anywhere in the visible part of
    # this file; presumably it is the author's own helper module — confirm.
    renamed_csv = r'./rename_train_b.csv'
    jpeg_dir = r'/home/matthew/VOC2007/JPEGImages'
    label_dict = utils.read_csv(csv_path=renamed_csv, pre_dir=jpeg_dir)
    change2xml(label_dict)
参考链接: