文件结构
代码实现
将代码中的 image_set 和 classes 修改为自己的数据集目录名和类别列表。代码执行完毕后,会在 <image_set>/txt 文件夹下为每个 xml 标注文件生成一个同名的 .txt 文件,用于 YOLOv5(PyTorch)训练。
import os
import xml.etree.ElementTree as ET
def ConverCoordinate(imgshape, bbox):
    """Convert a pixel-space box from the XML into a normalized txt box.

    Args:
        imgshape: Image size in pixels; index 0 is the width and index 1
            is the height.
        bbox: ``(xmin, xmax, ymin, ymax)`` box edges in pixels. Note the
            order: both x edges come first, then both y edges.

    Returns:
        A tuple ``(x, y, w, h)``: the box centre and the box size, with
        x/w scaled by ``1 / width`` and y/h scaled by ``1 / height``.

    Raises:
        ZeroDivisionError: If the image width or height is 0.
    """
    xmin, xmax, ymin, ymax = bbox
    width = imgshape[0]
    height = imgshape[1]

    # Scale factors that map pixel units onto the image dimensions.
    dw = 1. / width
    dh = 1. / height

    # Box centre and extent, still in pixels.
    x = (xmin + xmax) / 2.0
    y = (ymin + ymax) / 2.0
    w = xmax - xmin
    h = ymax - ymin

    # Normalize.
    x = x * dw
    y = y * dh
    w = w * dw
    h = h * dh
    return (x, y, w, h)
def readxml(image_set, filename):
    """Convert one XML annotation file into a txt label file.

    Reads ``{image_set}/Annotations/{filename}.xml`` and writes
    ``{image_set}/txt/{filename}.txt`` with one line per kept object:
    the class index followed by the four values returned by
    ``ConverCoordinate``, separated by single spaces.

    Objects whose ``<name>`` is not in the module-level ``classes`` list
    are skipped; if nothing is kept the output file is created empty.

    Args:
        image_set: Dataset directory containing the ``Annotations`` and
            ``txt`` sub-directories. The ``txt`` directory must already
            exist, since ``open`` does not create directories.
        filename: Annotation file name without the ``.xml`` extension.
    """
    # Parse and convert everything before touching the output file, so a
    # missing or malformed XML does not leave an empty or partial label
    # file behind.
    filetree = ET.parse('{}/Annotations/{}.xml'.format(image_set, filename))
    root = filetree.getroot()

    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    imgshape = (width, height)

    lines = []
    for obj in root.findall('object'):
        # Skip objects whose class name is not listed in ``classes``.
        obj_name = obj.find('name').text
        if obj_name not in classes:
            continue
        obj_id = classes.index(obj_name)

        # Pixel coordinates of the box edges.
        bbox = obj.find('bndbox')
        xmin = float(bbox.find('xmin').text)
        xmax = float(bbox.find('xmax').text)
        ymin = float(bbox.find('ymin').text)
        ymax = float(bbox.find('ymax').text)

        # ConverCoordinate expects (xmin, xmax, ymin, ymax), in that order.
        bbox_coor = (xmin, xmax, ymin, ymax)
        txtvalue = ConverCoordinate(imgshape, bbox_coor)
        lines.append('{}'.format(obj_id) + ' ' + ' '.join([str(i) for i in txtvalue]) + '\n')

    # The context manager guarantees the file is flushed and closed.
    with open('{}/txt/{}.txt'.format(image_set, filename), 'w') as outfile:
        outfile.writelines(lines)
if __name__ == '__main__':
    # Settings: dataset directory name (relative to the current working
    # directory) and the class names to keep. A class's position in this
    # list is the index written to the txt files.
    image_set = 'train'
    classes = ['person', 'root']

    # Locate the image directory; its file names drive the conversion.
    localdir = os.getcwd()
    datasetdir = os.path.join(localdir, image_set)
    JPEGImagefiledir = os.path.join(datasetdir, 'JPEGImages')

    # readxml() writes into <image_set>/txt and open() does not create
    # directories, so make sure the output directory exists first.
    os.makedirs(os.path.join(datasetdir, 'txt'), exist_ok=True)

    for filename in os.listdir(JPEGImagefiledir):
        # splitext handles extensions of any length (.jpg, .jpeg, ...),
        # unlike slicing off a fixed four characters.
        readxml(image_set, os.path.splitext(filename)[0])
转换原理
参照 YOLOv5 官方的自定义数据训练文档:https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data 。txt 文件中每行对应一个目标,格式为"类别索引 x_center y_center width height",其中中心点坐标和宽高均为除以图片宽、高之后的归一化值。