制备自己的voc格式数据集，用于yolox训练

找不到了呀

已于 2024-07-03 16:38:38 修改

阅读量84

点赞数 3

文章标签：人工智能

于 2024-06-29 22:26:53 首次发布

本文链接：https://blog.csdn.net/qq_40377540/article/details/140071740

版权

yolo标签制作成voc格式的xml标注文件

示例


import os
from lxml.etree import Element, SubElement, tostring, ElementTree
from xml.dom.minidom import parseString
from PIL import Image
from PIL import ImageFilter
import cv2
import math
from math import sin, cos, pi, floor, ceil, tan, sqrt

# # 示例
# info_pt = {
#     'img_folder': image_folder,  # 数据集名称
#     'img_filename': image_filename,  # 文件名称
#     'img_path': os.path.join(image_path, image_filename + '.jpg'),  # 加载图像路径
#     'img': img,  # 图像
#     'img_points': img_points_fix,  # 角点信息
#     'img_class': img_class,  # 标签类别
#
#     'img_xml': os.path.join(save_xml_path, image_filename + '.xml'),  # 存储xml位置
#
#     'pt_minx': pt_minx,  # 角点标签区域bbox
#     'pt_miny': pt_miny,
#     'pt_maxx': pt_maxx,
#     'pt_maxy': pt_maxy,
# }

def remove_empty_lines(input_file, output_file, encoding='utf-8'):
    """Copy a text file, dropping every empty or whitespace-only line.

    The input is read completely before the output is opened, so
    ``input_file`` and ``output_file`` may be the same path.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the text file to write.
        encoding: Text encoding used for both reading and writing.
            Defaults to UTF-8 so the result does not depend on the
            platform's locale encoding.
    """
    with open(input_file, 'r', encoding=encoding) as file:
        lines = [line for line in file if line.strip()]

    with open(output_file, 'w', encoding=encoding) as file:
        file.writelines(lines)


# bbox信息和图像名称
def make_xml(info_pt):
    """Write a VOC-style XML annotation file for one image.

    Args:
        info_pt: dict with the following keys:
            'img_folder'   - text for the <folder> tag (dataset name).
            'img_filename' - image file name without extension; '.jpg' is
                             appended for the <filename> tag.
            'img_path'     - text for the <path> tag.
            'img'          - image array exposing ``.shape`` as
                             (rows, cols) or (rows, cols, channels), e.g.
                             the result of ``cv2.imread``.
            'img_points'   - one entry per object; written to <pt_center>.
            'img_class'    - class name written to every object's <name>.
            'pt_minx', 'pt_miny', 'pt_maxx', 'pt_maxy' - per-object bbox
                             coordinates, indexed in step with 'img_points'.
            'img_xml'      - path of the XML file to write.

    Returns:
        The ``xml.dom.minidom`` document parsed from the generated XML.

    Raises:
        ValueError: if ``info_pt['img']`` is None (image failed to load).
    """
    img_folder = info_pt['img_folder']
    img_filename = info_pt['img_filename']
    img_path = info_pt['img_path']

    img = info_pt['img']
    if img is None:
        # cv2.imread signals a failed load by returning None.
        raise ValueError('info_pt["img"] is None; the image could not be loaded')

    # Array shape is (rows, cols[, channels]) == (height, width[, depth]).
    img_h, img_w = img.shape[:2]
    # A 2-D array is a single-channel (grayscale) image.
    img_channels = img.shape[2] if len(img.shape) > 2 else 1

    img_points = info_pt['img_points']
    img_class = info_pt['img_class']
    pt_minx = info_pt['pt_minx']
    pt_miny = info_pt['pt_miny']
    pt_maxx = info_pt['pt_maxx']
    pt_maxy = info_pt['pt_maxy']

    img_xml = info_pt['img_xml']

    node_root = Element('annotation')
    # Dataset name
    node_folder = SubElement(node_root, 'folder')
    node_folder.text = img_folder
    # Image file name
    node_filename = SubElement(node_root, 'filename')
    node_filename.text = img_filename + '.jpg'
    # Location the image is loaded from
    node_path = SubElement(node_root, 'path')
    node_path.text = img_path

    # Number of objects
    node_object_num = SubElement(node_root, 'object_num')
    node_object_num.text = str(len(img_points))

    # Image size
    node_size = SubElement(node_root, 'size')
    node_width = SubElement(node_size, 'width')
    node_width.text = str(img_w)
    node_height = SubElement(node_size, 'height')
    node_height.text = str(img_h)
    node_depth = SubElement(node_size, 'depth')
    node_depth.text = str(img_channels)

    # One <object> element per point, with its bounding box
    for i, point in enumerate(img_points):
        node_object = SubElement(node_root, 'object')
        node_name = SubElement(node_object, 'name')
        node_name.text = img_class
        node_difficult = SubElement(node_object, 'difficult')
        node_difficult.text = '0'
        node_pt_center = SubElement(node_object, 'pt_center')
        node_pt_center.text = str(point)

        node_bndbox = SubElement(node_object, 'bndbox')
        node_xmin = SubElement(node_bndbox, 'xmin')
        node_xmin.text = str(pt_minx[i])
        node_ymin = SubElement(node_bndbox, 'ymin')
        node_ymin.text = str(pt_miny[i])
        node_xmax = SubElement(node_bndbox, 'xmax')
        node_xmax.text = str(pt_maxx[i])
        node_ymax = SubElement(node_bndbox, 'ymax')
        node_ymax.text = str(pt_maxy[i])

    xml = tostring(node_root, pretty_print=True)
    dom = parseString(xml)

    # Re-indenting already pretty-printed XML leaves whitespace-only lines;
    # drop them in memory so the file is written exactly once, as UTF-8.
    kept_lines = [
        line for line in dom.toprettyxml(indent='\t').split('\n')
        if line.strip()
    ]
    with open(img_xml, 'w', encoding='utf-8') as f:
        f.write('\n'.join(kept_lines) + '\n')

    return dom


if __name__ == '__main__':
    # Example run: generate the annotation file for a single image.

    image_folder = 'voc'
    image_filename = 'fix2'
    image_path = r'./path/to/dataset/test/img/img_fix'
    save_xml_path = r'./path/to/dataset/test/Annotations'

    img_file = os.path.join(image_path, image_filename + '.jpg')
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread returns None instead of raising when loading fails.
        raise FileNotFoundError('cannot read image: ' + img_file)

    # Make sure the annotation directory exists before writing into it.
    os.makedirs(save_xml_path, exist_ok=True)

    # Candidate corner points, one [x, y] pair per object
    img_points_fix = [[197, 197], [197, 213], [197, 229], [197, 245], [197, 261], [197, 277],
                      [213, 197], [213, 213], [213, 229], [213, 245], [213, 261], [213, 277],
                      [229, 197], [229, 213], [229, 229], [229, 245], [229, 261], [229, 277],
                      [245, 197], [245, 213], [245, 229], [245, 245], [245, 261], [245, 277],
                      [261, 197], [261, 213], [261, 229], [261, 245], [261, 261], [261, 277]]

    img_class = 'chessboard_coner'

    # Half side length of the square box placed around each point
    checkboard_block_size = 20
    k = 3
    coner_len_half = math.ceil(checkboard_block_size / k)

    # Bounding box extents needed for the VOC-style XML
    pt_minx = [point[0] - coner_len_half for point in img_points_fix]
    pt_miny = [point[1] - coner_len_half for point in img_points_fix]
    pt_maxx = [point[0] + coner_len_half for point in img_points_fix]
    pt_maxy = [point[1] + coner_len_half for point in img_points_fix]

    info_pt = {
        'img_folder': image_folder,
        'img_filename': image_filename,
        'img_path': img_file,  # path the image is loaded from
        'img': img,
        'img_points': img_points_fix,
        'img_class': img_class,

        'img_xml': os.path.join(save_xml_path, image_filename + '.xml'),

        'pt_minx': pt_minx,
        'pt_miny': pt_miny,
        'pt_maxx': pt_maxx,
        'pt_maxy': pt_maxy,
    }

    make_xml(info_pt)