深度学习——将xml格式标注转换为txt格式数据集

转换标签文件:xml格式转为txt格式

import os.path
import xml.etree.ElementTree as ET
import os
import random

# class_names = ['palm', 'stone', 'scissor', 'awesome', 'heartB', 'OK', 'ROCK', 'one', 'swear', 'thanks', 'heartA',
#                'heartC', 'good', 'bad', 'pray', 'call', 'take_picture', 'salute']
class_names = ['hat', 'person']
xmlpath = './VOC2028/Annotations/'  # directory holding the source VOC XML annotations
txtpath = './labels/'  # directory that receives the converted txt labels


def voc_to_yolo_content(root, names):
    """Build the txt label content for one parsed VOC annotation.

    Args:
        root: Root element of the annotation. It is read for ``size/width``,
            ``size/height`` and, for every ``object`` element, its ``name``
            and the ``bndbox`` corners ``xmin``/``ymin``/``xmax``/``ymax``.
        names: Ordered class names; the position of an object's name in this
            sequence is the class id that gets written.

    Returns:
        One line per object in the form ``"<class> <cx> <cy> <w> <h>"`` where
        the centre and the extent are divided by the image width/height.
        Lines are joined with ``"\\n"`` (no trailing newline); the result is
        an empty string when the annotation has no objects.

    Raises:
        ValueError: If the image width or height is not positive, or an
            object's class name is not in ``names``.
    """
    size = root.find('size')
    # float() accepts both integer and fractional values; for integer input
    # the divisions below yield the same results as integer parsing would.
    w = float(size.find('width').text)
    h = float(size.find('height').text)
    if w <= 0 or h <= 0:
        # Dividing by a zero size would otherwise surface as a bare
        # ZeroDivisionError with no hint about the cause.
        raise ValueError('invalid image size %sx%s' % (w, h))

    lines = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        if label not in names:
            raise ValueError('unknown class %r (expected one of %r)' % (label, names))

        xmlbox = obj.find('bndbox')
        x1 = float(xmlbox.find('xmin').text)
        x2 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymin').text)
        y2 = float(xmlbox.find('ymax').text)

        fields = (
            names.index(label),
            (x1 + x2) / 2 / w,
            (y1 + y2) / 2 / h,
            (x2 - x1) / w,
            (y2 - y1) / h,
        )
        lines.append(" ".join(str(value) for value in fields))

    return "\n".join(lines)


os.makedirs(txtpath, exist_ok=True)

# Only the XML files directly inside xmlpath are converted: every path below
# is built as xmlpath + file name, so entries of nested directories could not
# be opened anyway, and non-XML entries are not annotations.
files = []
if os.path.isdir(xmlpath):
    files = sorted(f for f in os.listdir(xmlpath) if f.lower().endswith('.xml'))

print(xmlpath)
number = len(files)
print(number)

for xml_name in files:
    txt_name = os.path.splitext(xml_name)[0] + ".txt"
    xml_file_name = os.path.join(xmlpath, xml_name)
    txt_file_name = os.path.join(txtpath, txt_name)

    # Parse straight from the path so the parser owns (and closes) the file
    # handle and honours the encoding declared in the XML header.
    root = ET.parse(xml_file_name).getroot()
    try:
        content = voc_to_yolo_content(root, class_names)
    except ValueError as err:
        # Re-raise with the offending file so a bad annotation is easy to find.
        raise ValueError('%s: %s' % (xml_file_name, err)) from err

    print(content)
    # The content is complete before the file is opened, so a failing
    # annotation never leaves an empty label file behind.
    with open(txt_file_name, 'w', encoding='utf-8') as f_txt:
        f_txt.write(content)

print("done!")

数据集划分

'''
    脚本功能:划分数据集
'''
import os
import random
import shutil
import time


def copyFile(fileDir, origion_path1, class_name):
    """Randomly split images and their label files into train/val folders.

    The function reads three module-level settings that must be defined
    before it is called: ``train_rate`` (fraction of entries placed in the
    first split), ``save_dir`` (image target directories, train then val)
    and ``save_dir1`` (label target directories in the same order).

    Args:
        fileDir: Directory whose entries are treated as the images to split.
        origion_path1: Directory holding one ``<image stem>.txt`` label file
            per image.
        class_name: Not used; kept so existing callers keep working.

    Raises:
        FileNotFoundError: If an image has no matching label file.
    """
    image_list = os.listdir(fileDir)
    train_number = int(len(image_list) * train_rate)
    train_sample = random.sample(image_list, train_number)
    chosen = set(train_sample)
    # Everything that was not drawn for training goes to the second split.
    test_sample = [entry for entry in image_list if entry not in chosen]

    for image_dir, label_dir, entries in zip(save_dir, save_dir1, (train_sample, test_sample)):
        # exist_ok creates whichever directory is missing; creating both
        # unconditionally failed when only one of the pair already existed.
        os.makedirs(image_dir, exist_ok=True)
        os.makedirs(label_dir, exist_ok=True)
        for image_name in entries:
            # splitext keeps dots inside the stem ("img.001.jpg" -> "img.001"),
            # whereas splitting on the first dot would look for "img.txt".
            label_name = os.path.splitext(image_name)[0] + '.txt'
            shutil.copy(os.path.join(fileDir, image_name), os.path.join(image_dir, image_name))
            shutil.copy(os.path.join(origion_path1, label_name), os.path.join(label_dir, label_name))


if __name__ == '__main__':
    # Script entry point: split ./images and ./labels into train/val folders
    # under ./coco and report the elapsed time.
    time_start = time.time()

    # Source dataset locations
    origion_path = './images'          # original images
    origion_path1 = './labels'         # original labels

    # Output locations
    save_train_img = './coco/images/train'      # training images
    save_val_img = './coco/images/val'          # validation images
    save_train_labels = './coco/labels/train'   # training labels
    save_val_labels = './coco/labels/val'       # validation labels
    # copyFile reads save_dir, save_dir1 and train_rate as module globals.
    save_dir = [save_train_img, save_val_img]
    save_dir1 = [save_train_labels, save_val_labels]

    # Fraction of the entries that goes to the training split
    train_rate = 0.7

    file_list = os.listdir(origion_path)
    # copyFile is called once with the last directory entry as class_name.
    # An empty directory would leave the name unbound (NameError), so fall
    # back to an empty string in that case.
    class_name = file_list[-1] if file_list else ''
    copyFile(origion_path, origion_path1, class_name)
    print('划分完毕!')
    time_end = time.time()
    print('---------------')
    print('训练集和测试集划分共耗时%s!' % (time_end - time_start))

  • 2
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值