生成VOC2012数据集

1、创建目录如下所示:

VOC2012

---Annotations

---ImageSets

    ---Main

---JPEGImages

 

2、标定图片放到文件夹VOC2012/JPEGImages,使用labelImg工具进行标定,将标定后的xml文件剪切到文件夹VOC2012/Annotations

 

3、脚本修改xml文件中的folder和path节点内容(即原文截图中红色方框标出的部分)

 

4、脚本在VOC2012/ImageSets/Main文件夹生成:类别名_train.txt和类别名_val.txt;同时生成trainval.txt、train.txt、val.txt、test.txt

脚本程序如下所示,脚本放到文件夹VOC2012运行

import xml.etree.ElementTree as ET
import os
import random

import cv2 as cv


def change_image_format(old_format='.png', new_format='.jpg',
                        img_dir="../VOC2012/JPEGImages/"):
    """Convert every image with extension ``old_format`` to ``new_format``.

    Each matching file in ``img_dir`` is read with OpenCV and written next to
    the original under the same stem with the new extension. The original
    file is left in place.

    Args:
        old_format: Extension (including the dot) of the files to convert.
            Compared case-insensitively.
        new_format: Extension (including the dot) of the files to produce.
        img_dir: Directory holding the images.

    Files whose extension is not ``old_format`` are skipped, so existing
    images in the target format are never re-encoded. Files that OpenCV
    cannot decode are reported and skipped.
    """
    for img_file in os.listdir(img_dir):
        image_path = os.path.join(img_dir, img_file)
        if not os.path.isfile(image_path):
            continue

        # Only touch the extension: a plain str.replace on the whole path
        # could also rewrite a directory name that contains old_format.
        stem, ext = os.path.splitext(image_path)
        if ext.lower() != old_format.lower():
            continue

        image = cv.imread(image_path)
        if image is None:
            # imread signals an unreadable/corrupt file by returning None;
            # passing that on to imwrite would raise.
            print("skipped unreadable image : %s" % (image_path))
            continue

        new_image_path = stem + new_format
        cv.imwrite(new_image_path, image, [cv.IMWRITE_JPEG_QUALITY, 100])

        print("processed image : %s" % (new_image_path))


def xml_modification(ann_dir="../VOC2012/Annotations/",
                     img_dir="C:\\Users\\mengkun\\Desktop\\tmp\\VOC2012\\JPEGImages\\"):
    """Rewrite the ``folder`` and ``path`` nodes of every annotation file.

    For each ``.xml`` file in ``ann_dir`` the text of every ``folder``
    element is set to ``'voc2012'`` and every ``path`` element is replaced by
    ``img_dir`` followed by the base name of the image it pointed to. The
    file is overwritten in place.

    Args:
        ann_dir: Directory containing the annotation XML files.
        img_dir: Absolute path of the dataset's JPEGImages folder, including
            the trailing separator. Change the default to your own location.
    """
    for xml_file in os.listdir(ann_dir):
        xml_path = os.path.join(ann_dir, xml_file)
        # Anything that is not an XML file would make ET.parse raise.
        if not os.path.isfile(xml_path) or not xml_file.lower().endswith('.xml'):
            continue

        tree = ET.parse(xml_path)
        root = tree.getroot()

        for elem in root.iter('folder'):
            elem.text = 'voc2012'

        for elem in root.iter('path'):
            if not elem.text:
                # An empty <path/> has text None; nothing to rewrite.
                continue
            # Annotations may have been produced on Windows or POSIX, so
            # accept either separator when extracting the base name.
            filename = elem.text.replace('\\', '/').split('/')[-1]
            elem.text = img_dir + filename

        tree.write(xml_path)

        print("processed xml : %s" % (xml_path))


def generate_classes_txt(ann_dir="../VOC2012/Annotations/",
                         save_dir="../VOC2012/ImageSets/Main/",
                         classes=("ok", "spot")):
    """Write ``<class>_train.txt`` and ``<class>_val.txt`` for each class.

    Every annotation in ``ann_dir`` contributes one line per class of the
    form ``"<image stem> 1"`` when the annotation contains a ``name`` element
    equal to the class, and ``"<image stem> -1"`` otherwise. The train and
    val files of a class receive the same lines.

    Args:
        ann_dir: Directory containing the annotation XML files.
        save_dir: Directory in which the list files are created.
        classes: Class names to generate lists for; change to the classes of
            your own dataset.
    """
    lines = {cls: [] for cls in classes}

    # Sorted so the output order does not depend on the file system.
    for xml_file in sorted(os.listdir(ann_dir)):
        xml_path = os.path.join(ann_dir, xml_file)
        if not os.path.isfile(xml_path) or not xml_file.lower().endswith('.xml'):
            continue

        root = ET.parse(xml_path).getroot()

        # Use the last <filename> element; reset per file so a missing
        # element cannot silently reuse the previous annotation's name.
        filename = None
        for elem in root.iter('filename'):
            filename = elem.text
        if not filename:
            filename = xml_file

        # Strip whatever extension the image has (not only ".jpg") so the
        # stem and the label are always separated by a space.
        stem = os.path.splitext(filename)[0]

        present = {elem.text for elem in root.iter('name')}
        for cls in classes:
            flag = 1 if cls in present else -1
            lines[cls].append("%s %d\n" % (stem, flag))

    for cls in classes:
        for split in ("train", "val"):
            list_path = os.path.join(save_dir, "%s_%s.txt" % (cls, split))
            with open(list_path, 'w') as list_file:
                list_file.writelines(lines[cls])


def generate_train_val_test_txt(xml_file_path="../VOC2012/Annotations/",
                                save_Path="../VOC2012/ImageSets/Main/",
                                trainval_percent=0.9,
                                train_percent=0.9):
    """Randomly split the annotations into trainval/train/val/test lists.

    The stems of the ``.xml`` files in ``xml_file_path`` are distributed over
    ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt`` inside
    ``save_Path``, one stem per line. ``train`` and ``val`` partition
    ``trainval``; ``trainval`` and ``test`` partition the whole set.

    Args:
        xml_file_path: Directory containing the annotation XML files.
        save_Path: Directory in which the four list files are created.
        trainval_percent: Fraction of all annotations placed in trainval.
        train_percent: Fraction of trainval placed in train.
    """
    # Only real annotation files take part in the split.
    total_xml = sorted(
        entry for entry in os.listdir(xml_file_path)
        if entry.lower().endswith('.xml')
        and os.path.isfile(os.path.join(xml_file_path, entry))
    )
    num = len(total_xml)
    indices = range(num)

    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval = random.sample(indices, tv)
    train = random.sample(trainval, tr)

    # Sets give O(1) membership tests in the write loop below.
    trainval_set = set(trainval)
    train_set = set(train)

    print("train and val size", tv)
    print("train size", tr)

    with open(os.path.join(save_Path, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(save_Path, 'test.txt'), 'w') as ftest, \
            open(os.path.join(save_Path, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(save_Path, 'val.txt'), 'w') as fval:
        for i in indices:
            name = os.path.splitext(total_xml[i])[0] + '\n'
            if i in trainval_set:
                ftrainval.write(name)
                if i in train_set:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)


# Pipeline steps executed in order when the script runs. Add
# generate_classes_txt to the tuple to also emit the per-class list files.
for step in (xml_modification, generate_train_val_test_txt):
    step()

 

  • 2
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值