yolov7数据集制作方法，相当完善

老李小同志

已于 2022-12-08 18:58:06 修改

阅读量7.7k

点赞数 22

文章标签： python、人工智能、目标检测、深度学习

于 2022-11-24 15:50:04 首次发布

本文链接：https://blog.csdn.net/m0_61438337/article/details/128019914

版权

首先，用labelimg打标，我这边是打成VOC格式，会生成很多xml格式的文件。

接着，将你的所有图片放在JPEGImages文件夹中，将所有的xml格式的文件放在Annotations文件夹中，如图所示：

然后进行数据集的格式处理，这也是这篇文章的核心，按我的步骤走就OK

第一步，将xml格式的文件转成txt格式，并且会生成一个txt文件夹，所有的文件都会存入到这个txt文件夹中，代码如下：

需要改变的有类别列表，还有你的路径地址

import os.path
import xml.etree.ElementTree as ET

# 1. Change this to your own list of class names. The position of a name in
#    this list is the class id written to the YOLO label files.
class_names = ['Catla', 'Kendai', 'Carp', 'Silver']
# 2. Change these paths to your own
xmlpath = r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\Annotations/'  # directory with the source VOC xml files
txtpath = r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\txt/'  # directory that receives the converted txt files


def convert_annotation(xml_file_name, txt_file_name):
    """Convert one Pascal VOC xml annotation into a YOLO txt label file.

    Every ``object`` element becomes one line of the form
    ``class_id x_center y_center width height`` where the four box values are
    divided by the image width/height read from the ``size`` element.

    Args:
        xml_file_name: Path of the VOC xml file to read.
        txt_file_name: Path of the txt file to (over)write.

    Returns:
        The text written to ``txt_file_name`` (lines joined by ``"\\n"``,
        empty string when the annotation has no objects).

    Raises:
        ValueError: If the image size is not positive or an object uses a
            class name that is not listed in ``class_names``.
    """
    # Passing the path (not a text-mode handle) lets the XML parser decode the
    # file itself, so UTF-8 annotations work regardless of the OS locale.
    root = ET.parse(xml_file_name).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    if w <= 0 or h <= 0:
        raise ValueError(
            "invalid image size {}x{} in {}".format(w, h, xml_file_name))

    lines = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        try:
            class_num = class_names.index(label)
        except ValueError:
            raise ValueError("unknown class {!r} in {}; add it to class_names"
                             .format(label, xml_file_name)) from None

        xmlbox = obj.find('bndbox')
        # float() also accepts the integer strings labelimg writes, and
        # additionally tolerates tools that store fractional coordinates.
        x1 = float(xmlbox.find('xmin').text)
        x2 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymin').text)
        y2 = float(xmlbox.find('ymax').text)

        values = (
            class_num,
            (x1 + x2) / 2 / w,  # normalized box center x
            (y1 + y2) / 2 / h,  # normalized box center y
            (x2 - x1) / w,      # normalized box width
            (y2 - y1) / h,      # normalized box height
        )
        lines.append(" ".join(str(v) for v in values))

    content = "\n".join(lines)
    with open(txt_file_name, 'w') as f_txt:
        f_txt.write(content)
    return content


def main():
    """Convert every ``*.xml`` file directly inside ``xmlpath`` into ``txtpath``."""
    os.makedirs(txtpath, exist_ok=True)

    # Only the xml files that sit directly in xmlpath are converted; sorting
    # makes the processing order independent of the file system.
    # A wrong xmlpath raises FileNotFoundError here instead of silently
    # converting nothing.
    xml_files = sorted(
        entry for entry in os.listdir(xmlpath)
        if entry.lower().endswith('.xml')
    )
    print(len(xml_files))

    for xml_name in xml_files:
        stem = os.path.splitext(xml_name)[0]
        content = convert_annotation(
            os.path.join(xmlpath, xml_name),
            os.path.join(txtpath, stem + ".txt"),
        )
        print(content)


if __name__ == '__main__':
    main()

第二步是train与val的划分，代码如下：

这时要在你的路径下创建ImageSets\Main，需要改变的是你的路径地址还有训练集验证集比例。

import os
import random

random.seed(0)
# 1. Change these paths to your own
xmlfilepath = r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\Annotations/'
saveBasePath = r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\ImageSets\Main/'

# ----------------------------------------------------------------------#
#   Lower trainval_percent below 1 if you also want a test set.
#   train_percent is the share of trainval used for training and normally
#   does not need to be changed.
# ----------------------------------------------------------------------#
trainval_percent = 1
train_percent = 0.7


def split_names(names, trainval_percent, train_percent):
    """Randomly split sample names into trainval/train/val/test subsets.

    ``int(len(names) * trainval_percent)`` names are drawn for trainval and
    ``int(that_count * train_percent)`` of those are drawn for train; the rest
    of trainval is val and everything outside trainval is test. Sampling uses
    the global ``random`` state.

    Args:
        names: Sequence of sample names (file stems).
        trainval_percent: Fraction of all names that goes into trainval.
        train_percent: Fraction of trainval that goes into train.

    Returns:
        Dict with the keys ``'trainval'``, ``'train'``, ``'val'`` and
        ``'test'``; each value is a list of names in their input order.
    """
    num = len(names)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval = random.sample(range(num), tv)
    # Sets make the membership tests below O(1) instead of a list scan.
    train_ids = set(random.sample(trainval, tr))
    trainval_ids = set(trainval)

    parts = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for i, name in enumerate(names):
        if i not in trainval_ids:
            parts['test'].append(name)
            continue
        parts['trainval'].append(name)
        parts['train' if i in train_ids else 'val'].append(name)
    return parts


def main():
    """Write trainval.txt, train.txt, val.txt and test.txt into saveBasePath."""
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # split reproducible across machines and runs.
    total_xml = sorted(
        entry for entry in os.listdir(xmlfilepath) if entry.endswith(".xml")
    )
    names = [os.path.splitext(entry)[0] for entry in total_xml]
    parts = split_names(names, trainval_percent, train_percent)

    print("train and val size", len(parts['trainval']))
    print("train size", len(parts['train']))

    # Create ImageSets\Main automatically instead of requiring it to exist.
    os.makedirs(saveBasePath, exist_ok=True)
    for subset, subset_names in parts.items():
        # The default text encoding is kept on purpose: the copy step reads
        # these files back with a plain open(..., 'r').
        with open(os.path.join(saveBasePath, subset + '.txt'), 'w') as f:
            f.writelines(name + '\n' for name in subset_names)


if __name__ == '__main__':
    main()

第三步是复制出图片和标签，生成yolov7需要的文件夹格式

路径怎么改，类比我的路径就可以了

import os
import shutil
from tqdm import tqdm

SPLIT_PATH = r"F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\ImageSets\Main"
IMGS_PATH = r"F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\JPEGImages"
TXTS_PATH = r"F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\txt"

TO_IMGS_PATH = r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\images'
TO_TXTS_PATH = r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\labels'

data_split = ['train.txt', 'val.txt']
to_split = ['train2007', 'val2007']

# Image extensions that are recognized (compared case-insensitively).
IMG_EXTS = ('.jpg', '.jpeg', '.png', '.bmp')


def index_images(imgs_dir):
    """Map each image stem in ``imgs_dir`` to its actual file name.

    Looking the real name up (instead of assuming ``.JPG``) makes the copy
    step work for ``.jpg``/``.png``/... files and on case-sensitive file
    systems.

    Args:
        imgs_dir: Directory that holds the source images.

    Returns:
        Dict ``{stem: file_name}`` for files whose extension is in
        ``IMG_EXTS``. When several images share a stem, the first name in
        sorted order is kept. Empty dict when ``imgs_dir`` is not a directory.
    """
    if not os.path.isdir(imgs_dir):
        return {}
    index = {}
    for file_name in sorted(os.listdir(imgs_dir)):
        stem, ext = os.path.splitext(file_name)
        if ext.lower() in IMG_EXTS:
            index.setdefault(stem, file_name)
    return index


def main():
    """Copy the images and labels of every split into the YOLOv7 layout."""
    images = index_images(IMGS_PATH)

    for split, to_name in zip(data_split, to_split):
        split_path = os.path.join(SPLIT_PATH, split)

        to_imgs_path = os.path.join(TO_IMGS_PATH, to_name)
        os.makedirs(to_imgs_path, exist_ok=True)

        to_txts_path = os.path.join(TO_TXTS_PATH, to_name)
        os.makedirs(to_txts_path, exist_ok=True)

        # Read the whole split list up front so the file is closed before
        # copying starts; blank lines are skipped.
        with open(split_path, 'r') as f:
            stems = [line.strip() for line in f if line.strip()]

        for stem in tqdm(stems, desc="{} is copying".format(to_name)):
            # Copy the image
            img_name = images.get(stem)
            if img_name is not None:
                shutil.copyfile(os.path.join(IMGS_PATH, img_name),
                                os.path.join(to_imgs_path, img_name))
            else:
                print("error file: {}".format(
                    os.path.join(IMGS_PATH, stem + '.*')))

            # Copy the txt label file
            src_txt_path = os.path.join(TXTS_PATH, stem + '.txt')
            dst_txt_path = os.path.join(to_txts_path, stem + '.txt')
            if os.path.exists(src_txt_path):
                shutil.copyfile(src_txt_path, dst_txt_path)
            else:
                print("error file: {}".format(src_txt_path))


if __name__ == '__main__':
    main()

第四步：创建出最终训练需要的train.txt和val.txt文件

# From Mr. Dinosaur

import os


def listdir(path, list_name):
    """Recursively collect the paths of all non-directory entries under ``path``.

    Args:
        path: Directory to scan.
        list_name: List that is extended in place with the path of every
            entry that is not a directory; sub-directories are descended into
            at the point where they are encountered.

    Returns:
        None. The result is delivered through ``list_name``.
    """
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if not os.path.isdir(full_path):
            list_name.append(full_path)
            continue
        # A directory: descend and keep filling the same output list.
        listdir(full_path, list_name)


# (image folder, list file to write) for every split. Both train.txt and
# val.txt are produced in one run, so the script no longer has to be edited
# and re-run for the training split. Change the paths to your own.
splits = [
    (r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\images\train2007',
     r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\train.txt'),
    (r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\images\val2007',
     r'F:\graduate_files\PAPER\yolov7\鱼类数据集\fish_small\val.txt'),
]

for path, txt_file in splits:
    list_name = []  # receives the path of every file found under `path`
    listdir(path, list_name)
    print(list_name)

    # One path per line; join builds the text in a single pass instead of
    # repeated string concatenation.
    with open(txt_file, 'w') as f:
        f.write(''.join(str(i) + '\n' for i in list_name))

最终你的文件夹中会有这些（选定的）