数据集选择
在选择改进实验数据集时需要注意的一些关键点:
- 数据集质量和准确性:确保数据集中的标签和边界框信息是准确的。错误的标签会导致模型学习不准确,难以泛化到新数据。
- ⭐数据平衡⭐:尽量保持数据集中各个类别的样本数量平衡。不平衡的数据集可能导致模型对某些类别的性能较差。
- ⭐领域适应⭐:如果您想要做小目标检测相关的实验,那需要选择VisDrone2019等小目标占比较多的公开数据集,确保数据集与目标领域相匹配。
- 数据多样性:确保数据集包含多种不同的目标类别和场景。多样性有助于模型更好地泛化,并适应不同的情况。
- 真实世界数据:使用真实世界的数据集,以反映实际应用场景。合成数据集可以用于预训练,但最终的改进应该在真实数据上进行。
- 数据集分布:了解数据集中物体类别的分布,以便更好地处理不平衡问题。
- 数据集清洗:定期清洗数据集,删除错误的标签和低质量的样本。
- 数据合法性:确保您的数据集采集和使用遵守法律和伦理规定,特别是在涉及隐私和个人信息的情况下。
至于是选择公开数据集还是自制数据集,取决于您自身的条件,如果有条件(无人机等工具)那完全可以自己制作数据集。总之,在改进 YOLO 模型之前,确保你的数据集经过充分的筛选和准备,以获得最佳的结果。
🔨 数据集划分
划分训练集、验证集和测试集的比例没有固定的标准,取决于数据集的大小和可用样本数量,但要保证划分的随机性,避免数据偏斜或重复。通常的做法是将数据集划分为训练集、验证集和测试集三部分:
- 训练集是用来训练模型的主要数据集。模型通过训练集学习数据的模式和特征,并调整参数来最小化预测误差。训练集应具有代表性,以涵盖数据的各种变化和情况。
- 验证集用于调整模型的超参数和进行模型选择。超参数是在模型训练之前设置的参数,如学习率、正则化强度等。通过使用验证集,在不同超参数设置下评估模型性能,可以选择最佳的超参数组合,以提高模型的泛化能力。
- 测试集用于评估最终模型的性能。模型在训练和验证期间没有接触到测试集数据,因此测试集提供了一个独立的评估指标,反映了模型在真实场景中的表现。测试集应该是隐藏的,模型在训练过程中不能使用测试集进行调整。
并且比例的选择如下:
- 一般来说,训练集占总数据的60-80%左右,用于模型的训练和参数调整。
- 验证集占总数据的10-20%左右,用于超参数的调整和模型选择。
- 测试集占总数据的10-20%左右,用于最终模型性能的评估。
所以,比例可以是7:2:1,6:2:2,8:1:1等情况。
以YOLOv8训练为例,图像文件存放在 `images` 文件夹中,txt标签文件存放在 `labels` 文件夹中(目录结构如下),然后使用后面的代码对其进行划分:
- mydata
  - images
    - 1.jpg
    - 2.jpg
    - ...
  - labels
    - 1.txt
    - 2.txt
    - ...
import argparse
import glob
from pathlib import Path
import random
import shutil
import os
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
# Worker-thread count: one fewer than the CPU count, clamped to the range 1..8.
# os.cpu_count() may return None when the count cannot be determined, so fall
# back to 1 instead of failing on `None - 1`.
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))


def run(func, this_iter, desc="Processing"):
    """Apply ``func`` to every item of ``this_iter`` on a thread pool.

    Args:
        func: Callable that takes a single item.
        this_iter: Items to process; any iterable is accepted.
        desc: Label shown on the tqdm progress bar.

    Returns:
        list: The results of ``func``, in input order.
    """
    # Materialise once so the progress-bar total also works for generators.
    items = list(this_iter)
    with ThreadPoolExecutor(max_workers=NUM_THREADS, thread_name_prefix='MyThread') as executor:
        return list(tqdm(executor.map(func, items), total=len(items), desc=desc))
def split_dataset_into_train_val_test(
    dataset_dir,
    save_dir,
    train_ratio=0.7,
    val_ratio=0.2,
    test_ratio=0.1,
    im_suffix='jpg'
):
    """Randomly split a YOLO dataset into train / val / test folders.

    Images are read from ``dataset_dir/images`` and copied, together with
    their label files, into ``save_dir/{train,val,test}/{images,labels}``.

    Args:
        dataset_dir: Dataset root (str or Path) containing ``images`` and
            ``labels`` sub-directories.
        save_dir: Output root; split directories are created as needed.
        train_ratio: Fraction of images assigned to the train split.
        val_ratio: Fraction of images assigned to the val split.
        test_ratio: Not read by the code; the test split receives whatever
            remains after train and val. Kept for interface compatibility.
        im_suffix: One image suffix (e.g. ``'jpg'``) or an iterable of
            suffixes; a leading dot is tolerated.
    """
    dataset_dir = Path(dataset_dir)

    # A bare string is ONE suffix; iterating it would glob "*.j", "*.p", "*.g".
    suffixes = [im_suffix] if isinstance(im_suffix, str) else list(im_suffix)

    image_files = []
    for suffix in suffixes:
        pattern = dataset_dir / 'images' / f"*.{suffix.lstrip('.')}"
        image_files += glob.glob(str(pattern))
    total_images = len(image_files)
    random.shuffle(image_files)

    train_split = int(total_images * train_ratio)
    val_split = int(total_images * val_ratio)

    train_images = image_files[:train_split]
    # Tolerant comparison: float sums such as 0.7 + 0.3 are not guaranteed to
    # equal 1 exactly.
    if abs(train_ratio + val_ratio - 1) < 1e-9:
        # No test split requested: val takes everything after train.
        val_images = image_files[train_split:]
        test_images = []
    else:
        val_images = image_files[train_split:train_split + val_split]
        test_images = image_files[train_split + val_split:]

    print('*'*25)
    print(
        "",
        f"Total images: {total_images}\n",
        f"Train images: {len(train_images)}\n",
        f"Val images: {len(val_images)}\n",
        f"Test images: {len(test_images)}"
    )
    print('*'*25)

    split_paths = [("train", train_images), ("val", val_images), ("test", test_images)]
    for split_name, images in split_paths:
        split_dir = Path(save_dir) / split_name
        for dir_name in ['images', 'labels']:
            (split_dir / dir_name).mkdir(exist_ok=True, parents=True)
        args_list = [(image, dataset_dir, split_dir) for image in images]
        run(process_image, args_list, desc=f"Creating {split_name} dataset")
        print(f"Created {split_name} dataset with {len(images)} images.")
def process_image(args):
    """Copy one image and its label file into a split directory.

    Args:
        args: Tuple ``(image_file, dataset_dir, split_dir)``. ``dataset_dir``
            and ``split_dir`` must support the ``/`` path operator. The label
            is looked up at ``dataset_dir/labels/<image stem>.txt``.

    The pair is copied only when ``has_objects`` reports that the label file
    has content; otherwise nothing is copied. A missing label file fails the
    assertion.
    """
    src_image, data_root, target_dir = args
    src_image = Path(src_image)
    label_path = data_root / 'labels' / f"{src_image.stem}.txt"
    assert label_path.exists(), f'{label_path} 不存在!'
    if has_objects(label_path):
        shutil.copy(src_image, target_dir / "images" / src_image.name)
        shutil.copy(label_path, target_dir / "labels" / label_path.name)
def has_objects(annotation_path):
    """Return True when the label file holds at least one annotation line.

    Lines that are empty or contain only whitespace are ignored, so a file
    consisting solely of blank lines counts as having no objects.

    Args:
        annotation_path: Path of the txt label file.
    """
    with open(annotation_path, "r") as f:
        return any(line.strip() for line in f)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Dataset root; it must contain the images/ and labels/ sub-directories.
    parser.add_argument('--data', default='./data')
    # Output root for the train/val/test split directories.
    parser.add_argument('--save', default='./mydata')
    # nargs='+' keeps the value a list when given on the command line;
    # without it a value such as "png" arrives as one string and would be
    # iterated character by character.
    parser.add_argument('--images_suffix', nargs='+', default=['jpg', 'png', 'jpeg'], help='images suffix')
    opt = parser.parse_args()

    split_dataset_into_train_val_test(
        dataset_dir=opt.data,
        save_dir=opt.save,
        train_ratio=0.7,
        val_ratio=0.2,
        im_suffix=opt.images_suffix
    )
🔨 数据转换
VOC转YOLO
您的数据集存放格式可以如下所示:
- mydata
  - Annotations
  - images
运行脚本后会生成labels文件夹(用于存放txt文件)和classes.txt文件(记录种类)
- mydata
  - Annotations
  - images
  - labels
  - classes.txt
实现代码
import glob
import os
import re
import threading
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import cv2
import numpy as np
from tqdm import tqdm
# Worker-thread count: one fewer than the CPU count, clamped to the range 1..8.
# os.cpu_count() may return None when the count cannot be determined, so fall
# back to 1 instead of failing on `None - 1`.
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))


def run(func, this_iter, desc="Processing"):
    """Apply ``func`` to every item of ``this_iter`` on a thread pool.

    Args:
        func: Callable that takes a single item.
        this_iter: Items to process; any iterable is accepted.
        desc: Label shown on the tqdm progress bar.

    Returns:
        list: The results of ``func``, in input order.
    """
    # Materialise once so the progress-bar total also works for generators.
    items = list(this_iter)
    with ThreadPoolExecutor(max_workers=NUM_THREADS, thread_name_prefix='MyThread') as executor:
        return list(tqdm(executor.map(func, items), total=len(items), desc=desc))
# Convert a VOC XML pixel box into normalised YOLO coordinates.
def convert(size, box):
    """Turn a corner-style box into a centre/size box scaled by the image size.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box.

    Returns:
        tuple: ``(x_center, y_center, box_width, box_height)``, with the x
        values multiplied by ``1 / width`` and the y values by ``1 / height``.
    """
    inv_w = 1.0 / size[0]
    inv_h = 1.0 / size[1]
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
    center_x = (x_lo + x_hi) / 2.0
    center_y = (y_lo + y_hi) / 2.0
    return (
        center_x * inv_w,
        center_y * inv_h,
        (x_hi - x_lo) * inv_w,
        (y_hi - y_lo) * inv_h,
    )
# Guards the check-then-append on the shared class list: this function is
# dispatched to several pool threads, and without the lock two threads can
# both see a class as "new" and record it twice.
_XML_CLASSES_LOCK = threading.Lock()


def get_xml_classes(xml_path):
    """Record every object class name found in one VOC XML file.

    New names are appended to the module-level ``xml_classes`` list and
    written, one per line, to the module-level ``classes_file`` handle.

    Args:
        xml_path: Path of the XML annotation file.
    """
    # ET.parse reads the file itself, so the handle is always closed and the
    # encoding declared in the XML header is honoured.
    root = ET.parse(xml_path).getroot()
    for obj in root.iter("object"):
        name_node = obj.find("name")
        if name_node is None or name_node.text is None:
            # An <object> without a usable <name> carries no class to record.
            continue
        cls = name_node.text
        with _XML_CLASSES_LOCK:
            if cls not in xml_classes:
                classes_file.write(cls + "\n")
                xml_classes.append(cls)
# Convert one image's VOC XML annotation into a YOLO txt label file.
def convert_xml2yolo(img_path):
    """Write the YOLO label file that corresponds to ``img_path``.

    The XML is looked up in the module-level ``xml_target_path`` directory
    and the txt file is written to the module-level ``save_path`` directory,
    both named after the image stem. Class ids are indices into the
    module-level ``xml_classes`` list; objects whose class is not in that
    list are printed and skipped.

    Args:
        img_path: Path of the image file.
    """
    img_path = Path(img_path)
    # with_suffix handles any extension and any letter case.
    xml_path = Path(xml_target_path) / img_path.with_suffix(".xml").name
    txt_path = Path(save_path) / img_path.with_suffix(".txt").name

    if not xml_path.exists():
        print(f"{xml_path}不存在!")
        return

    root = ET.parse(xml_path).getroot()

    w = h = 0
    size = root.find("size")
    if size is not None:
        w = int(size.find("width").text)
        h = int(size.find("height").text)
    if w == 0 or h == 0:
        # The XML carries no usable size: read it from the image itself.
        # np.fromfile + imdecode is kept from the original loading code.
        img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)
        if img is None:
            print(f"{img_path}无法读取!")
            return
        h, w = img.shape[:2]

    label_lines = []
    for obj in root.iter("object"):
        cls = obj.find("name").text
        if cls not in xml_classes:
            print(cls)
            continue
        cls_id = xml_classes.index(cls)
        xmlbox = obj.find("bndbox")
        b = (
            float(xmlbox.find("xmin").text),
            float(xmlbox.find("xmax").text),
            float(xmlbox.find("ymin").text),
            float(xmlbox.find("ymax").text),
        )
        # w and h are non-zero here, so convert() cannot divide by zero and
        # no catch-all handler is needed around it.
        bbox = convert((w, h), b)
        label_lines.append(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + "\n")

    # The context manager guarantees the label file is flushed and closed.
    with open(txt_path, "w") as out_file:
        out_file.writelines(label_lines)
if __name__ == "__main__":
    # Forward slashes work on Windows and POSIX alike; with backslashes the
    # whole string is a single path component on POSIX.
    xml_target_path = "data/Annotations"  # directory of the XML files
    save_path = "data/labels"  # directory for the converted txt files
    images_path = "data/images"  # directory of the images

    # Validate before creating any output file.
    if not Path(xml_target_path).exists():
        raise FileNotFoundError("Annotations文件夹不存在")

    # -------------------------------------------- #
    # Step 1: collect every class used in the XMLs
    # -------------------------------------------- #
    xml_classes = []
    xml_list = glob.glob(os.path.join(xml_target_path, "*.xml"))
    # classes.txt sits next to the Annotations directory; the with-block
    # makes sure it is flushed and closed once all classes are written.
    with open(Path(xml_target_path).parents[0] / "classes.txt", "w") as classes_file:
        run(get_xml_classes, xml_list)
    print(Path(xml_target_path).parents[0])
    print(xml_classes)

    # -------------------------------------------- #
    # Step 2: convert to YOLO txt
    # -------------------------------------------- #
    Path(save_path).mkdir(parents=True, exist_ok=True)
    file_list = []
    # One explicit pattern per suffix, so .jpeg images are included too.
    for suffix in ("jpg", "jpeg", "png"):
        file_list += glob.glob(os.path.join(images_path, f"*.{suffix}"))
    run(convert_xml2yolo, file_list)
YOLO转VOC
您的数据集存放格式可以如下所示:
- mydata
  - images
  - labels
运行脚本后会生成Annotations文件夹(用于存放xml文件)
- mydata
  - Annotations
  - images
  - labels
实现代码
import glob
from pathlib import Path
from xml.dom.minidom import Document
import os
import cv2
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
# Worker-thread count: one fewer than the CPU count, clamped to the range 1..8.
# os.cpu_count() may return None when the count cannot be determined, so fall
# back to 1 instead of failing on `None - 1`.
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))


def run(func, this_iter, desc="Processing"):
    """Apply ``func`` to every item of ``this_iter`` on a thread pool.

    Args:
        func: Callable that takes a single item.
        this_iter: Items to process; any iterable is accepted.
        desc: Label shown on the tqdm progress bar.

    Returns:
        list: The results of ``func``, in input order.
    """
    # Materialise once so the progress-bar total also works for generators.
    items = list(this_iter)
    with ThreadPoolExecutor(max_workers=NUM_THREADS, thread_name_prefix='MyThread') as executor:
        return list(tqdm(executor.map(func, items), total=len(items), desc=desc))
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def makexml(file_name):
    """Convert one YOLO txt label file into a VOC XML annotation file.

    Reads the module-level settings ``txtPath``, ``picPath``, ``xmlPath``,
    ``folder_name`` and ``dic`` (class-id string -> class name). The image is
    expected at ``picPath/<stem>.jpg`` and the result is written to
    ``xmlPath/<stem>.xml``.

    Any failure is reported together with the file name and the file is
    skipped, so one bad file does not stop the whole batch.

    Args:
        file_name: Path of the txt label file; only its base name is used.
    """
    try:
        name = Path(file_name).name
        stem = Path(file_name).stem
        image_name = stem + ".jpg"

        # Blank lines are dropped so they cannot trigger a class lookup.
        with open(Path(txtPath) / name) as txt_file:
            label_rows = [line.split() for line in txt_file if line.strip()]

        img = cv2.imread(str(Path(picPath) / image_name))
        if img is None:
            raise FileNotFoundError(f"cannot read image {Path(picPath) / image_name}")
        Pheight, Pwidth, Pdepth = img.shape

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", folder_name)
        _append_text_element(xmlBuilder, annotation, "filename", image_name)

        size = xmlBuilder.createElement("size")
        _append_text_element(xmlBuilder, size, "width", Pwidth)
        _append_text_element(xmlBuilder, size, "height", Pheight)
        _append_text_element(xmlBuilder, size, "depth", Pdepth)
        annotation.appendChild(size)

        for row in label_rows:
            center_x, center_y = float(row[1]), float(row[2])
            box_w, box_h = float(row[3]), float(row[4])

            obj_node = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj_node, "name", dic[row[0]])
            _append_text_element(xmlBuilder, obj_node, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj_node, "truncated", "0")
            _append_text_element(xmlBuilder, obj_node, "difficult", "0")

            # Centre/size (normalised) -> pixel corners; the "+ 1" offset is
            # kept exactly as in the original formula.
            bndbox = xmlBuilder.createElement("bndbox")
            _append_text_element(
                xmlBuilder, bndbox, "xmin",
                int((center_x * Pwidth + 1) - box_w * 0.5 * Pwidth))
            _append_text_element(
                xmlBuilder, bndbox, "ymin",
                int((center_y * Pheight + 1) - box_h * 0.5 * Pheight))
            _append_text_element(
                xmlBuilder, bndbox, "xmax",
                int((center_x * Pwidth + 1) + box_w * 0.5 * Pwidth))
            _append_text_element(
                xmlBuilder, bndbox, "ymax",
                int((center_y * Pheight + 1) + box_h * 0.5 * Pheight))
            obj_node.appendChild(bndbox)
            annotation.appendChild(obj_node)

        # The XML header declares utf-8, so the file must be written as utf-8
        # rather than in the platform's default encoding.
        with open(Path(xmlPath) / (stem + ".xml"), 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
    except Exception as e:
        # Best-effort batch: name the failing file and move on.
        print(f"{file_name}: {e!r}")
def main(txtPath):
    """Convert the YOLO txt label files found in ``txtPath`` to VOC XML files.

    Args:
        txtPath: Directory that holds the txt label files.
    """
    label_pattern = os.path.join(txtPath, '*.[t][x][t]*')
    run(makexml, glob.glob(label_pattern))
if __name__ == "__main__":
    # Maps the class id used in the txt files to the class name. It must
    # match your own classes.txt, in the same order.
    dic = {
        '0': "Dead tree",
        '1': "Sick tree",
    }
    folder_name = "JPEGImages"  # content of the <folder> tag; may be changed
    picPath = r"data/images/"  # image directory
    txtPath = r"data/labels/"  # txt label directory
    xmlPath = r"data/Annotations/"  # output directory for the xml files

    # Both input directories are required, so this is `and`; with `or` the
    # check passed when only one of them existed.
    if not (Path(picPath).exists() and Path(txtPath).exists()):
        raise FileNotFoundError(f"{picPath}或{txtPath}文件夹不存在")
    Path(xmlPath).mkdir(parents=True, exist_ok=True)
    main(txtPath)
YOLO转COCO
您的数据集存放格式可以如下所示:
- mydata
  - test
    - images
    - labels
  - classes.txt
`classes.txt` 存放目标类别信息,注意顺序要对应。运行脚本后会在当前目录下生成 `instances_val2017.json` 文件。
实现代码
'''
Date: 2023-10-18 10:41:52
LastEditors: xujiayue
LastEditTime: 2023-10-18 10:46:18
'''
import os
import cv2
import json
from tqdm import tqdm
# from sklearn.model_selection import train_test_split
import argparse
# Command-line options, parsed at module level into `arg`.
parser = argparse.ArgumentParser()
# Dataset root directory.
parser.add_argument(
    '--root_dir',
    type=str,
    default=r'F:\ObjectDetection\Datasets\Experiment Datasets\archive721',
    help="root path of images and labels, include ./images and ./labels and classes.txt",
)
# Destination of the generated COCO json file.
parser.add_argument(
    '--save_path',
    default='instances_val2017.json',
    type=str,
    help="if not split the dataset, give a path to a json file",
)
arg = parser.parse_args()
def yolo2coco(arg):
    """Convert a YOLO dataset into one COCO-style annotation json file.

    Reads images from ``<root_dir>/test/images``, labels from
    ``<root_dir>/test/labels`` and class names from ``<root_dir>/classes.txt``,
    then writes the result to ``arg.save_path``. Images that have no label
    file, or that cannot be read, are left out of the output.

    Args:
        arg: Object with ``root_dir`` and ``save_path`` attributes.
    """
    root_path = arg.root_dir
    print("Loading data from ", root_path)
    assert os.path.exists(root_path)
    originLabelsDir = os.path.join(root_path, 'test/labels')
    originImagesDir = os.path.join(root_path, 'test/images')
    with open(os.path.join(root_path, 'classes.txt')) as f:
        # Blank lines are kept so that line number and class id stay aligned.
        classes = [line.strip() for line in f]

    # File names found in the image directory.
    indexes = os.listdir(originImagesDir)

    dataset = {'categories': [], 'annotations': [], 'images': []}
    for i, cls in enumerate(classes, 0):
        dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})

    # Running id of the annotations.
    ann_id_cnt = 0
    for index in tqdm(indexes):
        # splitext handles any extension length and leaves the rest of the
        # name untouched (a plain replace('images', ...) also rewrote file
        # names that merely contain "images").
        stem = os.path.splitext(index)[0]
        label_path = os.path.join(originLabelsDir, stem + '.txt')
        if not os.path.exists(label_path):
            # No label file: the image is skipped entirely.
            continue

        # Decode only images that are actually used.
        im = cv2.imread(os.path.join(originImagesDir, index))
        if im is None:
            print('Skip unreadable image {}'.format(index))
            continue
        height, width = im.shape[:2]

        # Numeric stems become integer ids; other stems are used as-is.
        image_id = int(stem) if stem.isdecimal() else stem
        dataset['images'].append({'file_name': index,
                                  'id': image_id,
                                  'width': width,
                                  'height': height})

        with open(label_path, 'r') as fr:
            labelList = fr.readlines()

        for label in labelList:
            label = label.strip().split()
            if not label:
                # A blank line carries no annotation.
                continue
            x = float(label[1])
            y = float(label[2])
            w = float(label[3])
            h = float(label[4])

            # Normalised centre/size -> pixel corners x1, y1, x2, y2.
            x1 = (x - w / 2) * width
            y1 = (y - h / 2) * height
            x2 = (x + w / 2) * width
            y2 = (y + h / 2) * height

            # Category ids start at 0, following the YOLO class index.
            cls_id = int(label[0])
            # Separate names so the image width/height are not overwritten.
            box_width = max(0, x2 - x1)
            box_height = max(0, y2 - y1)
            dataset['annotations'].append({
                'area': box_width * box_height,
                'bbox': [x1, y1, box_width, box_height],
                'category_id': cls_id,
                'id': ann_id_cnt,
                'image_id': image_id,
                'iscrowd': 0,
                # Rectangle mask: four corners, clockwise from the top-left.
                'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
            })
            ann_id_cnt += 1

    # Save the result.
    with open(arg.save_path, 'w') as f:
        json.dump(dataset, f)
    print('Save annotation to {}'.format(arg.save_path))
if __name__ == "__main__":
yolo2coco(arg)