yolov8训练detect(目标检测)数据集

目录

一、源码

二、数据集转换(voc转yolo)

2.1 原始数据集

2.2 数据集格式转换

三、训练

3.1 编写训练脚本

3.2 训练结果


一、源码

官方源码:https://github.com/ultralytics/ultralytics

二、数据集转换(voc转yolo)

2.1 原始数据集

原始数据集是通过labelImg标注得到的VOC格式数据集:JPEGImages目录存放图片,Annotations目录存放与图片同名的xml标注文件。

2.2 数据集格式转换

在项目根目录下新建一个detect_voc2yolo.py文件,复制以下内容

import os, shutil
from tqdm import tqdm
from collections import Counter
import xml.etree.ElementTree as ET
from PIL import Image
import yaml
import random


class Dataset_Transforme_Yolov8:
    """Convert a VOC-style detection dataset (JPEG images + XML) to YOLOv8 layout.

    The converter reads every ``*.xml`` annotation under ``xml_path``, writes
    one YOLO ``.txt`` label file per annotated image under
    ``save_path/labels/{train,val,test}``, optionally copies the matching
    ``.jpg`` images to ``save_path/images/{train,val,test}``, and finally
    writes ``save_path/dataset.yaml`` describing the dataset.
    """

    def __init__(self, jpg_path: str, xml_path: str, save_path: str, divide=False):
        """Store the dataset locations.

        :param jpg_path: directory containing the ``.jpg`` images
        :param xml_path: directory (searched recursively) containing the XML files
        :param save_path: output directory for images/, labels/ and dataset.yaml
        :param divide: when true, images are copied into the split folders too;
            label files are always written into the split folders
        """
        self.jpg_path = jpg_path
        self.xml_path = xml_path
        self.save_path = save_path
        self.divide = divide

    def get_classes(self):
        """Count the labels found in every XML file below ``self.xml_path``.

        :return: ``(counter, files)`` where ``counter`` maps each class name to
            its number of ``<object>`` entries and ``files`` lists the XML
            files that contain at least one ``<object>``
        """
        names = []
        labelled_files = []
        for root, dirs, files in os.walk(self.xml_path):
            # os.walk yields entries in arbitrary filesystem order; sorting makes
            # the class order and the seeded split reproducible across machines.
            dirs.sort()
            for file in sorted(files):
                # Match on the extension only, so "x.xml.bak" is not parsed.
                if not file.lower().endswith(".xml"):
                    continue
                xml_file = os.path.join(root, file)
                objects = ET.parse(xml_file).getroot().findall("object")
                if objects:
                    labelled_files.append(xml_file)
                names.extend(obj.find("name").text for obj in objects)
        return Counter(names), labelled_files

    def xml2txt(self, classes, file_path, txt_save_path, image_width, image_height):
        """Write the YOLO label file for one XML annotation file.

        Each kept object becomes one line
        ``<class index> <x center> <y center> <width> <height>`` with the four
        box values normalised by the image size and rounded to 6 decimals.
        Objects whose class name is not in ``classes`` are skipped.

        :param classes: list of class names; the list position is the class index
        :param file_path: path of the XML file
        :param txt_save_path: path of the label file to write (overwritten)
        :param image_width: image width in pixels
        :param image_height: image height in pixels
        """
        # First occurrence wins, matching list.index() semantics.
        class_ids = {}
        for idx, cls in enumerate(classes):
            class_ids.setdefault(cls, idx)

        lines = []
        for obj in ET.parse(file_path).getroot().findall('object'):
            class_name = obj.find('name').text
            if class_name not in class_ids:
                continue
            bbox = obj.find('bndbox')
            # float() accepts both "12" and "12.0"; some annotation tools
            # store coordinates as floating point strings.
            xmin = float(bbox.find('xmin').text)
            ymin = float(bbox.find('ymin').text)
            xmax = float(bbox.find('xmax').text)
            ymax = float(bbox.find('ymax').text)

            x_center = round((xmin + xmax) / (image_width * 2), 6)
            y_center = round((ymin + ymax) / (image_height * 2), 6)
            box_w = round((xmax - xmin) / image_width, 6)
            box_h = round((ymax - ymin) / image_height, 6)
            lines.append(
                f"{class_ids[class_name]} {x_center} {y_center} {box_w} {box_h}\n")

        with open(txt_save_path, 'w', encoding="utf-8") as file:
            file.writelines(lines)

    def data_split(self, full_list, train, val, shuffle, seed):
        """Split ``full_list`` into train / val / test sub-lists.

        The train and val sizes are ``int(len * ratio)``; everything left over
        goes to the test list. ``full_list`` itself is not modified.

        :param full_list: list of items to split
        :param train: fraction of items for the training list
        :param val: fraction of items for the validation list
        :param shuffle: whether to shuffle before splitting
        :param seed: random seed used when shuffling
        :return: ``(train_list, val_list, test_list)``
        :raises ValueError: if the list is empty, the training list would be
            empty, or ``train + val`` exceeds 1.0
        """
        n_total = len(full_list)
        offset_train = int(n_total * train)
        offset_trainval = offset_train + int(n_total * val)

        if n_total == 0 or offset_train < 1 or train + val > 1.0:
            raise ValueError("划分出错,请检查列表与划分比例!参考格式train,val->0.7,0.1")

        # Work on a copy with a private RNG: same permutation as seeding the
        # global generator, without mutating the caller's list or global state.
        items = list(full_list)
        if shuffle:
            random.Random(seed).shuffle(items)
        return (items[:offset_train],
                items[offset_train:offset_trainval],
                items[offset_trainval:])

    def voc2yolov8(self, train, val, shuffle=True, seed=10):
        """Run the full conversion and write ``dataset.yaml``.

        :param train: fraction of annotated files used for training
        :param val: fraction of annotated files used for validation
        :param shuffle: whether to shuffle the files before splitting
        :param seed: random seed for the shuffle
        :raises ValueError: propagated from ``data_split`` for invalid ratios
            or when no annotated XML file was found
        """
        obj_classes, files = self.get_classes()

        for kind in ("images", "labels"):
            for split in ("train", "val", "test"):
                os.makedirs(os.path.join(self.save_path, kind, split), exist_ok=True)

        classes = list(obj_classes)
        _train_list, val_list, test_list = self.data_split(files, train, val, shuffle, seed)
        # Dict lookup instead of repeated list membership tests; anything that
        # is in neither val nor test belongs to train.
        split_of = dict.fromkeys(test_list, "test")
        split_of.update(dict.fromkeys(val_list, "val"))

        for xml_file in tqdm(files):
            # splitext keeps dots inside the stem ("img.001.xml" -> "img.001").
            name = os.path.splitext(os.path.basename(xml_file))[0]
            jpg_file = os.path.join(self.jpg_path, name + ".jpg")
            # Context manager closes the image file right after reading its size.
            with Image.open(jpg_file) as img:
                img_w, img_h = img.size

            split = split_of.get(xml_file, "train")
            txt_save_path = os.path.join(self.save_path, "labels", split, name + ".txt")
            self.xml2txt(classes, xml_file, txt_save_path, img_w, img_h)

            if self.divide:
                shutil.copy(jpg_file, os.path.join(self.save_path, "images", split, name + ".jpg"))

        # Write the dataset description consumed by the trainer.
        classes_txt = dict(enumerate(classes))  # class index -> class name
        data = {
            'path': os.path.join(os.getcwd(), self.save_path),
            'train': "images/train",
            'val': "images/val",
            'test': "images/test",
            'names': classes_txt,
            'download': ''
        }
        with open(os.path.join(self.save_path, 'dataset.yaml'), 'w', encoding="utf-8") as file:
            yaml.dump(data, file, allow_unicode=True)
        print("标签:", dict(obj_classes))
        print("有标签文件数量:", len(files))


if __name__ == '__main__':
    # Adjust the three paths below to your own dataset layout.
    converter = Dataset_Transforme_Yolov8(
        jpg_path=r"datasets/detect/JPEGImages",   # directory with the images
        xml_path=r"datasets/detect/Annotations",  # directory with the XML files
        save_path=r"datasets/detect",             # output directory
        divide=True,
    )
    # Fractions of the data used for training and validation.
    converter.voc2yolov8(train=0.9, val=0.1)

运行后生成两个文件夹和一个yaml文件

images和labels是分好的数据集格式,yaml文件是用于训练的

至此,数据集转换完成

三、训练

3.1 编写训练脚本

在ultralytics/models/yolo/detect,这个路径下创建一个train_my.py

将以下内容复制到新建的py文件

from ultralytics import YOLO

# Start from the pretrained nano checkpoint (recommended for training).
model = YOLO("yolov8n.pt")

# Training settings, gathered in one place so they are easy to edit.
train_kwargs = dict(
    task='detect',
    data="datasets/detect/dataset.yaml",
    model='yolov8n.pt',
    epochs=300,
    imgsz=640,
    batch=16,
    workers=0,
    resume=False,
)

# Launch training with the settings above.
results = model.train(**train_kwargs)

根据自己的情况修改参数,或者直接到ultralytics/cfg/default.yaml中修改,效果是一样的。

注意:参数以model.train(...)中显式传入的为准,即results = model.train(task='detect',data="datasets/detect/dataset.yaml", model='yolov8n.pt', epochs=300, imgsz=640, batch=16, workers=0, resume=False)里的参数会覆盖default.yaml中的同名配置。

如果希望采用default.yaml中的配置,则需要将上面代码中model.train()的参数删除,例如:

results = model.train()

3.2 训练结果

训练后结果会保存在ultralytics/models/yolo/detect/runs下面

  • 5
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值