YOLOv8-Detect训练CoCo数据集+自己的数据集

最新推荐文章于 2024-09-14 10:43:42 发布

_Mamba24

最新推荐文章于 2024-09-14 10:43:42 发布

阅读量4k

点赞数 26

分类专栏： YOLOv8 文章标签： YOLO 人工智能 深度学习

本文链接：https://blog.csdn.net/qq_54134410/article/details/135992943

版权

YOLOv8 专栏收录该内容

6 篇文章 2 订阅

订阅专栏

2.2、准备用于YOLOv8-Detect训练的txt

2.2.1 将CoCo数据集Json格式转为Xml

2.2.2 将CoCo数据集Xml整合到一个大的txt文件

2.2.3 将自己标注的xml整合到一个大的txt文件

2.2.4 将生成的txt合并分成训练集和验证集

2.2.5 将大的txt分解成多个小txt，txt以图片名字命名

2.2.6 将大的txt中对应的路径的图片复制到images/train

2.2.7 验证生成的txt是否准确

3、创建配置文件

3.1、设置官方配置文件：default.yaml，可自行修改。

0、引言

本文是使用YOLOv8-Detect训练自己的数据集+CoCo数据集，数据集包含COCO数据集的人猫狗数据以及自己制作的人猫狗数据集，类别为0:person、1:cat、2:dog三类，大家可根据自己的数据集类别进行调整。

1、环境准备

可以参考这篇博客：深度学习环境搭建-CSDN博客

本文环境：

Windows10
python:3.10
cuda:11.6
pytorch:1.12.0
torchvision:0.13.0

2、数据集准备

2.1、创建数据集存放的文件夹

mydata
______images
____________train
_________________001.jpg
____________val
_________________002.jpg
______labels
____________train
_________________001.txt
____________val
_________________002.txt

本人的数据都存放在mydata文件夹中（自定义命名）

目录结构如下：images存放训练集和验证集图片，labels存放训练集和验证集txt

2.2、准备用于YOLOv8-Detect训练的txt

2.2.1 将CoCo数据集Json格式转为Xml

从官网下载CoCo数据集的json文件

import argparse, json
import cytoolz
from lxml import etree, objectify
import os, re
 
def instance2xml_base(anno):
    """Build the VOC-style ``<annotation>`` skeleton for one image.

    ``anno`` is a merged image/annotation record. The keys read here are
    ``category_id``, ``file_name``, ``coco_url``, ``width`` and ``height``.
    The returned element contains folder, filename, source, size and
    segmented children but no ``<object>`` entries yet.
    """
    make = objectify.ElementMaker(annotate=False)

    folder_node = make.folder('VOC2014_instance/{}'.format(anno['category_id']))
    filename_node = make.filename(anno['file_name'])
    source_node = make.source(
        make.database('MS COCO 2014'),
        make.annotation('MS COCO 2014'),
        make.image('Flickr'),
        make.url(anno['coco_url']),
    )
    size_node = make.size(
        make.width(anno['width']),
        make.height(anno['height']),
        make.depth(3),  # depth is always written as 3
    )
    segmented_node = make.segmented(0)

    return make.annotation(
        folder_node,
        filename_node,
        source_node,
        size_node,
        segmented_node,
    )
 
def instance2xml_bbox(anno, bbox_type='xyxy'):
    """Build one VOC-style ``<object>`` element from a merged annotation record.

    ``bbox_type`` selects how the four values of ``anno['bbox']`` are written:

    * ``'xyxy'``: the bbox is read as (xmin, ymin, width, height) and the
      far corner is computed as xmin + width / ymin + height.
    * ``'xywh'``: the four values are written unchanged, so the ``xmax`` /
      ``ymax`` elements carry the third and fourth bbox values as-is.

    ``anno['category_id']`` becomes ``<name>`` and ``anno['iscrowd']`` becomes
    ``<difficult>``.
    """
    assert bbox_type in ['xyxy', 'xywh']
    left, top, third, fourth = anno['bbox']
    if bbox_type == 'xyxy':
        right = left + third
        bottom = top + fourth
    else:
        right, bottom = third, fourth

    make = objectify.ElementMaker(annotate=False)
    box_node = make.bndbox(
        make.xmin(left),
        make.ymin(top),
        make.xmax(right),
        make.ymax(bottom),
    )
    return make.object(
        make.name(anno['category_id']),
        box_node,
        make.difficult(anno['iscrowd']),
    )
 
 
def parse_instance(content, outdir):
    """Convert a loaded COCO instances json into per-image VOC xml files.

    Args:
        content: the parsed annotation json; the keys ``categories``,
            ``images`` and ``annotations`` are read.
        outdir: root output directory. One xml is written per
            (image, category present in that image) into
            ``outdir/<category name with spaces replaced by '_'>/``; these
            sub-directories are expected to exist already (``main`` creates
            them).

    Every xml written for an image contains ALL objects of that image, not
    only those of the sub-directory's category, and its ``<folder>`` element
    is taken from the first annotation of the group.

    Raises:
        KeyError: if an annotation references a category id that is not
            listed in ``content['categories']``.
    """
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))

    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]

    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + ".xml"
        # If one file has several different objects, it is saved in each
        # category sub-directory. A dict is used as an ordered set so that an
        # image with many objects of the same category is written once per
        # category instead of once per annotation.
        targets = {}
        for group in groups:
            category_dir = re.sub(" ", "_", group['category_id'])
            targets[os.path.join(outdir, category_dir, xml_name)] = None
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        tree = etree.ElementTree(anno_tree)
        for filename in targets:
            tree.write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
 
def main(args):
    """Load the COCO annotation json and convert it to VOC xml files.

    Args:
        args: parsed command-line namespace with ``anno_file`` (path of the
            annotation json), ``type`` (only ``'instance'`` triggers a
            conversion) and ``output_dir`` (created if missing).
    """
    os.makedirs(args.output_dir, exist_ok=True)
    # Use a context manager so the json file handle is closed, and read it as
    # UTF-8 instead of the platform default encoding.
    with open(args.anno_file, 'r', encoding='utf-8') as anno_fp:
        content = json.load(anno_fp)
    if args.type == 'instance':
        # One sub-directory per category; spaces in names become underscores.
        for cate in content['categories']:
            sub_dir = os.path.join(args.output_dir, re.sub(" ", "_", cate['name']))
            os.makedirs(sub_dir, exist_ok=True)
        parse_instance(content, args.output_dir)
 
 
 
if __name__ == "__main__":
    # Command-line entry point. The defaults are local Windows paths and
    # should be changed to where the annotation json lives and where the xml
    # files should be written.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--anno_file",
        help="annotation file for object instance/keypoint",
        default=r'D:\CoCoData\annotations_14_17\instances_train2017.json',
    )
    cli.add_argument(
        "--type",
        type=str,
        default='instance',
        help="object instance or keypoint",
        choices=['instance', 'keypoint'],
    )
    cli.add_argument(
        "--output_dir",
        help="output directory for voc annotation xml file",
        default=r'D:\CoCoData\CoCoXml',
    )
    main(cli.parse_args())

修改文件存放的路径和保存xml文件的路径。

运行后得到80个文件夹，分别为80类的xml文件：

再将需要类别的xml文件整合，得到训练集和验证集xml

2.2.2 将CoCo数据集Xml整合到一个大的txt文件

import os
import random
import xml.etree.ElementTree as ET

import glob


# Object names kept in the generated labels. A name's index in this list is
# the class id written to the txt (see classes.index(cls) in
# convert_annotation); objects with any other name are skipped.
classes = ['person', 'cat', 'dog']

def convert(size, box):
    """Turn a corner-format box into a normalised centre-format box.

    Args:
        size: (image_width, image_height).
        box: (xmin, ymin, xmax, ymax) in pixels.

    Returns:
        (x_center, y_center, width, height), each multiplied by the
        reciprocal of the matching image dimension. Width and height are
        capped at the image width and height before scaling.
    """
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    center_x = (box[0] + box[2]) / 2.0
    center_y = (box[1] + box[3]) / 2.0
    # A box wider/taller than the image is clamped to the image size.
    box_w = min(size[0], box[2] - box[0])
    box_h = min(size[1], box[3] - box[1])
    return (center_x * inv_w, center_y * inv_h, box_w * inv_w, box_h * inv_h)
def convert_annotation(xml, list_file):
    """Append the boxes of one VOC xml file to an open list file.

    For every ``<object>`` whose name is in ``classes`` and whose
    ``<difficult>`` value is not 1, writes
    ``" <class_id>,<x_center>,<y_center>,<w>,<h>"`` (normalised by
    ``convert``) to ``list_file``. Nothing else is written: the image path
    before and the newline after are the caller's job.

    Args:
        xml: path of the VOC xml annotation file.
        list_file: writable text file object.

    Raises:
        ZeroDivisionError: from ``convert`` when the xml reports a width or
            height of 0 and at least one object is kept.
    """
    # Parse inside a context manager so the xml file handle is closed.
    with open(xml, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    xmlsize = root.find('size')
    w = int(xmlsize.find('width').text)
    h = int(xmlsize.find('height').text)

    for obj in root.iter('object'):
        difficult_node = obj.find('difficult')
        # A missing <difficult> element is treated as "not difficult".
        difficult = difficult_node.text if difficult_node is not None else 0
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)

        xmlbox = obj.find('bndbox')
        x0 = float(xmlbox.find('xmin').text)
        y0 = float(xmlbox.find('ymin').text)
        x1 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymax').text)
        # Order the corners so swapped min/max values still give a valid box.
        b = (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))
        bb = convert((w, h), b)

        list_file.write(" " + str(cls_id) + "," + ",".join(str(a) for a in bb))


if __name__ == "__main__":
    random.seed(0)

    # Image directory
    cocoImgPath = r'I:\allShare\CoCoData\train2017'
    # Xml directory
    cocoXmlPath = r'G:\CoCopcd_xml\cocotrain_pcd'
    # Directory where the merged txt is saved (created if missing)
    txtsavepath = r'G:\Yolov8\ultralytics-main\datasets\mydata\coco_v8txt'
    os.makedirs(txtsavepath, exist_ok=True)
    # The txt is named after the last component of the image directory.
    fileWriteTxt = os.path.join(txtsavepath, cocoImgPath.split('\\')[-1] + '_v8.txt')
    xmls = glob.glob(os.path.join(cocoXmlPath, '*.xml'))

    # One line per image: "<image path> <cls,x,y,w,h> <cls,x,y,w,h> ...".
    # NOTE: later steps split each line on spaces, so image paths must not
    # contain spaces.
    with open(fileWriteTxt, 'w', encoding='utf-8') as list_file:
        for xml in xmls:
            # Same file stem as the xml, but in the image directory and with
            # a .jpg extension.
            stem = os.path.splitext(os.path.basename(xml))[0]
            img = os.path.join(cocoImgPath, stem + '.jpg')
            if not os.path.exists(img):
                print(img, ' does not exist')
                continue
            list_file.write(img)
            convert_annotation(xml, list_file)
            list_file.write('\n')

分别运行train2017和val2017后得到：

2.2.3 将自己标注的xml整合到一个大的txt文件

批量处理多个文件夹：

import os
import random
import xml.etree.ElementTree as ET

import glob


# Object names kept in the generated labels. A name's index in this list is
# the class id written to the txt (see classes.index(cls) in
# convert_annotation); objects with any other name are skipped.
classes = ['person', 'cat', 'dog']

def convert(size, box):
    """Convert a pixel (xmin, ymin, xmax, ymax) box to normalised centre form.

    ``size`` is (image_width, image_height). The result is
    (x_center, y_center, width, height), each scaled by the reciprocal of the
    corresponding image dimension; width and height are first capped at the
    image width and height.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    mid_x = (box[0] + box[2]) / 2.0
    mid_y = (box[1] + box[3]) / 2.0
    # Never report a box larger than the image itself.
    span_x = min(size[0], box[2] - box[0])
    span_y = min(size[1], box[3] - box[1])
    return (mid_x * scale_x, mid_y * scale_y, span_x * scale_x, span_y * scale_y)
def convert_annotation(xml, list_file):
    """Append the boxes of one VOC xml file to an open list file.

    For every ``<object>`` whose name is in ``classes`` and whose
    ``<difficult>`` value is not 1, writes
    ``" <class_id>,<x_center>,<y_center>,<w>,<h>"`` (normalised by
    ``convert``) to ``list_file``. The image path before and the newline
    after are written by the caller.

    Args:
        xml: path of the VOC xml annotation file.
        list_file: writable text file object.

    Raises:
        ZeroDivisionError: from ``convert`` when the xml reports a width or
            height of 0 and at least one object is kept.
    """
    # Parse inside a context manager so the xml file handle is closed.
    with open(xml, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    xmlsize = root.find('size')
    w = int(xmlsize.find('width').text)
    h = int(xmlsize.find('height').text)

    for obj in root.iter('object'):
        difficult_node = obj.find('difficult')
        # A missing <difficult> element is treated as "not difficult".
        difficult = difficult_node.text if difficult_node is not None else 0
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)

        xmlbox = obj.find('bndbox')
        x0 = float(xmlbox.find('xmin').text)
        y0 = float(xmlbox.find('ymin').text)
        x1 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymax').text)
        # Order the corners so swapped min/max values still give a valid box.
        b = (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))
        bb = convert((w, h), b)

        list_file.write(" " + str(cls_id) + "," + ",".join(str(a) for a in bb))


if __name__ == "__main__":
    random.seed(0)

    # Root folder; every directory below it (and the root itself) is scanned.
    pathdir = r'G:\dataset\selfXml'
    # Directory where the merged txt files are saved (created if missing)
    txtsavepath = r'G:\ultralytics-main\datasets\mydata\self_v8txt'
    os.makedirs(txtsavepath, exist_ok=True)

    for path, _dirnames, _filenames in os.walk(pathdir):
        print(path)
        # Images and xml files live side by side in the same directory.
        cocoImgPath = path
        cocoXmlPath = path
        # One txt per directory, named after the directory's last component.
        # NOTE: two directories with the same name overwrite each other's
        # txt, and a directory without any *.xml still produces an empty txt.
        fileWriteTxt = os.path.join(txtsavepath, cocoImgPath.split('\\')[-1] + '_v8.txt')
        xmls = glob.glob(os.path.join(cocoXmlPath, '*.xml'))

        # One line per image: "<image path> <cls,x,y,w,h> <cls,x,y,w,h> ...".
        # NOTE: later steps split each line on spaces, so image paths must
        # not contain spaces.
        with open(fileWriteTxt, 'w', encoding='utf-8') as list_file:
            for xml in xmls:
                # Same file stem as the xml, with a .jpg extension.
                stem = os.path.splitext(os.path.basename(xml))[0]
                img = os.path.join(cocoImgPath, stem + '.jpg')
                if not os.path.exists(img):
                    print(img, ' does not exist')
                    continue
                list_file.write(img)
                convert_annotation(xml, list_file)
                list_file.write('\n')

此代码运行后会多得到一个空的txt（os.walk 也会遍历根目录 pathdir 本身，而根目录下通常没有 xml 文件），删除即可

运行以上代码可得到txt，每行内容如下：图片路径，后接若干个以空格分隔的目标，每个目标为 classID,x_center,y_center,w,h（框的中心点坐标和宽高，均按图片宽高归一化）

2.2.4 将生成的txt合并分成训练集和验证集

将以上步骤得到的txt放在同一文件夹下

运行以下代码合并分割训练集和验证集，自行调整 ratio

import random
import os
import glob


# Merge every txt in one folder and split the records into train / val.
# Folder holding the txt files produced by the previous steps.
record_jpg_fold_root = r'G:\Yolov8\ultralytics-main\datasets\mydata\self_v8txt'

# Fraction of the lines of EACH txt file that is sampled into the validation set.
ratio = 0.05
record_jpg_files = glob.glob(os.path.join(record_jpg_fold_root, '*.txt'))

datas = []           # every record line of every txt
valid_files = set()  # record lines chosen for validation (set: O(1) lookup)
for record_file_path in record_jpg_files:
    # The txt files are written as UTF-8 by the previous steps, so read them
    # the same way, and close each file once it has been read.
    with open(record_file_path, 'r', encoding='utf-8') as f:
        # Skip blank lines and make sure every record ends with a newline so
        # records from different files cannot run together.
        datas_temp = [ln.rstrip('\n') + '\n' for ln in f if ln.strip()]
    valid_datas_num = int(len(datas_temp) * ratio)
    valid_files.update(random.sample(datas_temp, valid_datas_num))
    datas += datas_temp

random.shuffle(datas)

traintxtdir = r'G:\Yolov8\ultralytics-main\datasets\mydata\merge_v8txt\merge_CoCo_pcdtrain_01.txt'
valtxtdir = r'G:\Yolov8\ultralytics-main\datasets\mydata\merge_v8txt\merge_CoCo_pcdval_01.txt'

# Create the output folder(s) if they do not exist yet.
for out_path in (traintxtdir, valtxtdir):
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

with open(traintxtdir, 'w', encoding='utf-8') as ft, \
        open(valtxtdir, 'w', encoding='utf-8') as fv:
    for file in datas:
        if file in valid_files:
            fv.write(file)
        else:
            ft.write(file)

2.2.5 将大的txt分解成多个小txt，txt以图片名字命名

注意检查保存路径


import glob
import os



# Split the merged txt into one YOLO label file per image.
txtpathdir = r'G:\Yolov8\ultralytics-main\datasets\mydata\merge_v8txt\merge_CoCo_pcdtrain_01.txt'
# Directory where the per-image label txt files are saved (created if missing)
savetxtpath = r'G:\Yolov8\ultralytics-main\datasets\mydata\labels\train'
os.makedirs(savetxtpath, exist_ok=True)

with open(txtpathdir, 'r', encoding='utf-8') as file:
    lines = file.readlines()

for line in lines:
    line = line.split('\n')[0]
    if not line.strip():
        # A blank line has no image path to derive a file name from.
        continue
    # Record layout: "<image path> <cls,x,y,w,h> <cls,x,y,w,h> ..."
    imgdir, *bboxinfo = line.split(' ')
    # The label file is named after the image (same stem, .txt extension).
    # NOTE: images from different folders that share a file name overwrite
    # each other's label file.
    label_name = os.path.splitext(imgdir.split('\\')[-1])[0] + '.txt'
    savetxtdir = os.path.join(savetxtpath, label_name)
    # An image without boxes still gets an (empty) label file.
    with open(savetxtdir, 'w', encoding='utf-8') as label_file:
        for bbox in bboxinfo:
            if not bbox:
                # Produced by doubled or trailing spaces; not a box.
                continue
            # "cls,x,y,w,h" -> "cls x y w h" (the official label layout)
            info1 = ' '.join(bbox.split(','))
            print(info1)
            label_file.write(info1 + '\n')

运行后得到多个训练和验证txt：

txt的内容要与官方格式一致：

分别代表：类别、框的中心点xy坐标以及框的宽高（进行了归一化处理）

2.2.6 将大的txt中对应的路径的图片复制到images/train


import glob
import os
import shutil

# Copy every image listed in the merged txt into the images/train folder.
txtpathdir = r'G:\Yolov8\ultralytics-main\datasets\mydata\merge_v8txt\merge_CoCo_pcdtrain_01.txt'
saveimgpath = r'G:\Yolov8\ultralytics-main\datasets\mydata\images\train'
os.makedirs(saveimgpath, exist_ok=True)

with open(txtpathdir, 'r', encoding='utf-8') as file:
    lines = file.readlines()

for line in lines:
    line = line.split('\n')[0]
    if not line.strip():
        # Blank line: there is no image path to copy.
        continue
    # The image path is the first space-separated field of the record.
    imgdir = line.split(' ')[0]
    # Keep only the file name. NOTE: images from different folders that share
    # a file name overwrite each other in the destination.
    saveimgdir = os.path.join(saveimgpath, imgdir.split('\\')[-1])

    shutil.copy(imgdir, saveimgdir)
print('end.....')

2.2.7 验证生成的txt是否准确


import os
import glob
import cv2
# Draw the boxes of each label txt on its image to check them by eye.
# Keys: 'q' quits, 'z' goes back one image, any other key goes forward.
imgdir = r'G:\Yolov8\ultralytics-main\datasets\mydata\images\val'
txtdir = r'G:\Yolov8\ultralytics-main\datasets\mydata\labels\val'
list1 = glob.glob(os.path.join(imgdir, '*.jpg'))
# One BGR colour per class id.
colors = [[0, 0, 255], [0, 255, 0], [0, 255, 255]]
idx = 0
while idx < len(list1):

    imgpath = list1[idx]
    # Pair the label file with the image by file name rather than by
    # position in two separate glob lists, which goes out of step as soon as
    # one image or one txt is missing.
    stem = os.path.splitext(os.path.basename(imgpath))[0]
    txtpath = os.path.join(txtdir, stem + '.txt')
    print(imgpath)
    print(txtpath)

    img = cv2.imread(imgpath)
    if img is None:
        # cv2.imread returns None when the file cannot be read.
        print(imgpath, 'could not be read, skipped')
        idx += 1
        continue

    if os.path.exists(txtpath):
        with open(txtpath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    else:
        print(txtpath, 'does not exist, showing the image without boxes')
        lines = []

    h, w, c = img.shape
    for line in lines:
        # Label layout: "cls x_center y_center width height" (normalised)
        l = line.split()
        if len(l) < 5:
            # Blank or truncated line.
            continue
        label = l[0]
        cx = float(l[1]) * w
        cy = float(l[2]) * h
        box_w = float(l[3]) * w
        box_h = float(l[4]) * h
        xmin = cx - box_w / 2
        ymin = cy - box_h / 2
        xmax = cx + box_w / 2
        ymax = cy + box_h / 2
        print(label, (xmin, ymin), (xmax, ymax))
        # Wrap around so an unexpected class id cannot index past the list.
        color = colors[int(label) % len(colors)]
        cv2.putText(img, label, (int(xmin), int(ymin)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 1, cv2.LINE_AA)
        cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, 2)

    # fx / fy can be changed to scale the preview window.
    img = cv2.resize(img, None, fx=1, fy=1)
    cv2.imshow('1', img)
    key = cv2.waitKey(0)
    if key == ord('q'):
        break
    if key == ord('z'):
        # Do not step before the first image (a negative index would wrap
        # around to the end of the list).
        idx = max(idx - 1, 0)
    else:
        idx += 1

cv2.destroyAllWindows()

将txt信息可视化：

完成以上步骤即将训练和验证的数据集准备完成。

3、创建配置文件

3.1、设置官方配置文件：default.yaml，可自行修改。

所有参数参见：Configuration - Ultralytics YOLOv8 Docs

3.2、设置data.yaml

根据自己的数据集位置进行修改和配置。

# Dataset description for the 3-class (person / cat / dog) detection dataset.
# NOTE(review): the conversion scripts in this article write to G:\Yolov8\...;
# make sure `path` points at the folder that actually holds images/ and labels/.
path: D:\Yolov8\ultralytics-main\datasets\mydata  # dataset root dir
train: images/train  # train images (relative to 'path')
val: images/val  # val images (relative to 'path')
#test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
 
# Classes: the ids must match the order of `classes` in the conversion scripts
names:
  0: person
  1: cat
  2: dog
nc: 3  # number of entries in `names`

3.3、设置yolov8s.yaml

根据自己想使用的权重进行选择，我这里采用的是yolov8s.pt进行训练，类别为3。

# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
 
# Parameters
nc: 3  # number of classes (person, cat, dog)
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  # Only the 's' scale is defined in this file.
  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
 
 
# YOLOv8.0 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
  - [-1, 3, C2f, [128, True]]  # 2
  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
  - [-1, 6, C2f, [256, True]]  # 4
  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
  - [-1, 6, C2f, [512, True]]  # 6
  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
  - [-1, 3, C2f, [1024, True]]  # 8
  - [-1, 1, SPPF, [1024, 5]]  # 9
 
# YOLOv8.0 head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 10
  - [[-1, 6], 1, Concat, [1]]  # 11 cat backbone P4
  - [-1, 3, C2f, [512]]  # 12
 
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 13
  - [[-1, 4], 1, Concat, [1]]  # 14 cat backbone P3
  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
 
  - [-1, 1, Conv, [256, 3, 2]]  # 16
  - [[-1, 12], 1, Concat, [1]]  # 17 cat head P4
  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
 
  - [-1, 1, Conv, [512, 3, 2]]  # 19
  - [[-1, 9], 1, Concat, [1]]  # 20 cat head P5
  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
 
  - [[15, 18, 21], 1, Detect, [nc]]  # 22 Detect(P3, P4, P5)

4、进行训练

上述步骤完成后，即可开始训练。

from ultralytics import YOLO
 
if __name__ == '__main__':
    # Build the model from the architecture yaml. To start from an existing
    # weights file instead, pass its path, e.g.
    # modelpath = r'D:\Yolov8\yolov8-detect-pt\yolov8s.pt'
    model_cfg = 'yolov8s.yaml'
    # Dataset description (paths and class names)
    data_cfg = 'data.yaml'

    model = YOLO(model_cfg)
    # Train the model
    model.train(data=data_cfg)

也可以不使用模型的yaml文件（yolov8s.yaml），直接加载预训练的.pt权重文件进行训练

from ultralytics import YOLO
 
if __name__ == '__main__':
    # Path of the pretrained weights. This is a raw string, so backslashes
    # are written singly (the original had a stray doubled backslash after
    # the drive letter).
    modelpath = r'G:\ultralytics-main\yolov8-detect\yolov8s.pt'

    model = YOLO(modelpath)  # load a pretrained model (recommended for training)
    # Train the model
    model.train(data='data.yaml')

训练过程：

训练过程中会保存以下内容，最后得到两个模型分别是：best.pt、last.pt

5、验证模型

训练进程完毕以后可使用一些验证数据进行模型验证，查看模型的识别效果。

from ultralytics import YOLO
import glob
import os
# Load the trained weights (best.pt)
model = YOLO(r'D:\Yolov8\yolov8-detect-pt\best.pt')

# Predict on every .jpg in the test folder, one image at a time.
# save=True asks predict() to save its results.
imgpath = r'D:\Yolov8\ultralytics-main\testimgs'
jpg_pattern = os.path.join(imgpath, '*.jpg')
imgs = glob.glob(jpg_pattern)
for img in imgs:
    model.predict(img, save=True)

预测结果：