yolov5 8 labelme labelimg数据标注并生成训练数据集训练

图像处理大大大大大牛啊

已于 2023-08-06 22:11:52 修改

阅读量3.3k

点赞数

文章标签： YOLO python 开发语言

于 2023-05-10 13:35:55 首次发布

本文链接：https://blog.csdn.net/qq_26696715/article/details/130118379

版权

yolov5,yolov8 改进，创新，涨点专栏收录该内容

3 篇文章 0 订阅

订阅专栏

yolov5 8 labelme labelimg数据标注数据集生成训练终极教程

一.数据集准备文件夹结构，数据集标注
二.转换为yolo 数据集
三. 数据集yaml 文件
- yolov5的数据配置yaml文件
- yolov8的数据配置yaml文件
四. 训练
- 1. yolov5
- 2. yolov8

一.数据集准备文件夹结构，数据集标注

创建一个data 文件夹
在data文件夹下创建一个images 文件夹
将所有图片数据放入images文件夹下

使用labelme或者labelimg标注数据

python环境下使用 pip install labelme 安装labelme,使用 pip install labelimg 安装labelimg
在cmd 中使用命令 labelme 或者 labelimg命令打开软件
进行标注,将标注文件保存在图片的相同目录下面
将标注文件和原图都放在images文件夹

标注完成后 images 文件夹下存在原图和标注的json 文件或者xml文件

在这里插入图片描述

二.转换为yolo 数据集

先把下面代码中的类别（class_names）和训练集/验证集的划分比例改成自己的，然后在 data 文件夹下运行代码。
代码会直接生成 labels 标签文件夹，以及 train.txt、val.txt 两个图片列表文件。

1. labelimg（xml 标注）转换目标检测数据集

import json
import cv2
import numpy as np
import glob
import os
import xml.etree.ElementTree as ET

def split_by_ratio(arr, *ratios):
    """Randomly split ``arr`` into consecutive parts sized by ``ratios``.

    The input is shuffled with ``np.random.permutation`` and then cut at the
    cumulative boundaries ``len(arr) * (r1 + ... + rk)``, each truncated to a
    whole number of elements.

    :param arr: sequence of items to split (e.g. a list of file paths).
    :param ratios: one fraction per output part; e.g. ``0.5, 0.5`` yields two
        parts holding 50% of the items each.
    :return: a list of ``len(ratios)`` Python lists.  Items beyond the last
        boundary (ratios summing to less than 1) are not returned.
    """
    shuffled = np.random.permutation(arr)
    cumulative = np.add.accumulate(np.array(ratios) * len(shuffled))
    # Floating-point error can leave a boundary a hair below the integer it
    # should equal (0.7 * 3 + 0.3 * 3 == 2.9999999999999996); truncating that
    # directly would silently drop an item.  Nudge upwards before flooring.
    boundaries = np.floor(cumulative + 1e-9).astype(int)
    return [part.tolist() for part in np.split(shuffled, boundaries)][:len(ratios)]

def convert_annotation(t):
    """Convert one labelImg (Pascal VOC) XML file into a YOLO label file.

    Every ``<object>`` whose ``<name>`` is listed in the module-level
    ``class_names`` becomes one line ``class_id x_center y_center w h`` in
    ``labels/<basename>.txt``; the four box values are divided by the image
    width/height read from ``<size>``.  Objects of other classes are skipped.

    :param t: path to the XML annotation file.
    :return: ``True`` if at least one object of a known class was found (the
        label file is then written), ``False`` otherwise (nothing is written).
    :raises ValueError: if a known object is present but the XML reports a
        non-positive image width or height.
    """
    # File name without directory ('/' or '\\' separated) and without anything
    # after the first dot; callers derive the image name the same way.
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]

    with open(t, 'r', encoding='utf-8') as ft:
        root = ET.parse(ft).getroot()

    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    # Build all label lines first so that no partial/empty label file is left
    # behind when the XML contains nothing usable or parsing fails midway.
    lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in class_names:
            continue
        if width <= 0 or height <= 0:
            raise ValueError(f"{t}: invalid image size {width}x{height}")
        class_id = class_names.index(cls)

        xmlbox = obj.find('bndbox')
        x1 = float(xmlbox.find('xmin').text)
        x2 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymin').text)
        y2 = float(xmlbox.find('ymax').text)

        x_center = (x1 + x2) / 2 / width
        y_center = (y1 + y2) / 2 / height
        w = abs(x2 - x1) / width
        h = abs(y2 - y1) / height
        lines.append(f"{class_id} {x_center} {y_center} {w} {h}\n")

    if not lines:
        return False

    with open("labels/" + basename + ".txt", 'w') as fa:
        fa.writelines(lines)
    return True

# Replace with your own class names
class_names = ['persona']

if __name__ == "__main__":
    # Gather every XML annotation and split it 70% / 30% into train / val
    xml_list = glob.glob("images/*.xml")
    np.random.shuffle(xml_list)
    trains, vals = split_by_ratio(xml_list, 0.7, 0.3)

    # Output folder for the generated YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Write one image-list file per subset; an image is listed only when its
    # annotation produced a label file.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as listing:
            for annotation in subset:
                file_name = annotation.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
                stem = file_name.partition(".")[0]
                if convert_annotation(annotation):
                    listing.write("../data/images/" + stem + ".jpg\n")

2. labelme 转换目标检测数据集

import json
import cv2
import numpy as np
import glob
import os

def split_by_ratio(arr, *ratios):
    """Randomly split ``arr`` into consecutive parts sized by ``ratios``.

    The input is shuffled with ``np.random.permutation`` and then cut at the
    cumulative boundaries ``len(arr) * (r1 + ... + rk)``, each truncated to a
    whole number of elements.

    :param arr: sequence of items to split (e.g. a list of file paths).
    :param ratios: one fraction per output part; e.g. ``0.5, 0.5`` yields two
        parts holding 50% of the items each.
    :return: a list of ``len(ratios)`` Python lists.  Items beyond the last
        boundary (ratios summing to less than 1) are not returned.
    """
    shuffled = np.random.permutation(arr)
    cumulative = np.add.accumulate(np.array(ratios) * len(shuffled))
    # Float error can put a boundary just below its intended integer
    # (0.7 * 3 + 0.3 * 3 == 2.9999999999999996); truncating that directly
    # would silently lose an item, so nudge upwards before flooring.
    boundaries = np.floor(cumulative + 1e-9).astype(int)
    return [part.tolist() for part in np.split(shuffled, boundaries)][:len(ratios)]

def convert_json(t):
    """Convert one labelme JSON file into a YOLO detection label file.

    Each shape whose ``label`` is listed in the module-level ``class_names``
    becomes one line ``class_id x_center y_center w h`` in
    ``labels/<basename>.txt``.  The box is the axis-aligned bounding box of
    the shape's points (for a two-point rectangle this is the rectangle
    itself), normalised by ``imageWidth`` / ``imageHeight``.  Shapes with an
    unknown label or fewer than two points are skipped.

    :param t: path to the labelme JSON file.
    :return: ``True`` if a label file was written, ``False`` if the file
        contains no usable shape of a known class (nothing is written).
    """
    # File name without directory ('/' or '\\' separated) and without anything
    # after the first dot; callers derive the image name the same way.
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]

    with open(t, 'r', encoding='utf-8') as ft:
        data = json.load(ft)

    known_shapes = [s for s in data['shapes'] if s['label'] in class_names]
    if not known_shapes:
        return False

    # Keep the image size in its own names: reusing `width`/`height` for the
    # box size would corrupt the normalisation of every following shape.
    img_height = data["imageHeight"]
    img_width = data["imageWidth"]

    lines = []
    for shape in known_shapes:
        points = shape['points']
        if len(points) < 2:
            continue
        class_id = class_names.index(shape['label'])

        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        x_center = (x_min + x_max) / 2 / img_width
        y_center = (y_min + y_max) / 2 / img_height
        box_w = (x_max - x_min) / img_width
        box_h = (y_max - y_min) / img_height
        lines.append(f"{class_id} {x_center} {y_center} {box_w} {box_h}\n")

    if not lines:
        return False

    with open("labels/" + basename + ".txt", 'w') as fa:
        fa.writelines(lines)
    return True
# Class names
class_names = ['glass']

if __name__ == "__main__":
    # Gather every labelme JSON file and split it 90% / 10% into train / val
    json_list = glob.glob("images/*.json")
    np.random.shuffle(json_list)
    trains, vals = split_by_ratio(json_list, 0.9, 0.1)

    # Output folder for the generated YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Write one image-list file per subset; an image is listed only when its
    # annotation produced a label file.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as listing:
            for annotation in subset:
                file_name = annotation.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
                stem = file_name.partition(".")[0]
                if convert_json(annotation):
                    listing.write("../data/images/" + stem + ".jpg\n")

3. labelme 转换目标分割数据集

import json
import cv2
import numpy as np
import glob
import os

def split_by_ratio(arr, *ratios):
    """Randomly split ``arr`` into consecutive parts sized by ``ratios``.

    The input is shuffled with ``np.random.permutation`` and then cut at the
    cumulative boundaries ``len(arr) * (r1 + ... + rk)``, each truncated to a
    whole number of elements.

    :param arr: sequence of items to split (e.g. a list of file paths).
    :param ratios: one fraction per output part; e.g. ``0.5, 0.5`` yields two
        parts holding 50% of the items each.
    :return: a list of ``len(ratios)`` Python lists.  Items beyond the last
        boundary (ratios summing to less than 1) are not returned.
    """
    shuffled = np.random.permutation(arr)
    cumulative = np.add.accumulate(np.array(ratios) * len(shuffled))
    # Guard against float error such as
    # 0.7 * 3 + 0.3 * 3 == 2.9999999999999996, which plain truncation would
    # turn into boundary 2 and thereby drop the last item.
    boundaries = np.floor(cumulative + 1e-9).astype(int)
    return [part.tolist() for part in np.split(shuffled, boundaries)][:len(ratios)]

def convert_json(t):
    """Convert one labelme JSON file into a YOLO segmentation label file.

    Each shape whose ``label`` is listed in the module-level ``class_names``
    becomes one line ``class_id x1 y1 x2 y2 ...`` in ``labels/<basename>.txt``
    where every point is divided by ``imageWidth`` / ``imageHeight``.  Shapes
    with other labels are skipped.

    :param t: path to the labelme JSON file.
    :return: ``True`` if at least one shape of a known class was found (the
        label file is then written), ``False`` otherwise (nothing is written).
    """
    # File name without directory ('/' or '\\' separated) and without anything
    # after the first dot; callers derive the image name the same way.
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]

    with open(t, 'r', encoding='utf-8') as ft:
        data = json.load(ft)

    # Files may mix known and unknown labels; only the known ones are
    # exported instead of aborting the whole conversion on the first unknown.
    known_shapes = [s for s in data["shapes"] if s['label'] in class_names]
    if not known_shapes:
        return False

    height = data["imageHeight"]
    width = data["imageWidth"]

    lines = []
    for shape in known_shapes:
        fields = [str(class_names.index(shape['label']))]
        for point in shape["points"]:
            fields.append(str(point[0] / width))
            fields.append(str(point[1] / height))
        lines.append(" ".join(fields) + "\n")

    with open("labels/" + basename + ".txt", 'w') as fa:
        fa.writelines(lines)
    return True
# Class names
class_names = ['glass']

if __name__ == "__main__":
    # Gather every labelme JSON file and split it 70% / 30% into train / val
    json_list = glob.glob("images/*.json")
    np.random.shuffle(json_list)
    trains, vals = split_by_ratio(json_list, 0.7, 0.3)

    # Output folder for the generated YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Write one image-list file per subset; an image is listed only when its
    # annotation produced a label file.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as listing:
            for annotation in subset:
                file_name = annotation.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
                stem = file_name.partition(".")[0]
                if convert_json(annotation):
                    listing.write("../data/images/" + stem + ".jpg\n")

三. 数据集yaml 文件

yolov5的数据配置yaml文件

train: ../data/train.txt   # relative path; when running from inside the yolov5 folder no `path` key is needed
val: ../data/val.txt  

nc: 1 # number of classes
names: ['bird']  # class names

yolov8的数据配置yaml文件

因为 yolov8 是通过 pip 直接安装的，没有固定的运行目录，所以需要用 path 指定数据集根目录，此时 train、val 填写的是相对于 path 的路径。
如果像 yolov5 一样 clone 仓库来使用，也可以沿用上面 yolov5 的写法，两者是通用的。

# Dataset root directory; train/val below are given relative to it
path: E:\\Backup\\Desktop\\yolov8-acne\\data 
train: train.txt  
val: val.txt  

# Classes (class id: class name)
names:
  0: acne_white
  1: acne_red

四. 训练

1. yolov5


# Train YOLOv5n starting from the yolov5n.pt weights on the dataset described by data/my.yaml
# (all paths are relative to the directory the command is run from)
python train.py --weights yolov5n.pt --data data/my.yaml --cfg models/yolov5n.yaml --imgsz 640 --batch-size -1 --epochs 300  --cos-lr --patience 10  --name yolov5n

2. yolov8

采用 python 文件的方式训练。
注意：自定义网络结构的 yaml 文件本身命名时不需要带 n、s 等规模后缀；在代码里填写文件名时再加上后缀（如 yolov8n.yaml），程序会自动识别对应的模型规模。

from ultralytics import YOLO

if __name__ == '__main__':
    # NOTE: the `model = ...` lines below are alternatives. Each assignment
    # rebinds `model`, so only the last one executed is the model that gets
    # trained. Keep the single line that matches your task and remove the rest.

    # Object detection
    model = YOLO('yolov8n.yaml')  # build a new model from YAML
    model = YOLO('yolov8n.pt')  # load a pretrained model (recommended for training)
    model = YOLO('yolov8n.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

    # Instance segmentation
    model = YOLO('yolov8n-seg.yaml')  # build a new model from YAML
    model = YOLO('yolov8n-seg.pt')  # load a pretrained model (recommended for training)
    model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

    # Train the model on the dataset described by data/my.yaml for 300 epochs
    results = model.train(data='data/my.yaml', epochs=300, batch=16, workers=1, imgsz=640)

    # Evaluate the model's performance on the validation set
    results = model.val()

    # Export the model to ONNX format
    success = model.export(format='onnx', opset=12, imgsz=640, simplify=True, half=True)