YOLOv5 / YOLOv8:使用 labelme、labelimg 标注数据,生成训练数据集并进行训练

一. 数据集准备:文件夹结构与数据集标注

  1. 创建一个data 文件夹
  2. 在data文件夹下创建一个images 文件夹
  3. 将所有图片数据放入images文件夹下

使用labelme或者labelimg标注数据

  1. 在 python 环境下使用 pip install labelme 安装 labelme,或使用 pip install labelimg 安装 labelimg
  2. 在 cmd 中输入命令 labelme 或 labelimg 打开软件
  3. 进行标注,并将标注文件保存在与图片相同的目录下
  4. 确保标注文件和原图都位于 images 文件夹中

标注完成后,images 文件夹下应同时存在原图以及标注生成的 json 文件(labelme)或 xml 文件(labelimg)

在这里插入图片描述

二.转换为yolo 数据集

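然后在 data 文件夹下运行以下代码(脚本通过相对路径 images/ 读取标注文件),运行前请将类别 class_names 以及训练集/验证集的划分比例修改为自己的设置。
脚本会直接生成 labels 标签文件夹以及 train.txt、val.txt 两个文件。注意:写入 train.txt、val.txt 的图片路径固定使用 .jpg 后缀。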

1. labelimg 转换

import json
import cv2
import numpy as np
import glob
import os
import xml.etree.ElementTree as ET

def split_by_ratio(arr, *ratios):
    """
    Shuffle the items of ``arr`` and cut them into consecutive chunks.

    :param arr: sequence of items to split; a shuffled copy is what gets cut.
    :param ratios: one fraction per chunk, e.g. ``0.5, 0.5`` produces two
        chunks holding about 50% of the items each.
    :return: a list containing ``len(ratios)`` lists, one per chunk.
    """
    shuffled = np.random.permutation(arr)
    # Running total of ratio * item count, truncated to int, gives the cut positions.
    cut_points = np.cumsum(np.array(ratios) * len(shuffled)).astype(int)
    chunks = np.split(shuffled, cut_points)
    # np.split also returns whatever follows the last cut; keep only the requested chunks.
    return [chunk.tolist() for chunk in chunks[:len(ratios)]]

def convert_annotation(t):
    """
    Convert one labelimg (Pascal VOC style) XML file into a YOLO label file.

    Objects whose ``name`` is listed in the module-level ``class_names`` are
    written to ``labels/<basename>.txt`` as
    ``class_id x_center y_center w h``, with all four values divided by the
    image width/height read from the ``<size>`` element. Objects with any
    other name are ignored.

    :param t: path of the XML annotation file.
    :return: ``True`` when at least one object of a known class was found
        (and the label file was written), ``False`` otherwise (nothing is
        written in that case).
    """
    # File name without directory (either separator) and without extension
    stem = t.split("/")[-1].split("\\")[-1].split(".")[0]

    with open(t, 'r', encoding='utf-8') as xml_file:
        root = ET.parse(xml_file).getroot()

        size_node = root.find('size')
        width = int(size_node.find('width').text)
        height = int(size_node.find('height').text)

        # Only objects of a configured class take part in the conversion
        wanted = [
            obj for obj in root.iter('object')
            if obj.find('name').text in class_names
        ]
        if not wanted:
            return False

        with open("labels/" + stem + ".txt", 'w') as label_file:
            for obj in wanted:
                class_id = class_names.index(obj.find('name').text)

                box = obj.find('bndbox')
                x1 = float(box.find('xmin').text)
                x2 = float(box.find('xmax').text)
                y1 = float(box.find('ymin').text)
                y2 = float(box.find('ymax').text)

                print(x1, x2, y1, y2, width, height)
                # Box centre and size, normalised by the image dimensions
                x_center = (x1 + x2) / 2 / width
                y_center = (y1 + y2) / 2 / height
                w = abs(x2 - x1) / width
                h = abs(y2 - y1) / height
                print(x_center, y_center, w, h)
                label_file.write(f"{class_id} {x_center} {y_center} {w} {h}\n")

    return True

# Replace with your own class names
class_names = ['persona']

if __name__ == "__main__":
    # Collect every labelimg annotation file and split it 70% / 30%
    xml_list = glob.glob("images/*.xml")
    np.random.shuffle(xml_list)
    trains, vals = split_by_ratio(xml_list, 0.7, 0.3)

    # Folder that receives the generated YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # train.txt is written first, then val.txt; each lists the images of its subset
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as f:
            for t in subset:
                stem = t.split("/")[-1].split("\\")[-1].split(".")[0]

                # Only images that produced a label file are listed
                if convert_annotation(t):
                    f.write("../data/images/" + stem + ".jpg\n")




2. labelme 转换 目标检测数据集

import json
import cv2
import numpy as np
import glob
import os

def split_by_ratio(arr, *ratios):
    """
    Randomly permute ``arr`` and divide it into chunks according to ``ratios``.

    :param arr: sequence of items to divide; a shuffled copy is what gets cut.
    :param ratios: fraction of the items for each chunk; the number of ratios
        is the number of chunks (``0.5, 0.5`` gives two halves).
    :return: list of ``len(ratios)`` lists holding the items of each chunk.
    """
    permuted = np.random.permutation(arr)
    total = len(permuted)
    # Cut positions: cumulative share of the items, truncated to whole indices.
    boundaries = np.cumsum(np.array(ratios) * total).astype(int)
    # The piece after the final boundary is surplus and is dropped by the slice.
    wanted = np.split(permuted, boundaries)[:len(ratios)]
    return [piece.tolist() for piece in wanted]

def convert_json(t):
    """
    Convert one labelme JSON file into a YOLO detection label file.

    Every shape whose ``label`` is listed in the module-level ``class_names``
    is written to ``labels/<basename>.txt`` as
    ``class_id x_center y_center width height``, normalised by the
    ``imageWidth`` / ``imageHeight`` stored in the JSON. Shapes with any other
    label are skipped, matching what the XML converter does.

    Only the first two points of a shape are read and treated as opposite
    corners of the box (presumably labelme "rectangle" shapes -- confirm for
    other shape types).

    :param t: path of the labelme JSON file.
    :return: ``True`` when at least one shape of a known class was found (and
        the label file was written), ``False`` otherwise (nothing is written).
    """
    # File name without directory (either separator) and without extension
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]
    with open(t, 'r', encoding='utf-8') as ft:
        data = json.load(ft)

    # Keep only the shapes that belong to a configured class
    shapes = [shape for shape in data['shapes'] if shape['label'] in class_names]
    if not shapes:
        return False

    # Image size; kept in dedicated names so the per-box values computed in
    # the loop can never overwrite it (that corrupted every box but the first).
    img_height = data["imageHeight"]
    img_width = data["imageWidth"]
    with open("labels/" + basename + ".txt", 'w') as fa:
        for shape in shapes:
            class_id = class_names.index(shape['label'])

            x1, y1 = shape['points'][0]
            x2, y2 = shape['points'][1]
            x_center = (x1 + x2) / 2 / img_width
            y_center = (y1 + y2) / 2 / img_height
            box_width = abs(x2 - x1) / img_width
            box_height = abs(y2 - y1) / img_height

            fa.write(f"{class_id} {x_center} {y_center} {box_width} {box_height}\n")

    return True
# Class names
class_names = ['glass']

if __name__ == "__main__":
    # Collect every labelme annotation file and split it 90% / 10%
    json_list = glob.glob("images/*.json")
    np.random.shuffle(json_list)
    trains, vals = split_by_ratio(json_list, 0.9, 0.1)

    # Folder that receives the generated YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # train.txt is written first, then val.txt; each lists the images of its subset
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as f:
            for t in subset:
                stem = t.split("/")[-1].split("\\")[-1].split(".")[0]
                # Only images that produced a label file are listed
                if convert_json(t):
                    f.write("../data/images/" + stem + ".jpg\n")


3. labelme 转换 目标分割数据集

import json
import cv2
import numpy as np
import glob
import os

def split_by_ratio(arr, *ratios):
    """
    Split a randomly shuffled copy of ``arr`` into ratio-sized sub-lists.

    :param arr: sequence of items to split.
    :param ratios: share of the items for each sub-list; passing
        ``0.5, 0.5`` returns two sub-lists with about half the items each.
    :return: list with one sub-list per ratio, in the order the ratios
        were given.
    """
    shuffled = np.random.permutation(arr)
    sizes = np.array(ratios) * len(shuffled)
    # Accumulated sizes, truncated to integers, are the indices to cut at.
    split_at = np.cumsum(sizes).astype(int)
    result = []
    # np.split yields one extra trailing piece; only the first len(ratios) are kept.
    for part in np.split(shuffled, split_at)[:len(ratios)]:
        result.append(part.tolist())
    return result

def convert_json(t):
    """
    Convert one labelme JSON file into a YOLO segmentation label file.

    Every shape whose ``label`` is listed in the module-level ``class_names``
    becomes one line of ``labels/<basename>.txt``:
    ``class_id x1 y1 x2 y2 ...``, where each point is divided by the
    ``imageWidth`` / ``imageHeight`` stored in the JSON. Shapes with any other
    label are skipped, matching what the XML converter does, instead of
    aborting the whole conversion.

    :param t: path of the labelme JSON file.
    :return: ``True`` when at least one shape of a known class was found (and
        the label file was written), ``False`` otherwise (nothing is written).
    """
    # File name without directory (either separator) and without extension
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]
    with open(t, 'r', encoding='utf-8') as ft:
        data = json.load(ft)

    # Keep only the shapes that belong to a configured class
    shapes = [shape for shape in data['shapes'] if shape['label'] in class_names]
    if not shapes:
        return False

    height = data["imageHeight"]
    width = data["imageWidth"]

    lines = []  # one text line per kept shape
    for shape in shapes:
        fields = [str(class_names.index(shape['label']))]
        for point in shape["points"]:
            fields.append(str(point[0] / width))
            fields.append(str(point[1] / height))
        lines.append(" ".join(fields) + "\n")

    with open("labels/" + basename + ".txt", 'w') as fa:
        fa.write("".join(lines))

    return True
# Class names
class_names = ['glass']

if __name__ == "__main__":
    # Collect every labelme annotation file and split it 70% / 30%
    json_list = glob.glob("images/*.json")
    np.random.shuffle(json_list)
    trains, vals = split_by_ratio(json_list, 0.7, 0.3)

    # Folder that receives the generated YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # train.txt is written first, then val.txt; each lists the images of its subset
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as f:
            for t in subset:
                stem = t.split("/")[-1].split("\\")[-1].split(".")[0]
                # Only images that produced a label file are listed
                if convert_json(t):
                    f.write("../data/images/" + stem + ".jpg\n")



三. 数据集yaml 文件

yolov5的数据配置yaml文件

# YOLOv5 dataset config: the image-list files for training/validation plus the class definitions
train: ../data/train.txt   # relative path; no `path` entry is needed when running from inside the yolov5 folder
val: ../data/val.txt  

nc: 1 # number of classes
names: ['bird']  # class names

yolov8的数据配置yaml文件

因为 yolov8 是通过 pip 直接安装的,所以需要用 path 指定数据集根目录,此时 train、val 的路径都是相对于 path 的路径。
如果像 yolov5 一样 clone 仓库来使用,也可以采用 yolov5 的写法,两种写法是通用的。

# Dataset root directory; `train` and `val` below are given relative to it
path: E:\\Backup\\Desktop\\yolov8-acne\\data 
train: train.txt  
val: val.txt  

# Classes: class id -> class name
names:
  0: acne_white
  1: acne_red

四. 训练

1. yolov5


# Train YOLOv5 starting from the yolov5n.pt weights with the models/yolov5n.yaml config on
# data/my.yaml: image size 640, 300 epochs, --cos-lr, patience 10, run name "yolov5n".
# NOTE(review): --batch-size -1 presumably selects an automatic batch size -- confirm against train.py.
python train.py --weights yolov5n.pt --data data/my.yaml --cfg models/yolov5n.yaml --imgsz 640 --batch-size -1 --epochs 300  --cos-lr --patience 10  --name yolov5n

2. yolov8

采用 python 文件的方式训练。
注意:自定义网络结构的 yaml 文件,命名时不需要带后面的 n、s 等规模后缀;在代码里加载时再加上后缀(例如 yolov8n.yaml),框架会自动识别对应的模型规模。

from ultralytics import YOLO
if __name__=='__main__':
    # The block is indented with spaces only: mixing tabs and spaces inside
    # one block raises TabError in Python 3.
    #
    # NOTE: each assignment below rebinds `model`, so only the LAST one is
    # actually trained. They are alternatives -- keep the single line that
    # matches your task and remove the others.

    # Object detection
    model = YOLO('yolov8n.yaml')  # build a new model from YAML
    model = YOLO('yolov8n.pt')  # load a pretrained model (recommended for training)
    model = YOLO('yolov8n.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

    # Instance segmentation
    model = YOLO('yolov8n-seg.yaml')  # build a new model from YAML
    model = YOLO('yolov8n-seg.pt')  # load a pretrained model (recommended for training)
    model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

    # Train the model on the dataset described by data/my.yaml for 300 epochs
    results = model.train(data='data/my.yaml', epochs=300,batch=16,workers=1,imgsz=640)

    # Evaluate the model's performance on the validation set
    results = model.val()

    # Export the model to ONNX format
    success = model.export(format='onnx', opset=12,imgsz=640,simplify=True,half=True)


训练的参数
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

图像处理大大大大大牛啊

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值