yolov5 / yolov8:labelme、labelimg 数据标注、数据集生成与训练终极教程
一.数据集准备 文件夹结构,数据集标注
- 创建一个data 文件夹
- 在data文件夹下创建一个images 文件夹
- 将所有图片数据放入images文件夹下
使用labelme或者labelimg标注数据
- 在 python 环境下使用 `pip install labelme` 安装 labelme,或使用 `pip install labelimg` 安装 labelimg
- 在 cmd 中使用命令 `labelme` 或者 `labelimg` 打开软件
- 进行标注,将标注文件保存在图片的相同目录下面
- 将标注文件和原图都放在images文件夹
标注完成后 images 文件夹下 存在原图和标注的json 文件或者xml文件
二.转换为yolo 数据集
然后运行以下代码(先将代码中的类别和训练/验证集划分比例修改为自己的)。
运行后会直接生成 labels 标签文件夹以及 train.txt、val.txt 文件。
1. labelimg 转换
import json
import cv2
import numpy as np
import glob
import os
import xml.etree.ElementTree as ET
def split_by_ratio(arr, *ratios):
    """Randomly split a sequence into consecutive chunks sized by ratio.

    The input is shuffled with ``np.random.permutation`` and then cut at the
    cumulative ratio boundaries.

    :param arr: sequence of items to split (e.g. a list of file paths).
    :param ratios: one ratio per output chunk; e.g. ``0.5, 0.5`` yields two
        chunks holding 50% of the items each.
    :return: a list with ``len(ratios)`` lists, in the order of ``ratios``.
    """
    arr = np.random.permutation(arr)
    boundaries = np.add.accumulate(np.array(ratios) * len(arr))
    # Float round-off can leave a boundary a hair below the intended integer
    # (0.7 * 3 + 0.3 * 3 evaluates to 2.9999999999999996); plain truncation
    # would then drop an item from the last chunk, so nudge before flooring.
    ind = np.floor(boundaries + 1e-9).astype(int)
    return [x.tolist() for x in np.split(arr, ind)][:len(ratios)]
def convert_annotation(t):
    """Convert one labelImg (Pascal VOC) XML file into a YOLO label file.

    Every ``<object>`` whose ``<name>`` is listed in the module-level
    ``class_names`` becomes one ``class_id x_center y_center w h`` line,
    normalised by the image size read from ``<size>``, in
    ``labels/<basename>.txt``. Objects of other classes are skipped.

    :param t: path of the XML annotation file.
    :return: ``True`` if a label file was written; ``False`` if the file
        holds no object of a known class or has no usable image size.
    """
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]
    root = ET.parse(t).getroot()

    known_objects = [
        obj for obj in root.iter('object')
        if obj.find('name').text in class_names
    ]
    if not known_objects:
        return False

    # A missing or zero <size> would otherwise raise (AttributeError /
    # ZeroDivisionError) after the label file had already been truncated.
    size = root.find('size')
    width = int(size.findtext('width', '0')) if size is not None else 0
    height = int(size.findtext('height', '0')) if size is not None else 0
    if width <= 0 or height <= 0:
        print(f"skip {t}: invalid image size {width}x{height}")
        return False

    lines = []
    for obj in known_objects:
        class_id = class_names.index(obj.find('name').text)
        xmlbox = obj.find('bndbox')
        x1 = float(xmlbox.find('xmin').text)
        x2 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymin').text)
        y2 = float(xmlbox.find('ymax').text)
        x_center = (x1 + x2) / 2 / width
        y_center = (y1 + y2) / 2 / height
        w = abs(x2 - x1) / width
        h = abs(y2 - y1) / height
        lines.append(f"{class_id} {x_center} {y_center} {w} {h}\n")

    # Write only after every box has been computed, so a failure above never
    # leaves a half-written label file behind.
    with open("labels/" + basename + ".txt", 'w') as fa:
        fa.writelines(lines)
    return True
# Replace with your own class names.
class_names = ['persona']

if __name__ == "__main__":
    # Collect every labelImg annotation stored next to the images.
    xml_list = glob.glob("images/*.xml")
    np.random.shuffle(xml_list)
    trains, vals = split_by_ratio(xml_list, 0.7, 0.3)

    # Output folder for the generated YOLO label files.
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Write one image-list file per subset; an image is listed only when its
    # annotation produced a label file.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as f:
            for xml_path in subset:
                stem = xml_path.split("/")[-1].split("\\")[-1].split(".")[0]
                if convert_annotation(xml_path):
                    f.write("../data/images/" + stem + ".jpg\n")
2. labelme 转换 目标检测数据集
import json
import cv2
import numpy as np
import glob
import os
def split_by_ratio(arr, *ratios):
    """Randomly split a sequence into consecutive chunks sized by ratio.

    The input is shuffled with ``np.random.permutation`` and then cut at the
    cumulative ratio boundaries.

    :param arr: sequence of items to split (e.g. a list of file paths).
    :param ratios: one ratio per output chunk; e.g. ``0.5, 0.5`` yields two
        chunks holding 50% of the items each.
    :return: a list with ``len(ratios)`` lists, in the order of ``ratios``.
    """
    arr = np.random.permutation(arr)
    boundaries = np.add.accumulate(np.array(ratios) * len(arr))
    # Float round-off can leave a boundary a hair below the intended integer
    # (0.7 * 3 + 0.3 * 3 evaluates to 2.9999999999999996); plain truncation
    # would then drop an item from the last chunk, so nudge before flooring.
    ind = np.floor(boundaries + 1e-9).astype(int)
    return [x.tolist() for x in np.split(arr, ind)][:len(ratios)]
def convert_json(t):
    """Convert one labelme JSON file into a YOLO detection label file.

    Each shape is treated as a box spanned by its first two points and is
    written as ``class_id x_center y_center w h`` (normalised by
    ``imageWidth`` / ``imageHeight``) to ``labels/<basename>.txt``.

    :param t: path of the labelme JSON file.
    :return: ``True`` if a label file was written, ``False`` if no shape in
        the file carries a label from the module-level ``class_names``.
    :raises AssertionError: if the file mixes known labels with a label that
        is not in ``class_names``.
    """
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]
    with open(t, 'r', encoding='utf-8') as ft:
        data = json.load(ft)

    shapes = data['shapes']
    if not any(shape['label'] in class_names for shape in shapes):
        return False

    img_height = data["imageHeight"]
    img_width = data["imageWidth"]

    lines = []
    for shape in shapes:
        label = shape['label']
        # Explicit raise instead of `assert`, which is stripped under -O.
        if label not in class_names:
            raise AssertionError(f"Error: {label} not found in {class_names}")
        class_id = class_names.index(label)
        x1, y1 = shape['points'][0]
        x2, y2 = shape['points'][1]
        x_center = (x1 + x2) / 2 / img_width
        y_center = (y1 + y2) / 2 / img_height
        # Keep the box size in its own variables: reusing the image
        # width/height names here would corrupt the normalisation of every
        # following shape.
        box_w = abs(x2 - x1) / img_width
        box_h = abs(y2 - y1) / img_height
        lines.append(f"{class_id} {x_center} {y_center} {box_w} {box_h}\n")

    # Write only after all shapes validated, so a bad label never leaves a
    # truncated label file behind.
    with open("labels/" + basename + ".txt", 'w') as fa:
        fa.writelines(lines)
    return True
# Class names to export.
class_names = ['glass']

if __name__ == "__main__":
    # Collect every labelme annotation stored next to the images.
    json_list = glob.glob("images/*.json")
    np.random.shuffle(json_list)
    trains, vals = split_by_ratio(json_list, 0.9, 0.1)

    # Output folder for the generated YOLO label files.
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Write one image-list file per subset; an image is listed only when its
    # annotation produced a label file.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as f:
            for json_path in subset:
                stem = json_path.split("/")[-1].split("\\")[-1].split(".")[0]
                if convert_json(json_path):
                    f.write("../data/images/" + stem + ".jpg\n")
3. labelme 转换 目标分割数据集
import json
import cv2
import numpy as np
import glob
import os
def split_by_ratio(arr, *ratios):
    """Randomly split a sequence into consecutive chunks sized by ratio.

    The input is shuffled with ``np.random.permutation`` and then cut at the
    cumulative ratio boundaries.

    :param arr: sequence of items to split (e.g. a list of file paths).
    :param ratios: one ratio per output chunk; e.g. ``0.5, 0.5`` yields two
        chunks holding 50% of the items each.
    :return: a list with ``len(ratios)`` lists, in the order of ``ratios``.
    """
    arr = np.random.permutation(arr)
    boundaries = np.add.accumulate(np.array(ratios) * len(arr))
    # Float round-off can leave a boundary a hair below the intended integer
    # (0.7 * 3 + 0.3 * 3 evaluates to 2.9999999999999996); plain truncation
    # would then drop an item from the last chunk, so nudge before flooring.
    ind = np.floor(boundaries + 1e-9).astype(int)
    return [x.tolist() for x in np.split(arr, ind)][:len(ratios)]
def convert_json(t):
    """Convert one labelme JSON file into a YOLO segmentation label file.

    Each shape becomes one line ``class_id x1 y1 x2 y2 ...`` in
    ``labels/<basename>.txt``, with every point divided by ``imageWidth`` /
    ``imageHeight``.

    :param t: path of the labelme JSON file.
    :return: ``True`` if a label file was written, ``False`` if no shape in
        the file carries a label from the module-level ``class_names``.
    :raises AssertionError: if the file mixes known labels with a label that
        is not in ``class_names``.
    """
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]
    with open(t, 'r', encoding='utf-8') as ft:
        data = json.load(ft)

    shapes = data['shapes']
    if not any(shape['label'] in class_names for shape in shapes):
        return False

    height = data["imageHeight"]
    width = data["imageWidth"]

    lines = []
    for shape in shapes:  # one output line per annotated shape
        label = shape['label']
        # Explicit raise instead of `assert`, which is stripped under -O.
        if label not in class_names:
            raise AssertionError(f"Error: {label} not found in {class_names}")
        fields = [str(class_names.index(label))]
        for point in shape["points"]:
            fields.append(str(point[0] / width))
            fields.append(str(point[1] / height))
        lines.append(" ".join(fields) + "\n")

    # Write only after all shapes validated, so a bad label never leaves a
    # truncated label file behind; each shape line is written exactly once.
    with open("labels/" + basename + ".txt", 'w') as fa:
        fa.writelines(lines)
    return True
# Class names to export.
class_names = ['glass']

if __name__ == "__main__":
    # Collect every labelme annotation stored next to the images.
    json_list = glob.glob("images/*.json")
    np.random.shuffle(json_list)
    trains, vals = split_by_ratio(json_list, 0.7, 0.3)

    # Output folder for the generated YOLO label files.
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Write one image-list file per subset; an image is listed only when its
    # annotation produced a label file.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as f:
            for json_path in subset:
                stem = json_path.split("/")[-1].split("\\")[-1].split(".")[0]
                if convert_json(json_path):
                    f.write("../data/images/" + stem + ".jpg\n")
三. 数据集yaml 文件
yolov5的数据配置yaml文件
train: ../data/train.txt # 此路径为相对路径, 如果运行路径在yolov5 文件夹下 就不需要path 路径
val: ../data/val.txt
nc: 1 # number of classes
names: ['bird'] # class names
yolov8的数据配置yaml文件
因为yolov8 采用pip直接安装 所以需要知道path 路径,此时的train 文件就是相对于path路径。
如果是像yolov5一样clone仓库 也可以采用yolov5 的写法,是通用的。
path: E:\\Backup\\Desktop\\yolov8-acne\\data
train: train.txt
val: val.txt
# Classes
names:
0: acne_white
1: acne_red
四. 训练
1. yolov5
python train.py --weights yolov5n.pt --data data/my.yaml --cfg models/yolov5n.yaml --imgsz 640 --batch-size -1 --epochs 300 --cos-lr --patience 10 --name yolov5n
2. yolov8
采用python 文件的方式训练
注意:自定义网络结构的 yaml 文件命名不需要带后面的 n、s 等规模后缀;在代码中引用时加上后缀(例如 yolov8n.yaml),框架会自动识别对应的模型规模。
from ultralytics import YOLO
if __name__ == '__main__':
    # NOTE: every assignment below rebinds `model`, so only the last one is
    # actually trained. Keep the single line that matches your task.

    # Object detection: alternative ways to create the model.
    model = YOLO('yolov8n.yaml')  # build a new model from YAML
    model = YOLO('yolov8n.pt')  # load a pretrained model (recommended for training)
    model = YOLO('yolov8n.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

    # Instance segmentation: alternative ways to create the model.
    model = YOLO('yolov8n-seg.yaml')  # build a new model from YAML
    model = YOLO('yolov8n-seg.pt')  # load a pretrained model (recommended for training)
    model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

    # Train on the dataset described by data/my.yaml for 300 epochs.
    results = model.train(
        data='data/my.yaml',
        epochs=300,
        batch=16,
        workers=1,
        imgsz=640,
    )

    # Evaluate the trained model on the validation set.
    results = model.val()

    # Export the model to ONNX format.
    success = model.export(
        format='onnx',
        opset=12,
        imgsz=640,
        simplify=True,
        half=True,
    )
训练的参数