Yolov8训练自己的数据集

sandwichhhhhh

已于 2024-09-19 13:19:44 修改

阅读量236

点赞数 2

文章标签： YOLO

于 2024-09-18 23:29:47 首次发布

本文链接：https://blog.csdn.net/sandwichhhhhh/article/details/142346362

版权

1、从网上爬取若干张图片

2、用labelme软件进行标注，生成json文件

3、将json文件转换成txt文件

代码如下：

import json
import os

# Maps each annotation label name to the class id written into the YOLO
# label files. Edit the entries to match the classes of your own dataset.
# NOTE(review): confirm the ids agree with the class list in the dataset
# config used for training.
name2id = {
    'phone': 4,
}
               
def convert(img_size, box):
    """Convert a pixel-space corner box into a normalized YOLO box.

    Args:
        img_size: ``(width, height)`` of the image in pixels.
        box: ``(x1, y1, x2, y2)``, two opposite corners of the box in pixels.
            The corners may be given in either order.

    Returns:
        A tuple ``(x_center, y_center, width, height)`` of floats, each
        divided by the image width or height so that it lies in ``[0, 1]``.

    Raises:
        ZeroDivisionError: If either image dimension is 0.
    """
    img_w = float(img_size[0])
    img_h = float(img_size[1])

    def _clip(value, upper):
        # Keep a coordinate inside the image so the normalized result can
        # never leave the [0, 1] range.
        return min(max(value, 0), upper)

    # Order the corners explicitly instead of taking abs() afterwards: the
    # box may have been drawn from any corner, and abs() on the centre would
    # silently turn a wrong negative value into a wrong positive one.
    x_lo, x_hi = sorted((_clip(box[0], img_w), _clip(box[2], img_w)))
    y_lo, y_hi = sorted((_clip(box[1], img_h), _clip(box[3], img_h)))

    # Coordinates are used as 0-based pixel positions, so the centre is the
    # plain midpoint (no "- 1" shift).
    x = (x_lo + x_hi) / 2.0 / img_w
    y = (y_lo + y_hi) / 2.0 / img_h
    w = (x_hi - x_lo) / img_w
    h = (y_hi - y_lo) / img_h
    return (x, y, w, h)
 
def decode_json(json_floder_path, json_name,
                txt_folder_path='/data/duanjiayi/yolov8/datasets/phone2',
                encoding='gb2312'):
    """Convert one annotation JSON file into a YOLO ``.txt`` label file.

    The JSON is expected to provide ``imageWidth``, ``imageHeight`` and a
    ``shapes`` list whose entries carry ``label``, ``shape_type`` and
    ``points``. Only shapes whose ``shape_type`` is ``'rectangle'`` are
    exported; each becomes one line ``<class_id> <x> <y> <w> <h>``.

    Args:
        json_floder_path: Folder containing the JSON annotation file.
        json_name: File name of the annotation inside that folder.
        txt_folder_path: Folder that receives the generated label file. It
            is created when missing.
        encoding: Text encoding used to read the JSON file.

    Raises:
        KeyError: If a rectangle uses a label that is missing from
            ``name2id``, or the JSON lacks one of the expected keys. The
            label file is not written in that case.
    """
    # Join with a real path separator so the label lands inside the output
    # folder, and strip the extension properly instead of assuming that the
    # name ends in exactly five ".json" characters.
    stem = os.path.splitext(json_name)[0]
    txt_name = os.path.join(txt_folder_path, stem + '.txt')

    json_path = os.path.join(json_floder_path, json_name)
    with open(json_path, 'r', encoding=encoding) as json_file:
        data = json.load(json_file)

    img_w = data['imageWidth']
    img_h = data['imageHeight']

    # Build every line first: a bad annotation then raises before any
    # output is touched instead of leaving a half-written label file.
    lines = []
    for shape in data['shapes']:
        if shape['shape_type'] != 'rectangle':
            continue
        first, second = shape['points'][0], shape['points'][1]
        # Keep sub-pixel precision; truncating to int moves box edges.
        bb = (float(first[0]), float(first[1]),
              float(second[0]), float(second[1]))
        bbox = convert((img_w, img_h), bb)
        class_id = name2id[shape['label']]
        lines.append(str(class_id) + " " + " ".join(str(a) for a in bbox) + '\n')

    os.makedirs(txt_folder_path, exist_ok=True)
    # An empty file is still written when there are no rectangles.
    with open(txt_name, 'w') as txt_file:
        txt_file.writelines(lines)
    
if __name__ == "__main__":
    # Folder holding the original JSON annotation files.
    json_floder_path = '/data/duanjiayi/yolov8/datasets/phone_labels'
    # Sorted for a deterministic processing order. Entries that are not
    # ".json" files are skipped, because decode_json() parses whatever name
    # it is given as JSON and would fail on anything else.
    for json_name in sorted(os.listdir(json_floder_path)):
        if not json_name.lower().endswith('.json'):
            continue
        decode_json(json_floder_path, json_name)

4、将txt文件与jpg文件统一改名称

代码如下：

import os

dataDir = '/data/duanjiayi/yolov8/datasets/phone_on_desk'  # replace with your own directory


def rename_files(data_dir, prefix='phone'):
    """Rename the files in *data_dir* to ``<prefix>_<n><original extension>``.

    Files are numbered from 1 in sorted order of their stem (the name
    without its extension). Files that share a stem, such as ``a.jpg`` and
    ``a.txt``, receive the same number, and every file keeps its own
    extension. Running the function on an image folder and on a label
    folder that contain the same stems therefore yields matching names.

    Sub-directories and hidden files (names starting with ``.``) are left
    untouched.

    Args:
        data_dir: Directory whose files are renamed in place.
        prefix: Text placed before the running number in the new names.

    Returns:
        A dict mapping each old file name to its new file name.
    """
    import tempfile  # local import: only this helper needs it

    names = sorted(
        name for name in os.listdir(data_dir)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(data_dir, name))
    )
    stems = sorted({os.path.splitext(name)[0] for name in names})
    number_of = {stem: n for n, stem in enumerate(stems, start=1)}

    renamed = {}
    for name in names:
        stem, ext = os.path.splitext(name)
        renamed[name] = f'{prefix}_{number_of[stem]}{ext}'
    if not renamed:
        return renamed

    # Two-phase rename through a staging folder inside data_dir. Renaming
    # directly could overwrite a not-yet-processed file that already carries
    # one of the target names, e.g. when the script is run a second time.
    # If a rename fails part-way, the files moved so far remain in the
    # staging folder.
    staging = tempfile.mkdtemp(prefix='.rename_', dir=data_dir)
    for old_name, new_name in renamed.items():
        os.rename(os.path.join(data_dir, old_name),
                  os.path.join(staging, new_name))
    for new_name in renamed.values():
        os.rename(os.path.join(staging, new_name),
                  os.path.join(data_dir, new_name))
    os.rmdir(staging)
    return renamed


if __name__ == "__main__":
    # Change the ``prefix`` argument to pick a different base name.
    rename_files(dataDir)
    print("文件重命名完成！")

5、得到一个images文件夹和一个labels文件夹，按照8:2的比例划分train和val数据集

代码如下：

import os
import shutil
import random

# Source folders holding the images and their label files.
images_dir = './phone_images'
labels_dir = './phone_labels'

# Root of the split dataset; train/ and val/ are created beneath it.
output_dir = 'phone'


def _move_pairs(image_names, images_dir, labels_dir, dst_images_dir, dst_labels_dir):
    """Move each image, plus its same-stem ``.txt`` label when one exists."""
    os.makedirs(dst_images_dir, exist_ok=True)
    os.makedirs(dst_labels_dir, exist_ok=True)
    for image_name in image_names:
        shutil.move(os.path.join(images_dir, image_name),
                    os.path.join(dst_images_dir, image_name))
        # Derive the label name from the stem so any image extension works
        # and a ".jpg" occurring inside the stem is never rewritten.
        label_name = os.path.splitext(image_name)[0] + '.txt'
        label_path = os.path.join(labels_dir, label_name)
        # An image without a label file is still moved, on its own.
        if os.path.exists(label_path):
            shutil.move(label_path, os.path.join(dst_labels_dir, label_name))


def split_dataset(images_dir, labels_dir, output_dir, train_ratio=0.8, seed=None):
    """Randomly split an image/label dataset into train and val subsets.

    Files are moved (not copied) into ``<output_dir>/train/images``,
    ``<output_dir>/train/labels``, ``<output_dir>/val/images`` and
    ``<output_dir>/val/labels``. A label is matched to its image by file
    stem and must have the ``.txt`` extension.

    Args:
        images_dir: Folder containing the image files.
        labels_dir: Folder containing the ``.txt`` label files.
        output_dir: Root folder of the split dataset; created when missing.
        train_ratio: Fraction of the images that goes to the train subset.
        seed: Optional seed that makes the shuffle reproducible.

    Returns:
        A tuple ``(train_images, val_images)`` of image file-name lists.
    """
    # Sort before shuffling: os.listdir() order is arbitrary, so without
    # this a fixed seed would not give a repeatable split.
    image_names = sorted(
        name for name in os.listdir(images_dir)
        if os.path.isfile(os.path.join(images_dir, name))
    )
    random.Random(seed).shuffle(image_names)

    split_index = int(len(image_names) * train_ratio)
    train_images = image_names[:split_index]
    val_images = image_names[split_index:]

    _move_pairs(train_images, images_dir, labels_dir,
                os.path.join(output_dir, 'train', 'images'),
                os.path.join(output_dir, 'train', 'labels'))
    _move_pairs(val_images, images_dir, labels_dir,
                os.path.join(output_dir, 'val', 'images'),
                os.path.join(output_dir, 'val', 'labels'))
    return train_images, val_images


if __name__ == "__main__":
    split_dataset(images_dir, labels_dir, output_dir)
    print("文件分割完成！")

6、整理yolov8数据集，格式如下：