yolov8目标检测数据集制作
默认环境已经配置完成,且数据标注已经完成。
在标注的时候可能会涉及到很多细化的种类,而训练时会把其中几个类别合并成一个大类来训练。因此首先需要定义类别标签,如下所示,将路面上会出现的目标类型一共分为4大类:0行人,1机动车,2非机动车,3路障
# Maps each fine-grained annotation label to the merged class id used for
# training: 0 pedestrian, 1 motor vehicle, 2 non-motor vehicle, 3 road barrier.
class_names = {
    # 0: pedestrian
    '行人': 0,
    # 1: motor vehicles
    '轿车': 1,
    'SUV': 1,
    '面包车': 1,
    '公交': 1,
    '客车': 1,
    '卡车': 1,
    '货车': 1,
    '工程车': 1,
    '摩托车': 1,
    # 2: non-motor vehicles
    '自行车': 2,
    '电动车': 2,
    '三轮车': 2,
    # 3: road barrier
    '路障': 3,
}
接着,将标注生成的json文件转成yolo格式的txt
# Convert a corner-format box to the YOLO label format.
def x1y1x2y2Toxywh(xmin, ymin, xmax, ymax, imgh, imgw):
    """Convert a pixel corner box into normalised YOLO ``x y w h`` strings.

    Edges lying within 5 pixels of an image border (or beyond the far border)
    are snapped onto that border before normalising.

    Args:
        xmin, ymin, xmax, ymax: Box corners in pixels.
        imgh: Image height in pixels.
        imgw: Image width in pixels.

    Returns:
        Tuple ``(x, y, w, h)`` of strings, each at most 7 characters long:
        box centre and box size divided by the image width / height.
    """
    # Snap edges that (almost) touch the image border onto the border.
    ymax = imgh if (ymax > imgh or (imgh - ymax < 5)) else ymax
    ymin = 0 if ymin < 5 else ymin
    xmax = imgw if (xmax > imgw or (imgw - xmax < 5)) else xmax
    xmin = 0 if xmin < 5 else xmin

    x = (xmin + xmax) / 2 / imgw
    y = (ymin + ymax) / 2 / imgh
    w = (xmax - xmin) / imgw
    h = (ymax - ymin) / imgh

    def _fmt(value):
        """Render *value* as text of at most 7 characters."""
        text = str(value)
        # Very small floats are rendered as e.g. '1.6666666666666667e-05';
        # cutting that to 7 characters would drop the exponent and change the
        # value by orders of magnitude, so switch to fixed-point first.
        if 'e' in text:
            text = f'{value:.5f}'
        return text[:7]

    return _fmt(x), _fmt(y), _fmt(w), _fmt(h)
def get_o_xmin_xmax_ymin_ymax(o, imgh, imgw):
    """Return the integer bounding box of a two-point shape, clamped to the image.

    Args:
        o: Shape dict whose ``'points'`` entry holds two ``[x, y]`` corner
            points in any order.
        imgh: Image height in pixels.
        imgw: Image width in pixels.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` of ints with
        ``0 <= xmin <= xmax <= imgw - 1`` and ``0 <= ymin <= ymax <= imgh - 1``.
    """
    first, second = o['points'][0], o['points'][1]
    xmin = int(min(first[0], second[0]))
    xmax = int(max(first[0], second[0]))
    ymin = int(min(first[1], second[1]))
    ymax = int(max(first[1], second[1]))

    def _clamp(value, upper):
        """Limit *value* to the closed range [0, upper]."""
        return max(0, min(value, upper))

    # Clamp every coordinate on both sides. Clamping only the lower side of
    # the minimum and the upper side of the maximum lets a box that lies
    # outside the image end up with min > max (negative size).
    xmin = _clamp(xmin, imgw - 1)
    xmax = _clamp(xmax, imgw - 1)
    ymin = _clamp(ymin, imgh - 1)
    ymax = _clamp(ymax, imgh - 1)
    return xmin, ymin, xmax, ymax
def json2yolotxt(imgpath, jsonpath, txtpath, classname=None):
    """Convert per-image JSON annotations into YOLO txt label files.

    For every ``.jpg`` in *imgpath* the JSON file with the same stem is read
    from *jsonpath*. Each entry of its ``'shapes'`` list whose
    ``'shape_type'`` is ``'rectangle'`` and whose ``'label'`` is a known class
    becomes one ``"<class id> <x> <y> <w> <h>"`` line in
    ``<txtpath>/<stem>.txt``.

    Images that cannot be read, JSON files that are missing or malformed, and
    images without any usable rectangle are reported and skipped; no txt file
    is written for them.

    Args:
        imgpath: Directory containing the ``.jpg`` images.
        jsonpath: Directory containing the JSON annotation files.
        txtpath: Output directory for the txt files (created if missing).
        classname: Optional mapping from annotation label to class id.
            Defaults to the module-level ``class_names``.
    """
    names = class_names if classname is None else classname
    files = [x for x in os.listdir(imgpath) if x[-4:] == '.jpg']
    print('len(imgpath): ', len(files))
    labelsStatic = {}  # class id -> number of boxes written
    os.makedirs(txtpath, exist_ok=True)

    for i in tqdm(files, desc='进度 '):
        stem = i[:-4]
        img_path_name = os.path.join(imgpath, i)
        img = cv2.imread(img_path_name)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            print(img_path_name, "图片读取失败")
            continue
        imgh, imgw = img.shape[0], img.shape[1]  # image height and width

        json_path_name = os.path.join(jsonpath, stem + '.json')
        try:
            with open(json_path_name, 'r', encoding='utf-8') as rf:
                data = json.load(rf)
        except (OSError, ValueError) as e:
            # Missing/unreadable file or invalid JSON: skip this image only.
            print(json_path_name, 'JSON解析错误: ', e)
            continue

        try:
            objs = data['shapes']
        except (KeyError, TypeError):
            print(json_path_name, "获取shapes 标注信息失败")
            continue
        if not objs:
            print(json_path_name, "中不包含标注信息")
            continue

        boxs = []
        for o in objs:
            try:
                if o['shape_type'] != 'rectangle':
                    continue
                label = o['label']
                if label not in names:
                    print(i, label, 'not in labels... ... ... ...')
                    continue
                cls_id = str(names[label])
                xmin, ymin, xmax, ymax = get_o_xmin_xmax_ymin_ymax(o, imgh, imgw)
                x, y, w, h = x1y1x2y2Toxywh(xmin, ymin, xmax, ymax, imgh, imgw)
            except (KeyError, IndexError, TypeError, ValueError) as e:
                print(json_path_name, 'JSON解析错误: ', e)
                continue
            labelsStatic[cls_id] = labelsStatic.get(cls_id, 0) + 1
            boxs.append(' '.join((cls_id, x, y, w, h)))

        # Every shape may have been filtered out above; nothing to write then.
        if not boxs:
            print(json_path_name, "中没有可用的矩形标注")
            continue

        # One box per line, no trailing newline after the last box.
        txt_path_name = os.path.join(txtpath, stem + '.txt')
        with open(txt_path_name, 'w', encoding='utf-8') as wf:
            wf.write('\n'.join(boxs))

    print('各类别标注框数量: ', labelsStatic)
    print("json转txt完成!")
json转txt完成后,制作数据集dataset,需要将图片文件和转换得到的txt标注文件分别放到2个文件夹中(即下面函数的SrcPathJpg和SrcPathTxt)。
本例中只制作了训练集和验证集,比例是0.8和0.2,如需添加测试集,解开代码中的测试集注释以及测试集比例即可(需要保证所有解开注释的数据集比例相加等于1)
def _copy_split(files, split_name, desc_name, SrcPathJpg, DesPathJpg, SrcPathTxt, DesPathTxt):
    """Copy the images in *files* and their txt labels into the *split_name* sub-folders."""
    dst_jpg = os.path.join(DesPathJpg, split_name)
    dst_txt = os.path.join(DesPathTxt, split_name)
    os.makedirs(dst_jpg, exist_ok=True)
    os.makedirs(dst_txt, exist_ok=True)
    for file in tqdm(files, desc=f'Copying {desc_name} data'):
        txt_name = os.path.splitext(file)[0] + '.txt'
        shutil.copy(os.path.join(SrcPathJpg, file), os.path.join(dst_jpg, file))
        shutil.copy(os.path.join(SrcPathTxt, txt_name), os.path.join(dst_txt, txt_name))


def make_dataset(SrcPathJpg, DesPath, DesPathJpg, SrcPathTxt, DesPathTxt,
                 train_split_ratio=0.8, val_split_ratio=0.2, test_split_ratio=0.0):
    """Randomly split images and their txt labels into train / val (/ test) folders.

    Files in *SrcPathJpg* are shuffled and copied to
    ``<DesPathJpg>/<split>``; the label with the same stem is copied from
    *SrcPathTxt* to ``<DesPathTxt>/<split>``. Files without a matching label
    are reported and left out. The last enabled split receives whatever
    remains after the earlier ones, so no file is lost to rounding.

    Args:
        SrcPathJpg: Directory with the source images.
        DesPath: Root directory of the dataset to create.
        DesPathJpg: Destination directory for images.
        SrcPathTxt: Directory with the txt label files.
        DesPathTxt: Destination directory for labels.
        train_split_ratio: Fraction of files used for training.
        val_split_ratio: Fraction of files used for validation.
        test_split_ratio: Fraction of files used for testing; 0 disables
            the test split.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.
    """
    splits = [
        ('train', 'train', train_split_ratio),
        ('val', 'validation', val_split_ratio),
        ('test', 'test', test_split_ratio),
    ]
    ratios = [ratio for _, _, ratio in splits]
    if min(ratios) < 0 or abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError(f'split ratios must be non-negative and sum to 1, got {ratios}')

    os.makedirs(DesPath, exist_ok=True)
    os.makedirs(DesPathJpg, exist_ok=True)
    os.makedirs(DesPathTxt, exist_ok=True)

    # Keep only the files that have a label; copying a missing label would
    # abort the whole run with FileNotFoundError.
    files = []
    for name in sorted(os.listdir(SrcPathJpg)):
        txt_name = os.path.splitext(name)[0] + '.txt'
        if os.path.isfile(os.path.join(SrcPathTxt, txt_name)):
            files.append(name)
        else:
            print(name, '没有对应的txt标注文件,已跳过')
    print('len(files): ', len(files))

    # Shuffle so that every split is a random sample.
    random.shuffle(files)
    total_files = len(files)

    enabled = [split for split in splits if split[2] > 0]
    start = 0
    cumulative = 0.0
    for index, (split_name, desc_name, ratio) in enumerate(enabled):
        cumulative += ratio
        if index == len(enabled) - 1:
            # The last split takes the remainder left over by rounding down.
            end = total_files
        else:
            # The epsilon guards against sums such as 0.7 + 0.2 landing just
            # below the intended value in floating point.
            end = int(cumulative * total_files + 1e-9)
        _copy_split(files[start:end], split_name, desc_name,
                    SrcPathJpg, DesPathJpg, SrcPathTxt, DesPathTxt)
        start = end

    print("数据集划分完成!")
所有代码以及使用如下:
import os, cv2, json
import shutil
import random
from tqdm import tqdm
# Maps each fine-grained annotation label to the merged class id used for
# training: 0 pedestrian, 1 motor vehicle, 2 non-motor vehicle, 3 road barrier.
class_names = {
    # 0: pedestrian
    '行人': 0,
    # 1: motor vehicles
    '轿车': 1,
    'SUV': 1,
    '面包车': 1,
    '公交': 1,
    '客车': 1,
    '卡车': 1,
    '货车': 1,
    '工程车': 1,
    '摩托车': 1,
    # 2: non-motor vehicles
    '自行车': 2,
    '电动车': 2,
    '三轮车': 2,
    # 3: road barrier
    '路障': 3,
}
# Convert a corner-format box to the YOLO label format.
def x1y1x2y2Toxywh(xmin, ymin, xmax, ymax, imgh, imgw):
    """Convert a pixel corner box into normalised YOLO ``x y w h`` strings.

    Edges lying within 5 pixels of an image border (or beyond the far border)
    are snapped onto that border before normalising.

    Args:
        xmin, ymin, xmax, ymax: Box corners in pixels.
        imgh: Image height in pixels.
        imgw: Image width in pixels.

    Returns:
        Tuple ``(x, y, w, h)`` of strings, each at most 7 characters long:
        box centre and box size divided by the image width / height.
    """
    # Snap edges that (almost) touch the image border onto the border.
    ymax = imgh if (ymax > imgh or (imgh - ymax < 5)) else ymax
    ymin = 0 if ymin < 5 else ymin
    xmax = imgw if (xmax > imgw or (imgw - xmax < 5)) else xmax
    xmin = 0 if xmin < 5 else xmin

    x = (xmin + xmax) / 2 / imgw
    y = (ymin + ymax) / 2 / imgh
    w = (xmax - xmin) / imgw
    h = (ymax - ymin) / imgh

    def _fmt(value):
        """Render *value* as text of at most 7 characters."""
        text = str(value)
        # Very small floats are rendered as e.g. '1.6666666666666667e-05';
        # cutting that to 7 characters would drop the exponent and change the
        # value by orders of magnitude, so switch to fixed-point first.
        if 'e' in text:
            text = f'{value:.5f}'
        return text[:7]

    return _fmt(x), _fmt(y), _fmt(w), _fmt(h)
def get_o_xmin_xmax_ymin_ymax(o, imgh, imgw):
    """Return the integer bounding box of a two-point shape, clamped to the image.

    Args:
        o: Shape dict whose ``'points'`` entry holds two ``[x, y]`` corner
            points in any order.
        imgh: Image height in pixels.
        imgw: Image width in pixels.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` of ints with
        ``0 <= xmin <= xmax <= imgw - 1`` and ``0 <= ymin <= ymax <= imgh - 1``.
    """
    first, second = o['points'][0], o['points'][1]
    xmin = int(min(first[0], second[0]))
    xmax = int(max(first[0], second[0]))
    ymin = int(min(first[1], second[1]))
    ymax = int(max(first[1], second[1]))

    def _clamp(value, upper):
        """Limit *value* to the closed range [0, upper]."""
        return max(0, min(value, upper))

    # Clamp every coordinate on both sides. Clamping only the lower side of
    # the minimum and the upper side of the maximum lets a box that lies
    # outside the image end up with min > max (negative size).
    xmin = _clamp(xmin, imgw - 1)
    xmax = _clamp(xmax, imgw - 1)
    ymin = _clamp(ymin, imgh - 1)
    ymax = _clamp(ymax, imgh - 1)
    return xmin, ymin, xmax, ymax
def json2yolotxt(imgpath, jsonpath, txtpath, classname=None):
    """Convert per-image JSON annotations into YOLO txt label files.

    For every ``.jpg`` in *imgpath* the JSON file with the same stem is read
    from *jsonpath*. Each entry of its ``'shapes'`` list whose
    ``'shape_type'`` is ``'rectangle'`` and whose ``'label'`` is a known class
    becomes one ``"<class id> <x> <y> <w> <h>"`` line in
    ``<txtpath>/<stem>.txt``.

    Images that cannot be read, JSON files that are missing or malformed, and
    images without any usable rectangle are reported and skipped; no txt file
    is written for them.

    Args:
        imgpath: Directory containing the ``.jpg`` images.
        jsonpath: Directory containing the JSON annotation files.
        txtpath: Output directory for the txt files (created if missing).
        classname: Optional mapping from annotation label to class id.
            Defaults to the module-level ``class_names``; it is optional so
            that three-argument calls keep working.
    """
    names = class_names if classname is None else classname
    files = [x for x in os.listdir(imgpath) if x[-4:] == '.jpg']
    print('len(imgpath): ', len(files))
    labelsStatic = {}  # class id -> number of boxes written
    os.makedirs(txtpath, exist_ok=True)

    for i in tqdm(files, desc='进度 '):
        stem = i[:-4]
        img_path_name = os.path.join(imgpath, i)
        img = cv2.imread(img_path_name)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            print(img_path_name, "图片读取失败")
            continue
        imgh, imgw = img.shape[0], img.shape[1]  # image height and width

        json_path_name = os.path.join(jsonpath, stem + '.json')
        try:
            with open(json_path_name, 'r', encoding='utf-8') as rf:
                data = json.load(rf)
        except (OSError, ValueError) as e:
            # Missing/unreadable file or invalid JSON: skip this image only.
            print(json_path_name, 'JSON解析错误: ', e)
            continue

        try:
            objs = data['shapes']
        except (KeyError, TypeError):
            print(json_path_name, "获取shapes 标注信息失败")
            continue
        if not objs:
            print(json_path_name, "中不包含标注信息")
            continue

        boxs = []
        for o in objs:
            try:
                if o['shape_type'] != 'rectangle':
                    continue
                label = o['label']
                if label not in names:
                    print(i, label, 'not in labels... ... ... ...')
                    continue
                cls_id = str(names[label])
                xmin, ymin, xmax, ymax = get_o_xmin_xmax_ymin_ymax(o, imgh, imgw)
                x, y, w, h = x1y1x2y2Toxywh(xmin, ymin, xmax, ymax, imgh, imgw)
            except (KeyError, IndexError, TypeError, ValueError) as e:
                print(json_path_name, 'JSON解析错误: ', e)
                continue
            labelsStatic[cls_id] = labelsStatic.get(cls_id, 0) + 1
            boxs.append(' '.join((cls_id, x, y, w, h)))

        # Every shape may have been filtered out above; nothing to write then.
        if not boxs:
            print(json_path_name, "中没有可用的矩形标注")
            continue

        # One box per line, no trailing newline after the last box.
        txt_path_name = os.path.join(txtpath, stem + '.txt')
        with open(txt_path_name, 'w', encoding='utf-8') as wf:
            wf.write('\n'.join(boxs))

    print('各类别标注框数量: ', labelsStatic)
    print("json转txt完成!")
def _copy_split(files, split_name, desc_name, SrcPathJpg, DesPathJpg, SrcPathTxt, DesPathTxt):
    """Copy the images in *files* and their txt labels into the *split_name* sub-folders."""
    dst_jpg = os.path.join(DesPathJpg, split_name)
    dst_txt = os.path.join(DesPathTxt, split_name)
    os.makedirs(dst_jpg, exist_ok=True)
    os.makedirs(dst_txt, exist_ok=True)
    for file in tqdm(files, desc=f'Copying {desc_name} data'):
        txt_name = os.path.splitext(file)[0] + '.txt'
        shutil.copy(os.path.join(SrcPathJpg, file), os.path.join(dst_jpg, file))
        shutil.copy(os.path.join(SrcPathTxt, txt_name), os.path.join(dst_txt, txt_name))


def make_dataset(SrcPathJpg, DesPath, DesPathJpg, SrcPathTxt, DesPathTxt,
                 train_split_ratio=0.8, val_split_ratio=0.2, test_split_ratio=0.0):
    """Randomly split images and their txt labels into train / val (/ test) folders.

    Files in *SrcPathJpg* are shuffled and copied to
    ``<DesPathJpg>/<split>``; the label with the same stem is copied from
    *SrcPathTxt* to ``<DesPathTxt>/<split>``. Files without a matching label
    are reported and left out. The last enabled split receives whatever
    remains after the earlier ones, so no file is lost to rounding.

    Args:
        SrcPathJpg: Directory with the source images.
        DesPath: Root directory of the dataset to create.
        DesPathJpg: Destination directory for images.
        SrcPathTxt: Directory with the txt label files.
        DesPathTxt: Destination directory for labels.
        train_split_ratio: Fraction of files used for training.
        val_split_ratio: Fraction of files used for validation.
        test_split_ratio: Fraction of files used for testing; 0 disables
            the test split.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.
    """
    splits = [
        ('train', 'train', train_split_ratio),
        ('val', 'validation', val_split_ratio),
        ('test', 'test', test_split_ratio),
    ]
    ratios = [ratio for _, _, ratio in splits]
    if min(ratios) < 0 or abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError(f'split ratios must be non-negative and sum to 1, got {ratios}')

    os.makedirs(DesPath, exist_ok=True)
    os.makedirs(DesPathJpg, exist_ok=True)
    os.makedirs(DesPathTxt, exist_ok=True)

    # Keep only the files that have a label; copying a missing label would
    # abort the whole run with FileNotFoundError.
    files = []
    for name in sorted(os.listdir(SrcPathJpg)):
        txt_name = os.path.splitext(name)[0] + '.txt'
        if os.path.isfile(os.path.join(SrcPathTxt, txt_name)):
            files.append(name)
        else:
            print(name, '没有对应的txt标注文件,已跳过')
    print('len(files): ', len(files))

    # Shuffle so that every split is a random sample.
    random.shuffle(files)
    total_files = len(files)

    enabled = [split for split in splits if split[2] > 0]
    start = 0
    cumulative = 0.0
    for index, (split_name, desc_name, ratio) in enumerate(enabled):
        cumulative += ratio
        if index == len(enabled) - 1:
            # The last split takes the remainder left over by rounding down.
            end = total_files
        else:
            # The epsilon guards against sums such as 0.7 + 0.2 landing just
            # below the intended value in floating point.
            end = int(cumulative * total_files + 1e-9)
        _copy_split(files[start:end], split_name, desc_name,
                    SrcPathJpg, DesPathJpg, SrcPathTxt, DesPathTxt)
        start = end

    print("数据集划分完成!")
# Step 1: convert the JSON annotations into YOLO txt labels.
imgPath = 'imgALL'   # folder with the source .jpg images
jsonPath = 'json'    # folder with the JSON annotation files
txtPath = 'txt'      # folder that receives the generated txt labels
# json2yolotxt declares a fourth parameter for the label -> class id mapping;
# pass it explicitly, otherwise the call fails with a TypeError.
json2yolotxt(imgPath, jsonPath, txtPath, class_names)

# Step 2: split the images and txt labels into the dataset folders.
SrcPathJpg = imgPath
SrcPathTxt = txtPath
DesPath = 'LQ_new_dataset'
DesPathJpg = os.path.join(DesPath, 'images')
DesPathTxt = os.path.join(DesPath, 'labels')
make_dataset(SrcPathJpg, DesPath, DesPathJpg, SrcPathTxt, DesPathTxt)