为yolov5替换数据集时,常见做法是另外编写一份将VOC数据集转换为YOLO格式的脚本,并在训练前单独运行。本文介绍一种直接修改VOC.yaml的方法,无需新增文件,即可完成数据集替换。
一、环境介绍
针对绝缘子识别数据集,利用2022年8月23日版本的yolov5算法,以及labelimg标注工具,开展目标检测任务。
目标 | 类别 | 标注工具 | 标注格式 | 检测方法 |
---|---|---|---|---|
检测图片中的绝缘子 | ['insulator'] | labelimg | voc | yolov5(2022年8月23日) |
二、目录结构
整个实现过程所涉及的文件目录,其中,自备表示自己需要准备的,生成表示无需自己准备。
├─VOCdevkit          # 所有VOC数据集(自备)
│  └─VOC0823         # 某个VOC数据集(自备)
│     ├─Annotations  # xml标注文件(自备)
│     ├─JPEGImages   # jpg文件(自备)
│     ├─Labels       # txt文件(生成)
│     ├─train        # 训练文件夹(生成)
│     │  ├─images    # 训练图片(生成)
│     │  └─labels    # 训练标签(生成)
│     └─val          # 验证文件夹(生成)
│        ├─images    # 验证图片(生成)
│        └─labels    # 验证标签(生成)
└─yolov5             # yolov5源码(github下载)
三、操作步骤
3.1 修改yolov5/data/VOC.yaml文件
不同于其他yolov5替换自有数据集的方式,本文直接修改VOC.yaml文件,把数据集准备代码粘贴到download入口中,可移植性强。
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
# └── VOCdevkit ← create dataset here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../VOCdevkit/VOC0823
train: # train images (relative to 'path') images and labels
- train
val: # val images (relative to 'path') images and labels
- val
test: # test images (optional)
- test
# Classes
names:
0: insulator
# Download script/URL (optional) ---------------------------------------------------------------------------------------
# If the val path does not exist, the download code below is executed
download: |
import shutil
import xml.etree.ElementTree as ET
import os
import random
from tqdm import tqdm
def convert_label(xml_file_path, txt_file_path):
    """Convert one Pascal VOC XML annotation file into a YOLO-format label file.

    Every ``<object>`` whose ``<name>`` is one of the values of
    ``yaml['names']`` becomes one output line
    ``"<class_id> <x_center> <y_center> <width> <height>"``, with the box
    values normalized by the image size read from the ``<size>`` element.
    Objects with any other name are skipped. The output file is always
    created, even when no object matches.

    ``yaml`` is not defined in this script; it is expected to be supplied by
    whatever executes the download code (TODO confirm against the caller).

    Args:
        xml_file_path: Path of the VOC ``.xml`` annotation to read.
        txt_file_path: Path of the ``.txt`` label file to write.

    Errors raised while parsing the XML or opening the output file propagate
    to the caller.
    """
    def convert_box(size, box):
        """Map a pixel box (xmin, xmax, ymin, ymax) to normalized (x, y, w, h)."""
        dw, dh = 1. / size[0], 1. / size[1]
        # NOTE(review): the "- 1" presumably compensates for 1-based VOC pixel
        # coordinates; it is kept exactly as in the original conversion.
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, \
            box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    # Passing the path lets ElementTree open the file itself, honour the
    # encoding declared in the XML and close the handle when parsing is done.
    root = ET.parse(xml_file_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    names = list(yaml['names'].values())

    label_lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in names:
            continue
        xmlbox = obj.find('bndbox')
        bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
        cls_id = names.index(cls)  # class id
        label_lines.append(" ".join(str(a) for a in (cls_id, *bb)) + '\n')

    # The context manager guarantees the label file is closed even on error.
    with open(txt_file_path, 'w') as out_file:
        out_file.writelines(label_lines)
# Dataset preparation: convert every VOC annotation to a YOLO label file,
# split the images 80/20 into train/val and copy images + labels into place.
# Relies on `yaml` (not defined in this script; expected to be supplied by
# whatever executes it) and on `convert_label` defined above.

# 1. Paths. JPEGImages and Annotations must already exist; the rest is generated.
root_dir = yaml['path']  # need
imgs_dir = os.path.join(root_dir, 'JPEGImages')  # need
xmls_dir = os.path.join(root_dir, 'Annotations')  # need
txts_dir = os.path.join(root_dir, 'Labels')
train_dir = os.path.join(root_dir, 'train')
val_dir = os.path.join(root_dir, 'val')
train_img_dir = os.path.join(train_dir, 'images')
train_txt_dir = os.path.join(train_dir, 'labels')
val_img_dir = os.path.join(val_dir, 'images')
val_txt_dir = os.path.join(val_dir, 'labels')

# 2. Recreate the generated directories from scratch (stale output is deleted).
for stale_dir in (txts_dir, train_dir, val_dir):
    shutil.rmtree(stale_dir, ignore_errors=True)
for new_dir in (txts_dir, train_dir, train_img_dir, train_txt_dir,
                val_dir, val_img_dir, val_txt_dir):
    os.mkdir(new_dir)

# 3. Convert xml to txt. os.listdir returns entries in arbitrary order, so the
# listing is sorted to make the seeded shuffle below reproducible.
imgs_list = sorted(os.listdir(imgs_dir))
for img_file in tqdm(imgs_list, desc='convert xml to txt'):
    name, _ = os.path.splitext(img_file)
    convert_label(os.path.join(xmls_dir, name + '.xml'),
                  os.path.join(txts_dir, name + '.txt'))

# 4. Split train and val (80% / 20%) with a fixed seed.
random.seed(0)
random.shuffle(imgs_list)
split_at = int(0.8 * len(imgs_list))
train_list, val_list = imgs_list[:split_at], imgs_list[split_at:]


# 5. Copy files.
def _copy_split(file_list, img_dst_dir, txt_dst_dir, desc):
    """Copy each image and its generated label file into one split's folders."""
    for img_file in tqdm(file_list, desc=desc):
        shutil.copyfile(src=os.path.join(imgs_dir, img_file),
                        dst=os.path.join(img_dst_dir, img_file))
        name, _ = os.path.splitext(img_file)
        txt_file = name + '.txt'
        shutil.copyfile(src=os.path.join(txts_dir, txt_file),
                        dst=os.path.join(txt_dst_dir, txt_file))


_copy_split(train_list, train_img_dir, train_txt_dir, 'copy train file')
_copy_split(val_list, val_img_dir, val_txt_dir, 'copy val file')
3.2 修改yolov5/models/yolov5n.yaml部分代码
修改完VOC.yaml后,自然需要修改yolov5n.yaml(也可以使用其他网络),修改标签类别数量nc。
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
...
四、测试
4.1 训练
4.1.1 修改yolov5/train.py/parse_opt部分代码
在开始训练之前,需要指定网络、数据等参数,修改该文件中的default即可。
def parse_opt(known=False):
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5n.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default=ROOT / 'models/yolov5n.yaml', help='model.yaml path')
parser.add_argument('--data', type=str, default=ROOT / 'data/VOC.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
...
4.1.2 执行训练
在命令行中运行python train.py即可。
train: weights=yolov5n.pt, cfg=models/yolov5n.yaml, data=data/VOC.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=300, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
github: skipping check (offline), for updates see https://github.com/ultralytics/yolov5
YOLOv5 🚀 v6.2-47-geab35f6 Python-3.9.12 torch-1.12.1 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 11019MiB)
hyperparameters: lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0
Weights & Biases: run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs in Weights & Biases
ClearML: run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML
TensorBoard: Start with 'tensorboard --logdir runs/train', view at http://localhost:6006/
Dataset not found ⚠️, missing paths ['/dw/dw05/VOCdevkit/VOC0823/val']
convert xml to txt: 100%|██████████| 248/248 [00:00<00:00, 12201.47it/s]
copy train file: 100%|██████████| 198/198 [00:00<00:00, 5472.42it/s]
copy val file: 100%|██████████| 50/50 [00:00<00:00, 5640.23it/s]
Dataset download success ✅ (0.1s), saved to /dw/dw05/VOCdevkit
from n params module arguments
0 -1 1 1760 models.common.Conv [3, 16, 6, 2, 2]
1 -1 1 4672 models.common.Conv [16, 32, 3, 2]
2 -1 1 4800 models.common.C3 [32, 32, 1]
3 -1 1 18560 models.common.Conv [32, 64, 3, 2]
4 -1 2 29184 models.common.C3 [64, 64, 2]
5 -1 1 73984 models.common.Conv [64, 128, 3, 2]
6 -1 3 156928 models.common.C3 [128, 128, 3]
7 -1 1 295424 models.common.Conv [128, 256, 3, 2]
8 -1 1 296448 models.common.C3 [256, 256, 1]
9 -1 1 164608 models.common.SPPF [256, 256, 5]
10 -1 1 33024 models.common.Conv [256, 128, 1, 1]
11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
12 [-1, 6] 1 0 models.common.Concat [1]
13 -1 1 90880 models.common.C3 [256, 128, 1, False]
14 -1 1 8320 models.common.Conv [128, 64, 1, 1]
15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
16 [-1, 4] 1 0 models.common.Concat [1]
17 -1 1 22912 models.common.C3 [128, 64, 1, False]
18 -1 1 36992 models.common.Conv [64, 64, 3, 2]
19 [-1, 14] 1 0 models.common.Concat [1]
20 -1 1 74496 models.common.C3 [128, 128, 1, False]
21 -1 1 147712 models.common.Conv [128, 128, 3, 2]
22 [-1, 10] 1 0 models.common.Concat [1]
23 -1 1 296448 models.common.C3 [256, 256, 1, False]
24 [17, 20, 23] 1 8118 models.yolo.Detect [1, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [64, 128, 256]]
YOLOv5n summary: 270 layers, 1765270 parameters, 1765270 gradients, 4.2 GFLOPs
Transferred 342/349 items from yolov5n.pt
AMP: checks passed ✅
optimizer: SGD(lr=0.01) with parameter groups 57 weight(decay=0.0), 60 weight(decay=0.0005), 60 bias
WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.
See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.
train: Scanning '/dw/dw05/VOCdevkit/VOC0823/train/labels' images and labels...198 found, 0 missing, 0 empty, 0 corrupt: 100%|██████████| 198/198 [00:00<00:00, 15604.81it/s]
train: New cache created: /dw/dw05/VOCdevkit/VOC0823/train/labels.cache
val: Scanning '/dw/dw05/VOCdevkit/VOC0823/val/labels' images and labels...50 found, 0 missing, 0 empty, 0 corrupt: 100%|██████████| 50/50 [00:00<00:00, 9719.39it/s]
val: New cache created: /dw/dw05/VOCdevkit/VOC0823/val/labels.cache
AutoAnchor: 2.55 anchors/target, 1.000 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅
Plotting labels to runs/train/exp21/labels.jpg...
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to runs/train/exp21
Starting training for 300 epochs...
Epoch GPU_mem box_loss obj_loss cls_loss Instances Size
0%| | 0/13 [00:03<?, ?it/s]
4.2 测试
4.2.1 修改yolov5/val.py/parse_opt部分代码
在开始测试之前,需要指定网络、数据等参数,修改该文件中的default即可。
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--data', type=str, default=ROOT / 'data/VOC.yaml', help='dataset.yaml path')
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp21/weights/best.pt', help='model.pt path(s)')
parser.add_argument('--batch-size', type=int, default=32, help='batch size')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
parser.add_argument('--task', default='val', help='train, val, test, speed or study')
...
4.2.2 执行测试
在命令行中运行python val.py即可。
val: data=data/VOC.yaml, weights=runs/train/exp21/weights/best.pt, batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v6.2-47-geab35f6 Python-3.9.12 torch-1.12.1 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 11019MiB)
Fusing layers...
YOLOv5n summary: 213 layers, 1760518 parameters, 0 gradients, 4.1 GFLOPs
val: Scanning '/dw/dw05/VOCdevkit/VOC0823/val/labels.cache' images and labels... 50 found, 0 missing, 0 empty, 0 corrupt: 100%|██████████| 50/50 [00:00<?, ?it/s]
Class Images Instances P R mAP@.5 mAP@.5:.95: 100%|██████████| 2/2 [00:01<00:00, 1.95it/s]
all 50 50 0.999 1 0.995 0.917
Speed: 0.3ms pre-process, 1.2ms inference, 1.1ms NMS per image at shape (32, 3, 640, 640)
Results saved to runs/val/exp5
五、其他
若训练集、验证集已事先划分好(下方代码假定它们分别位于path上一级目录的train、val文件夹中,jpg图片与xml标注放在同一文件夹),可按如下方式修改VOC.yaml。
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
# └── VOCdevkit ← create dataset here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../autodl-tmp/SF6barometer/VOC
train: # train images (relative to 'path') images and labels
- train
val: # val images (relative to 'path') images and labels
- val
test: # test images (optional)
- test
# Classes
names:
0: abnormal
1: normal
# Download script/URL (optional) ---------------------------------------------------------------------------------------
# If the val path does not exist, the download code below is executed
download: |
import shutil
import xml.etree.ElementTree as ET
import os
import random
from tqdm import tqdm
def convert_label(xml_file_path, txt_file_path):
    """Convert one Pascal VOC XML annotation file into a YOLO-format label file.

    Every ``<object>`` whose ``<name>`` is one of the values of
    ``yaml['names']`` becomes one output line
    ``"<class_id> <x_center> <y_center> <width> <height>"``, with the box
    values normalized by the image size read from the ``<size>`` element.
    Objects with any other name are skipped. The output file is always
    created, even when no object matches.

    ``yaml`` is not defined in this script; it is expected to be supplied by
    whatever executes the download code (TODO confirm against the caller).

    Args:
        xml_file_path: Path of the VOC ``.xml`` annotation to read.
        txt_file_path: Path of the ``.txt`` label file to write.

    Errors raised while parsing the XML or opening the output file propagate
    to the caller.
    """
    def convert_box(size, box):
        """Map a pixel box (xmin, xmax, ymin, ymax) to normalized (x, y, w, h)."""
        dw, dh = 1. / size[0], 1. / size[1]
        # NOTE(review): the "- 1" presumably compensates for 1-based VOC pixel
        # coordinates; it is kept exactly as in the original conversion.
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, \
            box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    # Passing the path lets ElementTree open the file itself, honour the
    # encoding declared in the XML and close the handle when parsing is done.
    root = ET.parse(xml_file_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    names = list(yaml['names'].values())

    label_lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in names:
            continue
        xmlbox = obj.find('bndbox')
        bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
        cls_id = names.index(cls)  # class id
        label_lines.append(" ".join(str(a) for a in (cls_id, *bb)) + '\n')

    # The context manager guarantees the label file is closed even on error.
    with open(txt_file_path, 'w') as out_file:
        out_file.writelines(label_lines)
# Dataset preparation for data that is already split: the source `train` and
# `val` folders sit next to yaml['path'] and each holds .jpg images together
# with their .xml annotations. Images are copied and annotations converted
# into the YOLO layout under yaml['path'].
# Relies on `yaml` (not defined in this script; expected to be supplied by
# whatever executes it) and on `convert_label` defined above.

# 1. Paths. The source train/val folders must already exist.
root_dir = os.path.join(yaml['path'], '..')  # need
src_train_dir = os.path.join(root_dir, 'train')  # need
src_val_dir = os.path.join(root_dir, 'val')  # need
voc_dir = yaml['path']
txts_dir = os.path.join(voc_dir, 'Labels')
train_dir = os.path.join(voc_dir, 'train')
val_dir = os.path.join(voc_dir, 'val')
train_img_dir = os.path.join(train_dir, 'images')
train_txt_dir = os.path.join(train_dir, 'labels')
val_img_dir = os.path.join(val_dir, 'images')
val_txt_dir = os.path.join(val_dir, 'labels')

# 2. Recreate the output tree. WARNING: everything under yaml['path'] is
# deleted first, so the source data must not live inside that directory.
shutil.rmtree(voc_dir, ignore_errors=True)
# Labels is not written to by this script; it is still created so the
# resulting directory layout stays the same as before.
for new_dir in (voc_dir, txts_dir, train_dir, train_img_dir, train_txt_dir,
                val_dir, val_img_dir, val_txt_dir):
    os.mkdir(new_dir)


def _convert_split(src_dir, img_dst_dir, txt_dst_dir, desc):
    """Copy every .jpg in src_dir and convert its sibling .xml to a label file."""
    jpg_ext = '.jpg'
    for file in tqdm(os.listdir(src_dir), desc=desc):
        # Case-insensitive so that e.g. "IMG_1.JPG" is not silently skipped.
        if not file.lower().endswith(jpg_ext):
            continue
        # Strip only the trailing extension; str.replace would also rewrite a
        # ".jpg" that appears earlier in the file name.
        stem = file[:-len(jpg_ext)]
        shutil.copyfile(os.path.join(src_dir, file),
                        os.path.join(img_dst_dir, file))
        convert_label(os.path.join(src_dir, stem + '.xml'),
                      os.path.join(txt_dst_dir, stem + '.txt'))


# 3. Copy train files.
_convert_split(src_train_dir, train_img_dir, train_txt_dir, 'copy train file')
# 4. Copy val files.
_convert_split(src_val_dir, val_img_dir, val_txt_dir, 'copy val file')