[Deep Learning] Using Your Own VOC Dataset with YOLOv5

The usual way to swap a custom dataset into yolov5 is to write a standalone VOC-to-YOLO conversion script and run it separately. This post describes an alternative: embed the conversion inside VOC.yaml itself, so the dataset can be swapped in without adding any extra files.

1. Environment

The task is object detection on an insulator dataset, using the 2022-08-23 release of yolov5 and the labelimg annotation tool.

Target | Classes | Annotation tool | Annotation format | Detection method
insulators in images | ['insulator'] | labelimg | VOC | yolov5 (2022-08-23)

2. Directory structure

The directory layout used throughout. "prepared" marks what you must supply yourself; "generated" marks what the script creates for you.

├─VOCdevkit  # all VOC datasets (prepared)
│  └─VOC0823  # one VOC dataset (prepared)
│      ├─Annotations  # xml annotation files (prepared)
│      ├─JPEGImages  # jpg image files (prepared)
│      ├─Labels  # txt label files (generated)
│      ├─train  # training set (generated)
│      │  ├─images  # training images (generated)
│      │  └─labels  # training labels (generated)
│      └─val  # validation set (generated)
│          ├─images  # validation images (generated)
│          └─labels  # validation labels (generated)
└─yolov5  # yolov5 source (downloaded from github)

3. Steps

3.1 Modify yolov5/data/VOC.yaml

Unlike other ways of swapping in a custom dataset, this approach edits VOC.yaml directly and pastes the dataset-preparation code into its download entry, which makes it highly portable. The mechanism behind this is sketched next; the complete VOC.yaml follows after that.
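This works because YOLOv5's dataset check (check_dataset in utils/general.py) executes the download entry as a Python script whenever the val path is missing, injecting the parsed config dict under the name yaml; that is why the script below can read yaml['path'] and yaml['names']. A simplified, self-contained paraphrase of that hook (the exact source differs by version):

# Sketch of YOLOv5's autodownload hook (paraphrased from check_dataset in
# utils/general.py of the 2022-08-23 yolov5; details vary by version).
from pathlib import Path
import yaml as yamllib

data = yamllib.safe_load(Path('data/VOC.yaml').read_text())  # parsed dataset config
val_paths = [Path(data['path']) / p for p in data['val']]
if not all(p.exists() for p in val_paths):         # val missing -> autodownload
    exec(data['download'], {'yaml': data})         # script sees the config as `yaml`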

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── VOCdevkit  ← our dataset lives here (sibling of yolov5)


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../VOCdevkit/VOC0823
train: # training dir (relative to 'path'); holds images/ and labels/
  - train
val: # validation dir (relative to 'path'); holds images/ and labels/
  - val
test: # test dir (optional)
  - test

# Classes
names:
  0: insulator


# Download script/URL (optional) ---------------------------------------------------------------------------------------
# Executed automatically when the val path above is missing
download: |
    import shutil
    import xml.etree.ElementTree as ET
    import os
    import random
    from tqdm import tqdm


    def convert_label(xml_file_path, txt_file_path):
        def convert_box(size, box):
            # normalize a VOC (xmin, xmax, ymin, ymax) box to YOLO (cx, cy, w, h)
            dw, dh = 1. / size[0], 1. / size[1]
            x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1,\
                box[1] - box[0], box[3] - box[2]
            return x * dw, y * dh, w * dw, h * dh

        in_file = open(xml_file_path)
        out_file = open(txt_file_path, 'w')
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        names = list(yaml['names'].values())  # `yaml` is injected by YOLOv5
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls in names:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = names.index(cls)  # class id
                out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
        out_file.close()
        in_file.close()


    # 1. paths
    root_dir = yaml['path']  # must exist
    imgs_dir = os.path.join(root_dir, 'JPEGImages')  # must exist
    xmls_dir = os.path.join(root_dir, 'Annotations')  # must exist
    txts_dir = os.path.join(root_dir, 'Labels')
    train_dir = os.path.join(root_dir, 'train')
    val_dir = os.path.join(root_dir, 'val')
    train_img_dir = os.path.join(train_dir, 'images')
    train_txt_dir = os.path.join(train_dir, 'labels')
    val_img_dir = os.path.join(val_dir, 'images')
    val_txt_dir = os.path.join(val_dir, 'labels')
    # 2. recreate the generated directories
    shutil.rmtree(txts_dir, ignore_errors=True)
    os.mkdir(txts_dir)
    shutil.rmtree(train_dir, ignore_errors=True)
    os.mkdir(train_dir)
    os.mkdir(train_img_dir)
    os.mkdir(train_txt_dir)
    shutil.rmtree(val_dir, ignore_errors=True)
    os.mkdir(val_dir)
    os.mkdir(val_img_dir)
    os.mkdir(val_txt_dir)
    # 3. convert each image's xml annotation to a txt label
    imgs_list = os.listdir(imgs_dir)
    for img_file in tqdm(imgs_list, desc='convert xml to txt'):
        name, _ = os.path.splitext(img_file)
        txt_file = name + '.txt'
        xml_file = name + '.xml'
        xml_path = os.path.join(xmls_dir, xml_file)
        txt_path = os.path.join(txts_dir, txt_file)
        convert_label(xml_path, txt_path)
    # 4. shuffle and split 80/20 into train and val
    random.seed(0)
    random.shuffle(imgs_list)
    data_len = len(imgs_list)
    train_list, val_list = imgs_list[:int(0.8*data_len)], imgs_list[int(0.8*data_len):]
    # 5. copy images and labels into place
    for img_file in tqdm(train_list, desc='copy train file'):
        src_path = os.path.join(imgs_dir, img_file)
        dst_path = os.path.join(train_img_dir, img_file)
        shutil.copyfile(src=src_path, dst=dst_path)
        name, _ = os.path.splitext(img_file)
        txt_file = name + '.txt'
        src_path = os.path.join(txts_dir, txt_file)
        dst_path = os.path.join(train_txt_dir, txt_file)
        shutil.copyfile(src=src_path, dst=dst_path)

    for img_file in tqdm(val_list, desc='copy val file'):
        src_path = os.path.join(imgs_dir, img_file)
        dst_path = os.path.join(val_img_dir, img_file)
        shutil.copyfile(src=src_path, dst=dst_path)
        name, _ = os.path.splitext(img_file)
        txt_file = name + '.txt'
        src_path = os.path.join(txts_dir, txt_file)
        dst_path = os.path.join(val_txt_dir, txt_file)
        shutil.copyfile(src=src_path, dst=dst_path)
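A quick sanity check on convert_box: VOC stores 1-based corner coordinates (passed here in the order xmin, xmax, ymin, ymax), while YOLO labels are normalized (center-x, center-y, width, height). A standalone example with made-up numbers:

# convert_box from the script above, applied to a made-up box:
# a (100, 50)-(300, 150) object in a 640x480 image.
def convert_box(size, box):
    dw, dh = 1. / size[0], 1. / size[1]
    x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, \
        box[1] - box[0], box[3] - box[2]
    return x * dw, y * dh, w * dw, h * dh

print(convert_box((640, 480), (100., 300., 50., 150.)))
# -> (0.3109375, 0.20625, 0.3125, 0.2083...); the -1 offsets VOC's 1-based pixels.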

3.2 Modify yolov5/models/yolov5n.yaml

After editing VOC.yaml, the model config yolov5n.yaml needs one change (any other model size works the same way): set the class count nc to match the dataset.

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license

# Parameters
nc: 1  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.25  # layer channel multiple
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

...
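Only nc has to change; the anchors can usually stay at their defaults, since AutoAnchor re-checks their fit against the dataset when training starts (see the AutoAnchor line in the training log below).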

4. Training and testing

4.1 Training

4.1.1 Modify the defaults in parse_opt of yolov5/train.py

Before training, the network, data, and other arguments must point at our files; changing the default values in this function is enough.

def parse_opt(known=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5n.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default=ROOT / 'models/yolov5n.yaml', help='model.yaml path')
    parser.add_argument('--data', type=str, default=ROOT / 'data/VOC.yaml', help='dataset.yaml path')
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
...
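Editing the defaults keeps repeat runs short. Equivalently, judging from the argparse definitions above, the same settings can be passed as one-off flags without touching train.py:

python train.py --weights yolov5n.pt --cfg models/yolov5n.yaml --data data/VOC.yaml --epochs 300 --batch-size 16 --imgsz 640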

4.1.2 Run training

Run python train.py from the command line. On the first run the val path is missing, so the download script fires, converts the annotations, and splits the dataset, as the log shows:

train: weights=yolov5n.pt, cfg=models/yolov5n.yaml, data=data/VOC.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=300, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
github: skipping check (offline), for updates see https://github.com/ultralytics/yolov5
YOLOv5 🚀 v6.2-47-geab35f6 Python-3.9.12 torch-1.12.1 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 11019MiB)

hyperparameters: lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0
Weights & Biases: run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs in Weights & Biases
ClearML: run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML
TensorBoard: Start with 'tensorboard --logdir runs/train', view at http://localhost:6006/

Dataset not found ⚠️, missing paths ['/dw/dw05/VOCdevkit/VOC0823/val']
convert xml to txt: 100%|██████████| 248/248 [00:00<00:00, 12201.47it/s]
copy train file: 100%|██████████| 198/198 [00:00<00:00, 5472.42it/s]
copy val file: 100%|██████████| 50/50 [00:00<00:00, 5640.23it/s]
Dataset download success ✅ (0.1s), saved to /dw/dw05/VOCdevkit

from  n    params  module                                  arguments                     
0                -1  1      1760  models.common.Conv                      [3, 16, 6, 2, 2]              
1                -1  1      4672  models.common.Conv                      [16, 32, 3, 2]                
2                -1  1      4800  models.common.C3                        [32, 32, 1]                   
3                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
4                -1  2     29184  models.common.C3                        [64, 64, 2]                   
5                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
6                -1  3    156928  models.common.C3                        [128, 128, 3]                 
7                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
8                -1  1    296448  models.common.C3                        [256, 256, 1]                 
9                -1  1    164608  models.common.SPPF                      [256, 256, 5]                 
10                -1  1     33024  models.common.Conv                      [256, 128, 1, 1]              
11                -1  1         0  torch.nn.modules.upsampling.Upsample    [None, 2, 'nearest']          
12           [-1, 6]  1         0  models.common.Concat                    [1]                           
13                -1  1     90880  models.common.C3                        [256, 128, 1, False]          
14                -1  1      8320  models.common.Conv                      [128, 64, 1, 1]               
15                -1  1         0  torch.nn.modules.upsampling.Upsample    [None, 2, 'nearest']          
16           [-1, 4]  1         0  models.common.Concat                    [1]                           
17                -1  1     22912  models.common.C3                        [128, 64, 1, False]           
18                -1  1     36992  models.common.Conv                      [64, 64, 3, 2]                
19          [-1, 14]  1         0  models.common.Concat                    [1]                           
20                -1  1     74496  models.common.C3                        [128, 128, 1, False]          
21                -1  1    147712  models.common.Conv                      [128, 128, 3, 2]              
22          [-1, 10]  1         0  models.common.Concat                    [1]                           
23                -1  1    296448  models.common.C3                        [256, 256, 1, False]          
24      [17, 20, 23]  1      8118  models.yolo.Detect                      [1, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [64, 128, 256]]
YOLOv5n summary: 270 layers, 1765270 parameters, 1765270 gradients, 4.2 GFLOPs

Transferred 342/349 items from yolov5n.pt
AMP: checks passed ✅
optimizer: SGD(lr=0.01) with parameter groups 57 weight(decay=0.0), 60 weight(decay=0.0005), 60 bias
WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.
See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.
train: Scanning '/dw/dw05/VOCdevkit/VOC0823/train/labels' images and labels...198 found, 0 missing, 0 empty, 0 corrupt: 100%|██████████| 198/198 [00:00<00:00, 15604.81it/s]
train: New cache created: /dw/dw05/VOCdevkit/VOC0823/train/labels.cache
val: Scanning '/dw/dw05/VOCdevkit/VOC0823/val/labels' images and labels...50 found, 0 missing, 0 empty, 0 corrupt: 100%|██████████| 50/50 [00:00<00:00, 9719.39it/s]
val: New cache created: /dw/dw05/VOCdevkit/VOC0823/val/labels.cache

AutoAnchor: 2.55 anchors/target, 1.000 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅
Plotting labels to runs/train/exp21/labels.jpg... 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to runs/train/exp21
Starting training for 300 epochs...

Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
  0%|          | 0/13 [00:03<?, ?it/s]

4.2 Testing

4.2.1 Modify the defaults in parse_opt of yolov5/val.py

Likewise, before evaluation, point the data and weights arguments at our files by changing the default values.

def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / 'data/VOC.yaml', help='dataset.yaml path')
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp21/weights/best.pt', help='model.pt path(s)')
    parser.add_argument('--batch-size', type=int, default=32, help='batch size')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
    parser.add_argument('--task', default='val', help='train, val, test, speed or study')
...
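The same applies here; for example, evaluating a particular checkpoint without editing val.py:

python val.py --data data/VOC.yaml --weights runs/train/exp21/weights/best.pt --task val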

4.2.2 Run evaluation

Run python val.py from the command line:

val: data=data/VOC.yaml, weights=runs/train/exp21/weights/best.pt, batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v6.2-47-geab35f6 Python-3.9.12 torch-1.12.1 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 11019MiB)

Fusing layers... 
YOLOv5n summary: 213 layers, 1760518 parameters, 0 gradients, 4.1 GFLOPs
val: Scanning '/dw/dw05/VOCdevkit/VOC0823/val/labels.cache' images and labels... 50 found, 0 missing, 0 empty, 0 corrupt: 100%|██████████| 50/50 [00:00<?, ?it/s]
                 Class     Images  Instances          P          R     mAP@.5 mAP@.5:.95: 100%|██████████| 2/2 [00:01<00:00,  1.95it/s]
                   all         50         50      0.999          1      0.995      0.917
Speed: 0.3ms pre-process, 1.2ms inference, 1.1ms NMS per image at shape (32, 3, 640, 640)
Results saved to runs/val/exp5

5. Variant: train and val sets already split

If the dataset already comes with separate train and val sets, skip the random split and modify VOC.yaml as follows; the download script then only converts annotations and copies files into the layout YOLOv5 expects.

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── autodl-tmp
#     └── SF6barometer  ← dataset root (train/, val/, and the generated VOC/)


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../autodl-tmp/SF6barometer/VOC
train: # training dir (relative to 'path'); holds images/ and labels/
  - train
val: # validation dir (relative to 'path'); holds images/ and labels/
  - val
test: # test dir (optional)
  - test

# Classes
names:
  0: abnormal
  1: normal


# Download script/URL (optional) ---------------------------------------------------------------------------------------
# Executed automatically when the val path above is missing
download: |
    import shutil
    import xml.etree.ElementTree as ET
    import os
    import random
    from tqdm import tqdm
    
    
    def convert_label(xml_file_path, txt_file_path):
        def convert_box(size, box):
            dw, dh = 1. / size[0], 1. / size[1]
            x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1,\
                box[1] - box[0], box[3] - box[2]
            return x * dw, y * dh, w * dw, h * dh
    
        in_file = open(xml_file_path)
        out_file = open(txt_file_path, 'w')
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)
        
        names = list(yaml['names'].values())
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls in names:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = names.index(cls)  # class id
                out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
        out_file.close()
        in_file.close()
    
    
    # 1. paths
    root_dir = os.path.join(yaml['path'], '..')  # parent of the VOC dir
    src_train_dir = os.path.join(root_dir, 'train')  # must exist
    src_val_dir = os.path.join(root_dir, 'val')  # must exist
    voc_dir = yaml['path']
    txts_dir = os.path.join(voc_dir, 'Labels')
    train_dir = os.path.join(voc_dir, 'train')
    val_dir = os.path.join(voc_dir, 'val')
    train_img_dir = os.path.join(train_dir, 'images')
    train_txt_dir = os.path.join(train_dir, 'labels')
    val_img_dir = os.path.join(val_dir, 'images')
    val_txt_dir = os.path.join(val_dir, 'labels')
    # 2. recreate the generated directories
    shutil.rmtree(voc_dir, ignore_errors=True)
    os.mkdir(voc_dir)
    os.mkdir(txts_dir)
    os.mkdir(train_dir)
    os.mkdir(train_img_dir)
    os.mkdir(train_txt_dir)
    os.mkdir(val_dir)
    os.mkdir(val_img_dir)
    os.mkdir(val_txt_dir)
    # 3. convert and copy the training files
    file_list = os.listdir(src_train_dir)
    for file in tqdm(file_list, desc='copy train file'):
        if file.endswith('.jpg'):
            img_file = file
            xml_file = file.replace('.jpg', '.xml')
            txt_file = file.replace('.jpg', '.txt')
            img_file_path = os.path.join(src_train_dir, img_file)
            xml_file_path = os.path.join(src_train_dir, xml_file)
            dst_img_file_path = os.path.join(train_img_dir, img_file)
            dst_txt_file_path = os.path.join(train_txt_dir, txt_file)   
            shutil.copyfile(img_file_path, dst_img_file_path)
            convert_label(xml_file_path, dst_txt_file_path)
    # 4. convert and copy the validation files
    file_list = os.listdir(src_val_dir)
    for file in tqdm(file_list, desc='copy val file'):
        if file.endswith('.jpg'):
            img_file = file
            xml_file = file.replace('.jpg', '.xml')
            txt_file = file.replace('.jpg', '.txt')
            img_file_path = os.path.join(src_val_dir, img_file)
            xml_file_path = os.path.join(src_val_dir, xml_file)
            dst_img_file_path = os.path.join(val_img_dir, img_file)
            dst_txt_file_path = os.path.join(val_txt_dir, txt_file)   
            shutil.copyfile(img_file_path, dst_img_file_path)
            convert_label(xml_file_path, dst_txt_file_path)
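For reference, the source layout this variant expects, inferred from the paths in the script above ("prepared" as in section 2):

├─SF6barometer  # dataset root; root_dir = parent of yaml['path']
│  ├─train  # pre-split training set: paired .jpg + .xml files (prepared)
│  ├─val  # pre-split validation set: paired .jpg + .xml files (prepared)
│  └─VOC  # yaml['path']; deleted and rebuilt by the script (generated)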
