【YOLO学习笔记——数据集】VOC格式转YOLO格式数据

Wansit

已于 2023-01-29 16:39:14 修改

阅读量2.1k

点赞数 3

分类专栏： YOLO 文章标签： python

于 2023-01-29 11:31:18 首次发布

本文链接：https://blog.csdn.net/Wansit/article/details/128786234

版权

YOLO 专栏收录该内容

4 篇文章 10 订阅

订阅专栏

注：需要下面打包好的代码文件，可私聊博主（免费）

1.统计数据集中包含的类别数量

2.划分数据集并转换为YOLO格式（txt格式）

3.修改数据集yaml

4.修改train.py中的数据集格式yaml路径

1.统计数据集中包含的类别数量

这一步是为了后续改写数据集格式yaml文件所需要，为必要步骤，具体代码如下。

整个文件唯一需要改动的地方为xml_path变量，填写xml标签所在的文件夹即可。运行后即可得到数据集的所有类别及各自目标数量。

import os
import random
import xml.etree.ElementTree as ET

clses = []
cls_num = {}
xml_path = r'Annotations'

temp_xml        = os.listdir(xml_path)
total_xml       = []
for xml in temp_xml:
    if xml.endswith(".xml"):
        total_xml.append(xml)

for name in total_xml:
    try:
        in_file = open(os.path.join(xml_path, name), encoding='utf-8')
        tree = ET.parse(in_file)
        root = tree.getroot()
    except:
        in_file = open(os.path.join(xml_path, name))
        tree = ET.parse(in_file)
        root = tree.getroot()
        

    for obj in root.iter('object'):
        cls = obj.find('name').text
        clses.append(cls)
        if cls not in cls_num.keys():
            cls_num[cls] = 1
        else:
            cls_num[cls] += 1

result = list(set(clses))
print(result)
print(cls_num)
with open('cls_classes.txt',"w") as f:    #设置文件对象
    for i in result:
        f.write(i)
        f.write('\n')

2.划分数据集并转换为YOLO格式（txt格式）

整个代码块需要改的地方共有四处。

1.classes = [ ]，需要改为1中所生成的类别，或者是自己想要的类别，可能存在数据集中某些类别不需要的情况，则可以自动筛选，仅保留自己需要的类别。

2.train_k和val_k 这两个用来控制数据集的划分比例，计算公式为：

训练集(train)：1*train_k*val_k

验证集(val): train_k*(1 - val_k)

测试集(test): 1 - train_k

3.VOC数据集格式的存放路径， dirs

需要改为自己VOC数据集的存放地址，如你的VOC文件夹目录为

VOC

JPEGImages

Annotations

则需将dirs改为‘VOC’，运行下列代码后，VOC文件夹目录会成为

VOC

JPEGImages

Annotations

images

labels

其中images文件夹包含划分好的train，val，test图片；labels文件夹包含着划分好的yolo格式数据集标签，与images中的图片一一对应。

import xml.etree.ElementTree as ET
import os
import random
from shutil import copy
from tqdm import tqdm
import shutil
import cv2

classes = []


# 训练集 验证集 测试集比例
train_k = 0.8
val_k = 0.75

# VOC格式数据集地址
dirs = r'VOC'

random.seed(0)


def clear_hidden_files(path):
    dir_list = os.listdir(path)
    for i in dir_list:
        abspath = os.path.join(os.path.abspath(path), i)
        if os.path.isfile(abspath):
            if i.startswith("._"):
                os.remove(abspath)
        else:
            clear_hidden_files(abspath)


def convert(size, box):
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)


def convert_annotation(label_path, xml_path):
    try:
        in_file = open(xml_path, 'r', encoding='utf-8')
        out_file = open(label_path, 'w')
        #     print(xml_path)
        out_tmp = set()
        tree = ET.parse(in_file)
        root = tree.getroot()
    except:
        in_file = open(xml_path, 'r')
        out_file = open(label_path, 'w')
        #     print(xml_path)
        out_tmp = set()
        tree = ET.parse(in_file)
        root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    if w <= 0 or h <= 0:
        tmp_img_path = xml_path.split('.')[0].split('/')[-1]
        tmp_img = cv2.imread(os.path.join(dirs, 'JPEGImages', tmp_img_path + '.jpg'))
        h, w, c = tmp_img.shape

    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
             float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_tmp.add(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
    #         out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    for i in out_tmp:
        out_file.write(i)

    in_file.close()
    out_file.close()


wd = os.getcwd()
wd = os.getcwd()

if os.path.isdir(os.path.join(dirs, "labels")):
    shutil.rmtree(os.path.join(dirs, "labels"), True)
if not os.path.isdir(os.path.join(dirs, "labels")):
    os.mkdir(os.path.join(dirs, "labels"))
dir_label = os.path.join(dirs, "labels")
if not os.path.isdir(os.path.join(dir_label, "train")):
    os.mkdir(os.path.join(dir_label, "train"))
if not os.path.isdir(os.path.join(dir_label, "val")):
    os.mkdir(os.path.join(dir_label, "val"))
    if not os.path.isdir(os.path.join(dir_label, "test")):
        os.mkdir(os.path.join(dir_label, "test"))

jpg_dirs = os.path.join(dirs, 'images')
if os.path.isdir(os.path.join(dirs, "images")):
    shutil.rmtree(os.path.join(dirs, "images"), True)
if not os.path.isdir(jpg_dirs):
    os.mkdir(jpg_dirs)
jpg_train_dir = os.path.join(jpg_dirs, 'train')
if not os.path.isdir(jpg_train_dir):
    os.mkdir(jpg_train_dir)
jpg_val_dir = os.path.join(jpg_dirs, 'val')
if not os.path.isdir(jpg_val_dir):
    os.mkdir(jpg_val_dir)
jpg_test_dir = os.path.join(jpg_dirs, 'test')
if not os.path.isdir(jpg_test_dir):
    os.mkdir(jpg_test_dir)
label_dir = os.path.join(dirs, "labels")
clear_hidden_files(label_dir)
train_label_dir = os.path.join(label_dir, "train")
val_label_dir = os.path.join(label_dir, "val")
test_label_dir = os.path.join(label_dir, "test")
train_anno_dir = r''

train_file = open(os.path.join(wd, "yolov5_train.txt"), 'w')
val_file = open(os.path.join(wd, "yolov5_val.txt"), 'w')
test_file = open(os.path.join(wd, "yolov5_test.txt"), 'w')
train_file.close()
val_file.close()
test_file.close()
train_file = open(os.path.join(wd, "yolov5_train.txt"), 'a')
val_file = open(os.path.join(wd, "yolov5_val.txt"), 'a')
test_file = open(os.path.join(wd, "yolov5_test.txt"), 'a')

jpg_dir = os.path.join(dirs, 'JPEGImages')
clear_hidden_files(jpg_dir)
jpg_dir_total = os.listdir(jpg_dir)
random.shuffle(jpg_dir_total)
list_train = jpg_dir_total[:int(len(jpg_dir_total) * train_k * val_k)]
list_val = jpg_dir_total[int(len(jpg_dir_total) * train_k * val_k): int(len(jpg_dir_total) * train_k)]
list_test = jpg_dir_total[int(len(jpg_dir_total) * train_k):]

for i in tqdm(range(0, len(list_train))):
    image_path = os.path.join(jpg_dir, list_train[i])
    voc_path = list_train[i]
    (nameWithoutExtention, extention) = os.path.splitext(os.path.basename(image_path))
    (voc_nameWithoutExtention, voc_extention) = os.path.splitext(os.path.basename(voc_path))
    annotation_name = nameWithoutExtention + '.xml'
    label_name = nameWithoutExtention + '.txt'
    label_path = os.path.join(train_label_dir, label_name)
    xml_path = os.path.join(dirs, 'Annotations', annotation_name)

    train_file.write(image_path + '\n')
    # convert_annotation(label_path, xml_path)  # convert label
    new_path = os.path.join(jpg_train_dir, list_train[i])
    copy(image_path, new_path)
    copy(xml_path, os.path.join(label_dir, 'train', annotation_name))

for i in tqdm(range(0, len(list_val))):
    image_path = os.path.join(jpg_dir, list_val[i])
    voc_path = list_val[i]
    (nameWithoutExtention, extention) = os.path.splitext(os.path.basename(image_path))
    (voc_nameWithoutExtention, voc_extention) = os.path.splitext(os.path.basename(voc_path))
    annotation_name = nameWithoutExtention + '.xml'
    label_name = nameWithoutExtention + '.txt'
    label_path = os.path.join(val_label_dir, label_name)
    xml_path = os.path.join(dirs, 'Annotations', annotation_name)

    val_file.write(image_path + '\n')
    # convert_annotation(label_path, xml_path)  # convert label
    new_path = os.path.join(jpg_val_dir, list_val[i])
    copy(image_path, new_path)
    copy(xml_path, os.path.join(label_dir, 'val', annotation_name))

for i in tqdm(range(0, len(list_test))):
    image_path = os.path.join(jpg_dir, list_test[i])
    voc_path = list_test[i]
    (nameWithoutExtention, extention) = os.path.splitext(os.path.basename(image_path))
    (voc_nameWithoutExtention, voc_extention) = os.path.splitext(os.path.basename(voc_path))
    annotation_name = nameWithoutExtention + '.xml'
    label_name = nameWithoutExtention + '.txt'
    label_path = os.path.join(test_label_dir, label_name)
    xml_path = os.path.join(dirs, 'Annotations', annotation_name)

    test_file.write(image_path + '\n')
    # convert_annotation(label_path, xml_path)  # convert label
    new_path = os.path.join(jpg_test_dir, list_test[i])
    copy(image_path, new_path)
    copy(xml_path, os.path.join(label_dir, 'test', annotation_name))

train_file.close()
val_file.close()
test_file.close()

3.修改数据集yaml

首先我们在data文件夹下找到VOC.yaml，如下所示

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
#     └── VOC  ← downloads here (2.8 GB)


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path')  16551 images
  - images/train2012
  - images/train2007
  - images/val2012
  - images/val2007
val: # val images (relative to 'path')  4952 images
  - images/test2007
test: # test images (optional)
  - images/test2007

# Classes
nc: 20  # number of classes
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']  # class names


# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import xml.etree.ElementTree as ET

  from tqdm import tqdm
  from utils.general import download, Path


  def convert_label(path, lb_path, year, image_id):
      def convert_box(size, box):
          dw, dh = 1. / size[0], 1. / size[1]
          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
          return x * dw, y * dh, w * dw, h * dh

      in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
      out_file = open(lb_path, 'w')
      tree = ET.parse(in_file)
      root = tree.getroot()
      size = root.find('size')
      w = int(size.find('width').text)
      h = int(size.find('height').text)

      for obj in root.iter('object'):
          cls = obj.find('name').text
          if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
              xmlbox = obj.find('bndbox')
              bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
              cls_id = yaml['names'].index(cls)  # class id
              out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')


  # Download
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
          url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
          url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
  download(urls, dir=dir / 'images', delete=False, curl=True, threads=3)

  # Convert
  path = dir / f'images/VOCdevkit'
  for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
      imgs_path = dir / 'images' / f'{image_set}{year}'
      lbs_path = dir / 'labels' / f'{image_set}{year}'
      imgs_path.mkdir(exist_ok=True, parents=True)
      lbs_path.mkdir(exist_ok=True, parents=True)

      with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
          image_ids = f.read().strip().split()
      for id in tqdm(image_ids, desc=f'{image_set}{year}'):
          f = path / f'VOC{year}/JPEGImages/{id}.jpg'  # old img path
          lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
          f.rename(imgs_path / f.name)  # move image
          convert_label(path, lb_path, year, id)  # convert labels to YOLO format

然后我们将其复制一份，改名为test.yaml（个人习惯，可自行随意选择），并将下列代码粘贴进去

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
#     └── VOC  ← downloads here (2.8 GB)


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
# path: ./VOC
train: ./VOC/images/train
val: ./VOC/images/val
test: ./VOC/images/test
# number of classes
nc: 0

# class names
names: ['']

注意，我们需要改动几个参数。

如你的数据集命名为VOC，符合2中的目录结构，则只需要改动nc和names，

nc改为你所需要的类别数，比如names：['apple']，则只需要填写1

names改为2中classes = []中的内容即可，注意！！括号中的顺序必须两者保持一致，否则会出现数据类别对应不上的问题！！

如你的数据集命名不为VOC，则建议你改为VOC（皮一下），可以省去很多麻烦

4.修改train.py中的数据集格式yaml路径

终于最后一步了，胜利就在眼前，不要放弃！！

首先我们将train.py拉到最后，你会看到一排这样的代码

def parse_opt(known=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    parser.add_argument('--data', type=str, default=ROOT / 'data/test.yaml', help='dataset.yaml path')
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=150)
    parser.add_argument('--batch-size', type=int, default=32, help='total batch size for all GPUs, -1 for autobatch')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
    parser.add_argument('--noplots', action='store_true', help='save no plot files')
    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')

    # Weights & Biases arguments
    parser.add_argument('--entity', default=None, help='W&B: Entity')
    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')

    opt = parser.parse_known_args()[0] if known else parser.parse_args()
    return opt

真的是又多又长，看的都头疼，不过没关系，我们仅需要改动一行即可。

看到第五行，就是这一行

 parser.add_argument('--data', type=str, default=ROOT / 'data/test.yaml', help='dataset.yaml path')

仅需要改动default=ROOT / 'data/test.yaml' 将最后的test.yaml改为xxxx.yaml即可，xxxx为刚刚3中自己改好的名字。

大功告成，请开始你的表演吧哈哈哈

Wansit

关注

3
点赞
踩
13

收藏

觉得还不错? 一键收藏
2
评论
【YOLO学习笔记——数据集】VOC格式转YOLO格式数据

VOC格式转YOLO格式数据
复制链接

扫一扫

专栏目录