Splitting a VOC Dataset, and Converting the Split Dataset to COCO Format

1. The VOC Dataset

    LabelImg is a widely used open-source image annotation tool, mainly used to build the datasets needed for object detection models. The Visual Object Classes (VOC) dataset is one of the common object detection dataset formats: bounding boxes and class labels are drawn on the images with LabelImg, providing the annotation information needed to train a model. The VOC format grew out of the PASCAL VOC challenge, covers many object categories, and has become one of the important benchmarks in object detection, driving steady improvements in algorithm performance.

    After annotating with LabelImg (or any other VOC annotation tool), you end up with two folders, as follows:

BirdNest    ------->>>  root directory, containing the two folders below
    ----Annotations    ------->>>  stores the .xml annotation files
    ----JPEGImages     ------->>>  stores the image files
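
Each image in JPEGImages is paired with a same-named .xml file in Annotations. For orientation, a minimal VOC annotation looks roughly like the sketch below (the file name, class name and coordinates are purely illustrative); the scripts in this post only rely on the <size>, <name> and <bndbox> fields:

<annotation>
    <folder>JPEGImages</folder>
    <filename>bird_0001.jpg</filename>
    <size>
        <width>1920</width>
        <height>1080</height>
        <depth>3</depth>
    </size>
    <object>
        <name>nest</name>
        <difficult>0</difficult>
        <bndbox>
            <xmin>100</xmin>
            <ymin>200</ymin>
            <xmax>300</xmax>
            <ymax>400</ymax>
        </bndbox>
    </object>
</annotation>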


2. Splitting the VOC Dataset

    The code below splits the dataset into a training set and a validation set according to a configurable train_ratio (the example in __main__ uses 9:1). It reports any .xml annotation for which no matching image can be found, and during the split it recognizes the following image formats under the JPEGImages folder:

['.jpg', '.png', '.gif', '.bmp', '.tiff', '.jpeg', '.webp', '.svg', '.psd', '.cr2', '.nef', '.dng']

The full script:

import os
import random

from tqdm import tqdm

image_extensions = ['.jpg', '.png', '.gif', '.bmp', '.tiff', '.jpeg', '.webp', '.svg', '.psd', '.cr2', '.nef', '.dng']


def split_voc_dataset(dataset_dir, train_ratio, val_ratio, use_random_seed=False, random_seed=999):
    if not (0 < train_ratio + val_ratio <= 1):
        print("Invalid ratio values. train_ratio + val_ratio must be greater than 0 and at most 1.")
        return

    annotations_dir = os.path.join(dataset_dir, 'Annotations')
    images_dir = os.path.join(dataset_dir, 'JPEGImages')
    output_dir = os.path.join(dataset_dir, 'ImageSets/Main')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dict_info = dict()
    # Map each image's base name to its file extension. Match on the real extension
    # (not a substring) so unrelated files are not picked up by accident.
    for file in os.listdir(images_dir):
        base_name, ext = os.path.splitext(file)
        if ext.lower() in image_extensions:
            dict_info[base_name] = ext

    # List all the XML files in the Annotations directory
    xml_files = [file for file in os.listdir(annotations_dir) if file.endswith('.xml')]

    if use_random_seed:
        # Set the random seed for reproducibility
        random.seed(random_seed)

    random.shuffle(xml_files)

    num_samples = len(xml_files)
    num_train = int(num_samples * train_ratio)
    num_val = int(num_samples * val_ratio)

    train_xml_files = xml_files[:num_train]
    val_xml_files = xml_files[num_train:num_train + num_val]

    with open(os.path.join(output_dir, 'train_list.txt'), 'w+') as train_file:
        for xml_file in train_xml_files:
            image_name = os.path.splitext(xml_file)[0]
            if image_name in dict_info:
                image_path = os.path.join('JPEGImages', image_name + dict_info[image_name])
                annotation_path = os.path.join('Annotations', xml_file)
                train_file.write(f'{image_path}\t{annotation_path}\n')
            else:
                print(f"没有找到图片 {os.path.join(images_dir, image_name)}")

    with open(os.path.join(output_dir, 'val_list.txt'), 'w+') as val_file:
        for xml_file in val_xml_files:
            image_name = os.path.splitext(xml_file)[0]
            if image_name in dict_info:
                image_path = os.path.join('JPEGImages', image_name + dict_info[image_name])
                annotation_path = os.path.join('Annotations', xml_file)
                val_file.write(f'{image_path}\t{annotation_path}\n')
            else:
                print(f"没有找到图片 {os.path.join(images_dir, image_name)}")

    labels = set()
    for xml_file in tqdm(xml_files):
        annotation_path = os.path.join(annotations_dir, xml_file)
        with open(annotation_path, 'r+', encoding='utf-8') as f:
            lines = f.readlines()
            for line in lines:
                if '<name>' in line:
                    label = line.strip().replace('<name>', '').replace('</name>', '')
                    labels.add(label)

    with open(os.path.join(output_dir, 'labels.txt'), 'w+') as labels_file:
        for label in labels:
            labels_file.write(f'{label}\n')
            

if __name__ == "__main__":
    dataset_dir = 'BirdNest/'  # root directory containing Annotations/ and JPEGImages/
    train_ratio = 0.9  # Adjust the train-validation split ratio as needed
    val_ratio = 1 - train_ratio

    # With use_random_seed=True, the split is reproducible for a given seed value;
    # changing the seed changes which images land in the training and validation sets.
    # With use_random_seed=False, no seed is set and the split differs on every run.
    random_seed = 6888
    use_random_seed = True
    split_voc_dataset(dataset_dir, train_ratio, val_ratio, use_random_seed, random_seed)


[Screenshot: ImageSets/Main after the split, containing train_list.txt, val_list.txt and labels.txt]
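
For reference, every line of the generated train_list.txt / val_list.txt pairs a relative image path with its annotation path, separated by a tab, and labels.txt holds one class name per line (the file names below are only illustrative):

JPEGImages/bird_0001.jpg	Annotations/bird_0001.xml
JPEGImages/bird_0007.png	Annotations/bird_0007.xml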

3. Converting VOC to COCO Format

Most current frameworks primarily support the COCO format, because it is easier to store and use: all annotations live in a single .json file instead of one .xml file per image.
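
The conversion script below writes the three standard top-level COCO keys. Schematically (the values simply mirror the illustrative VOC annotation from section 1), the output looks like this; note that a COCO bbox is [x, y, width, height], which is why the script subtracts xmin and ymin when building it:

{
    "images":      [{"id": 1, "file_name": "bird_0001.jpg", "width": 1920, "height": 1080}],
    "annotations": [{"id": 1, "image_id": 1, "category_id": 1,
                     "bbox": [100, 200, 200, 200], "area": 40000,
                     "segmentation": [], "iscrowd": 0}],
    "categories":  [{"id": 1, "name": "nest"}]
}

The full conversion script: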

import os
import json

from xml.etree import ElementTree as ET


def parse_xml(dataset_dir, xml_file):
    xml_path = os.path.join(dataset_dir, xml_file)
    tree = ET.parse(xml_path)
    root = tree.getroot()

    objects = root.findall('object')
    annotations = []

    for obj in objects:
        bbox = obj.find('bndbox')
        xmin = int(float(bbox.find('xmin').text))
        ymin = int(float(bbox.find('ymin').text))
        xmax = int(float(bbox.find('xmax').text))
        ymax = int(float(bbox.find('ymax').text))

        # Extract the label from the XML annotation (guard against a missing <name> element)
        name_node = obj.find('name')
        label = name_node.text if name_node is not None else None
        if not label:
            print("Label not found in XML annotation. Skipping annotation.")
            continue

        annotations.append({
            'xmin': xmin,
            'ymin': ymin,
            'xmax': xmax,
            'ymax': ymax,
            'label': label
        })

    return annotations


def convert_to_coco_format(image_list_file, annotations_dir, output_json_file, dataset_dir):
    images = []
    annotations = []
    categories = []

    # Load labels
    with open(os.path.join(annotations_dir, 'labels.txt'), 'r+', encoding='utf-8') as labels_file:
        label_lines = labels_file.readlines()
        categories = [{'id': i + 1, 'name': label.strip()} for i, label in enumerate(label_lines)]

    annotation_id = 1  # Initialize unique annotation ID

    # Load image list file
    with open(image_list_file, 'r+') as image_list:
        image_lines = image_list.readlines()
        for i, line in enumerate(image_lines):
            image_path, annotation_path = line.strip().split('\t')
            image_id = i + 1
            image_filename = os.path.basename(image_path)

            # Extract image size from XML file
            xml_path = os.path.join(dataset_dir, annotation_path)
            tree = ET.parse(xml_path)
            size = tree.find('size')
            image_height = int(size.find('height').text)
            image_width = int(size.find('width').text)

            images.append({
                'id': image_id,
                'file_name': image_filename,
                'height': image_height,
                'width': image_width,
                'license': None,
                'flickr_url': None,
                'coco_url': None,
                'date_captured': None
            })

            # Load annotations from XML files
            xml_annotations = parse_xml(dataset_dir, annotation_path)
            for xml_annotation in xml_annotations:
                label = xml_annotation['label']
                category_id = next((cat['id'] for cat in categories if cat['name'] == label), None)
                if category_id is None:
                    print(f"Label '{label}' not found in categories. Skipping annotation.")
                    continue

                bbox = {
                    'xmin': xml_annotation['xmin'],
                    'ymin': xml_annotation['ymin'],
                    'xmax': xml_annotation['xmax'],
                    'ymax': xml_annotation['ymax']
                }

                annotations.append({
                    'id': annotation_id,
                    'image_id': image_id,
                    'category_id': category_id,
                    'bbox': [bbox['xmin'], bbox['ymin'], bbox['xmax'] - bbox['xmin'], bbox['ymax'] - bbox['ymin']],
                    'area': (bbox['xmax'] - bbox['xmin']) * (bbox['ymax'] - bbox['ymin']),
                    'segmentation': [],
                    'iscrowd': 0
                })
                annotation_id += 1  # Increment annotation ID for uniqueness

    coco_data = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }

    with open(output_json_file, 'w', encoding='utf-8') as json_file:
        json.dump(coco_data, json_file, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    # Adjust paths as needed
    dataset_dir = 'BirdNest/'
    image_sets_dir = 'BirdNest/ImageSets/Main/'
    train_list_file = os.path.join(image_sets_dir, 'train_list.txt')
    val_list_file = os.path.join(image_sets_dir, 'val_list.txt')
    output_train_json_file = os.path.join(dataset_dir, 'train_coco.json')
    output_val_json_file = os.path.join(dataset_dir, 'val_coco.json')

    convert_to_coco_format(train_list_file, image_sets_dir, output_train_json_file, dataset_dir)
    convert_to_coco_format(val_list_file, image_sets_dir, output_val_json_file, dataset_dir)
    print("The JSON file has been successfully generated!!!")

[Screenshot: successful conversion to COCO format; train_coco.json and val_coco.json are generated under the dataset root directory]
