1.下载dior数据集
dior数据集共包含四个文件夹:
(1)Annotations
'Horizontal Bounding Boxes'
'Oriented Bounding Boxes': 00001.xml — 23463.xml
(2)ImageSets
Layout :空
Main :
train.txt
val.txt
test.txt
Segmentation : 空
(3)JPEGImages-test
11726.jpg — 23463.jpg
(4)JPEGImages-trainval
00001.jpg — 11725.jpg
2.将DIOR处理成DOTA格式
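(1)xml转换为txt:将input_folder、output_folder修改为自己的路径。注意原始数据集中的文件夹名为'Oriented Bounding Boxes'(带空格),下面代码中使用的是'Oriented_Bounding_Boxes'(下划线),请以自己磁盘上的实际文件夹名为准。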
import os
import xml.etree.ElementTree as ET

# Folder holding the DIOR oriented-box XML files, and the folder that
# receives one DOTA-style TXT file per XML file.
input_folder = '/home/xh/dataset/DIOR/Annotations/Oriented_Bounding_Boxes'
output_folder = '/home/xh/dataset/DIOR/Annotations/Oriented_Bounding_Boxes_processed'

# Corner tags read from each <robndbox>, in the order they are written out.
CORNER_TAGS = (
    'x_left_top', 'y_left_top',
    'x_right_top', 'y_right_top',
    'x_right_bottom', 'y_right_bottom',
    'x_left_bottom', 'y_left_bottom',
)


def xml_to_dota_lines(root, label=''):
    """Build the TXT lines for every oriented box under *root*.

    Each line is ``x1 y1 x2 y2 x3 y3 x4 y4 name difficult`` followed by a
    newline.

    Args:
        root: Root element of one parsed annotation XML file.
        label: Text used to identify the file in warning messages.

    Returns:
        A list of lines, one per ``<object>`` that has a complete
        ``<robndbox>`` and a ``<name>``.
    """
    lines = []
    for obj in root.findall('.//object'):
        robndbox = obj.find('robndbox')
        if robndbox is None:
            continue
        # findtext() returns None for a missing tag instead of raising, so a
        # malformed object can be reported and skipped rather than aborting
        # the whole conversion.
        corners = [(robndbox.findtext(tag) or '').strip() for tag in CORNER_TAGS]
        name = (obj.findtext('name') or '').strip()
        if not name or not all(corners):
            print(f'warning: skip incomplete object in {label}')
            continue
        # A missing or empty <difficult> tag is written as 0.
        difficult = (obj.findtext('difficult') or '').strip() or '0'
        lines.append(f"{' '.join(corners)} {name} {difficult}\n")
    return lines


def convert_folder(src_dir, dst_dir):
    """Convert every ``*.xml`` file in *src_dir* to a ``*.txt`` file in *dst_dir*.

    A TXT file is written for every XML file, even when it yields no lines.

    Args:
        src_dir: Folder containing the annotation XML files.
        dst_dir: Folder for the TXT files; created if it does not exist.

    Returns:
        The number of XML files converted.
    """
    # Without this, the first open(..., 'w') fails when the folder is missing.
    os.makedirs(dst_dir, exist_ok=True)
    converted = 0
    for filename in sorted(os.listdir(src_dir)):
        if not filename.endswith('.xml'):
            continue
        print(f'convert {filename}')
        root = ET.parse(os.path.join(src_dir, filename)).getroot()
        # splitext only touches the extension; str.replace would also rewrite
        # a '.xml' that appears earlier in the file name.
        stem = os.path.splitext(filename)[0]
        output_path = os.path.join(dst_dir, stem + '.txt')
        with open(output_path, 'w', encoding='utf-8') as output_file:
            output_file.writelines(xml_to_dota_lines(root, filename))
        converted += 1
    return converted


if __name__ == '__main__':
    convert_folder(input_folder, output_folder)
    print("转换完成!")
(2)标签划分:DIOR数据集的所有标签放在同一个文件夹中,需要按照DOTA格式划分为trainval和test两部分。
首先,在DIOR数据集所在目录下新建文件夹DIOR_processed,在其内部新建test、trainval两个文件夹,再在这两个文件夹下分别新建annfiles、images文件夹。至此,目录结构与DOTA数据集一致。运行下面的代码,进行标签划分。
所有的路径修改为自己的。
import os
import shutil

# Folder with the converted TXT labels, and the per-split target folders.
source_folder = '/home/xh/dataset/DIOR/Annotations/Oriented_Bounding_Boxes_processed'
trainval_target_folder = '/home/xh/dataset/DIOR_processed/trainval/annfiles'
test_target_folder = '/home/xh/dataset/DIOR_processed/test/annfiles'
# Folder with the split lists (one image id per line).
image_sets_folder = '/home/xh/dataset/DIOR/ImageSets/Main'


def read_split_names(image_sets_dir, split):
    """Return the image ids listed for *split*.

    Reads ``<split>.txt`` from *image_sets_dir*. For ``'trainval'``, when
    ``trainval.txt`` does not exist, the ids of ``train.txt`` and ``val.txt``
    are concatenated instead.

    Args:
        image_sets_dir: Folder containing the split list files.
        split: Split name, e.g. ``'trainval'`` or ``'test'``.

    Returns:
        A list of ids with surrounding whitespace and blank lines removed.

    Raises:
        FileNotFoundError: If a required list file is missing.
    """
    list_paths = [os.path.join(image_sets_dir, f'{split}.txt')]
    if split == 'trainval' and not os.path.exists(list_paths[0]):
        list_paths = [os.path.join(image_sets_dir, f'{part}.txt')
                      for part in ('train', 'val')]
    names = []
    for list_path in list_paths:
        with open(list_path, 'r') as list_file:
            names.extend(line.strip() for line in list_file if line.strip())
    return names


def copy_split(file_names, source_dir, target_dir):
    """Copy ``<name>.txt`` for every name in *file_names* into *target_dir*.

    Args:
        file_names: Image ids without extension.
        source_dir: Folder containing all label files.
        target_dir: Destination folder; created if it does not exist.

    Returns:
        The number of label files copied. Ids without a label file are
        skipped and reported in one warning line.
    """
    os.makedirs(target_dir, exist_ok=True)
    copied = 0
    missing = []
    for file_name in file_names:
        source_file_path = os.path.join(source_dir, f"{file_name}.txt")
        if not os.path.exists(source_file_path):
            missing.append(file_name)
            continue
        shutil.copy(source_file_path, os.path.join(target_dir, f"{file_name}.txt"))
        copied += 1
        print(f"复制 {file_name}.txt 完成")
    if missing:
        print(f"warning: {len(missing)} label file(s) not found in {source_dir}")
    return copied


if __name__ == '__main__':
    # trainval split
    total_trainval = copy_split(
        read_split_names(image_sets_folder, 'trainval'),
        source_folder, trainval_target_folder)
    print("trainval 复制完成!")
    print(f"total trainval {total_trainval}")

    # test split
    total_test = copy_split(
        read_split_names(image_sets_folder, 'test'),
        source_folder, test_target_folder)
    print("test 复制完成!")
    print(f"total test {total_test}")

    # Final summary of both splits.
    print(f"total trainval {total_trainval}")
    print(f"total test {total_test}")
(3)图片:标签划分完以后,把原始DIOR数据集中JPEGImages-trainval、JPEGImages-test里的图片分别放入DIOR_processed下对应的trainval/images、test/images文件夹。
3.mmrotate 修改
(1)新增数据集类:在mmrotate/datasets/下将dota.py复制为dior.py,修改类名、类别名(CLASSES)和PALETTE。
@ROTATED_DATASETS.register_module()
class DIORDataset(CustomDataset):
    """DIOR dataset for detection.

    Args:
        ann_file (str): Annotation file path.
        pipeline (list[dict]): Processing pipeline.
        version (str, optional): Angle representations. Defaults to 'oc'.
        difficulty (int, optional): The difficulty threshold of GT.
            Defaults to 100.
    """

    # The 20 category names, spelled as in the tuple this class was given.
    CLASSES = (
        'airplane', 'airport', 'baseballfield', 'basketballcourt',
        'bridge', 'chimney', 'dam', 'Expressway-Service-area',
        'Expressway-toll-station', 'golffield', 'groundtrackfield', 'harbor',
        'overpass', 'ship', 'stadium', 'storagetank',
        'tenniscourt', 'trainstation', 'vehicle', 'windmill',
    )

    # One RGB colour per entry of CLASSES, in the same order.
    PALETTE = [
        (165, 42, 42), (189, 183, 107), (0, 255, 0), (255, 0, 0),
        (138, 43, 226), (255, 128, 0), (255, 0, 255), (0, 255, 255),
        (255, 193, 193), (0, 51, 153), (255, 250, 205), (0, 139, 139),
        (255, 255, 0), (147, 116, 116), (0, 0, 255), (255, 69, 0),
        (128, 0, 128), (0, 128, 128), (218, 165, 32), (199, 21, 133),
    ]

    def __init__(self, ann_file, pipeline, version='oc', difficulty=100,
                 **kwargs):
        # Set before the parent constructor runs, as in the original order.
        self.version = version
        self.difficulty = difficulty
        super().__init__(ann_file, pipeline, **kwargs)

    # The rest of the class is identical to the class in dota.py.
(2)修改mmrotate/datasets/__init__.py,导入DIORDataset并将其加入__all__。
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import build_dataset # noqa: F401, F403
from .dota import DOTADataset # noqa: F401, F403
from .hrsc import HRSCDataset # noqa: F401, F403
from .pipelines import * # noqa: F401, F403
from .sar import SARDataset # noqa: F401, F403
# NOTE(review): DroneDataset is not introduced anywhere in this guide --
# presumably a custom dataset local to the author's tree. If your tree has no
# drone.py, drop this import and the 'DroneDataset' entry in __all__.
from .drone import DroneDataset
# Import the new DIOR dataset class defined in dior.py.
from .dior import DIORDataset
# Public names of the package; 'DIORDataset' is the entry added for DIOR.
__all__ = ['SARDataset', 'DOTADataset', 'build_dataset', 'HRSCDataset', 'DroneDataset', 'DIORDataset']
(3)在configs/_base_/datasets/下,将dotav1.py复制为dior.py。data_root以及train、val、test中的路径都修改为自己的路径。
# dataset settings
dataset_type = 'DIORDataset'
data_root = '/home/xh/dataset/DIOR_processed/'

# Per-channel normalisation values shared by the train and test pipelines.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RResize', img_scale=(1024, 1024)),
    dict(type='RRandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1024, 1024),
        flip=False,
        transforms=[
            dict(type='RResize'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img']),
        ],
    ),
]

data = dict(
    samples_per_gpu=2,  # batch size per GPU
    workers_per_gpu=2,  # number of data-loading workers per GPU
    train=dict(
        type=dataset_type,
        ann_file=f'{data_root}trainval/annfiles/',
        img_prefix=f'{data_root}trainval/images/',
        pipeline=train_pipeline,
    ),
    # val and test both point at the test split.
    val=dict(
        type=dataset_type,
        ann_file=f'{data_root}test/annfiles/',
        img_prefix=f'{data_root}test/images/',
        pipeline=test_pipeline,
    ),
    test=dict(
        type=dataset_type,
        ann_file=f'{data_root}test/annfiles/',
        img_prefix=f'{data_root}test/images/',
        pipeline=test_pipeline,
    ),
)
(4)配置文件中,在文件my_oriented_rcnn_r50_fpn_1x_dior_le90中将num_classes设置为20。文件开头_base_部分设置为(只改datasets的文件即可):
_base_ = [
'_base_/datasets/dior.py', '_base_/schedules/schedule_1x.py',
'_base_/default_runtime.py'
]
OVER!