对于开集识别任务,单张遥感图像中出现的类别较少,经常出现完全不存在其他类别的图片的情况。因此需要对图片进行融合,本文将Pascal VOC格式数据集的图片和标签每四张融合为一张。
导入相关库
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
from xml.dom import minidom
方法:解析Annotation文件
def parse_xml(xml_file):
    """Parse a Pascal VOC annotation file.

    Returns ``(objects, root)`` where ``objects`` is a list of dicts with
    keys ``name``/``xmin``/``ymin``/``xmax``/``ymax`` (one per <object>
    element) and ``root`` is the parsed XML root element.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()
    # One dict per annotated object; coordinates are stored as plain ints.
    objects = [
        {
            'name': node.find('name').text,
            'xmin': int(node.find('bndbox/xmin').text),
            'ymin': int(node.find('bndbox/ymin').text),
            'xmax': int(node.find('bndbox/xmax').text),
            'ymax': int(node.find('bndbox/ymax').text),
        }
        for node in root.findall('object')
    ]
    return objects, root
通过遍历object返回一个标签文件中所有对象的类别、坐标的字典的列表。
方法:创建新的空白Annotation文件
def create_new_annotation(root, objects, new_width, new_height):
    """Rewrite *root* in place to describe the merged image.

    Updates the <size> width/height, removes every existing <object>
    element, then appends one <object> per entry in *objects*.
    Returns the modified *root*.
    """
    root.find('size/width').text = str(new_width)
    root.find('size/height').text = str(new_height)
    # findall() returns a snapshot list, so removing during the loop is safe.
    for stale in root.findall('object'):
        root.remove(stale)
    for info in objects:
        node = ET.SubElement(root, 'object')
        ET.SubElement(node, 'name').text = info['name']
        ET.SubElement(node, 'pose').text = 'Unspecified'
        ET.SubElement(node, 'truncated').text = '0'
        ET.SubElement(node, 'difficult').text = '0'
        box = ET.SubElement(node, 'bndbox')
        for key in ('xmin', 'ymin', 'xmax', 'ymax'):
            ET.SubElement(box, key).text = str(info[key])
    return root
root为传入的一个batch的最后一个annotation文件的根节点,root 对应annotation标签及其子元素的整个子树。这里需要修改标签中图片宽、高,之后清空object,加入新的融合的object信息。
方法:修改图片和gt框尺寸
def resize_image_and_bboxes(img, objects, target_size):
    """Resize *img* to ``target_size`` (width, height) and rescale every
    bounding box in *objects* in place to match.

    Returns ``(resized_image, objects)``; coordinates are truncated to int.
    """
    height, width, _ = img.shape
    sx = target_size[0] / width
    sy = target_size[1] / height
    resized = cv2.resize(img, target_size)
    for box in objects:
        box['xmin'] = int(box['xmin'] * sx)
        box['xmax'] = int(box['xmax'] * sx)
        box['ymin'] = int(box['ymin'] * sy)
        box['ymax'] = int(box['ymax'] * sy)
    return resized, objects
方法:融合图片和标签文件
def merge_images_and_annotations(image_paths, annotation_paths, output_image_path, output_annotation_path, target_size):
    """Tile up to four images into one 2x2 mosaic and write the merged
    image plus a single Pascal VOC annotation covering all boxes.

    ``image_paths`` and ``annotation_paths`` are parallel 4-element lists;
    ``None`` entries become black placeholder tiles with no boxes. The XML
    tree of the last real annotation in the batch is reused as the template
    for the merged annotation file.
    """
    tiles = []
    merged_objects = []
    root = None  # root element of the last successfully parsed annotation
    for idx, path in enumerate(image_paths):
        if path:
            tile = cv2.imread(path)
            objects, root = parse_xml(annotation_paths[idx])
            tile, objects = resize_image_and_bboxes(tile, objects, target_size)
        else:
            # Missing slot: fill with a black tile and no boxes.
            tile = np.zeros((target_size[1], target_size[0], 3), dtype=np.uint8)
            objects = []
        tiles.append(tile)
        # Shift boxes into mosaic coordinates: column = idx % 2, row = idx // 2.
        dx = (idx % 2) * target_size[0]
        dy = (idx // 2) * target_size[1]
        for box in objects:
            box['xmin'] += dx
            box['xmax'] += dx
            box['ymin'] += dy
            box['ymax'] += dy
        merged_objects.extend(objects)
    # Paste the four tiles into a canvas twice the tile size in each axis.
    canvas = np.zeros((target_size[1] * 2, target_size[0] * 2, 3), dtype=np.uint8)
    offsets = [(0, 0), (0, target_size[0]), (target_size[1], 0), (target_size[1], target_size[0])]
    for tile, (dy, dx) in zip(tiles, offsets):
        canvas[dy:dy + target_size[1], dx:dx + target_size[0]] = tile
    new_root = create_new_annotation(root, merged_objects, target_size[0] * 2, target_size[1] * 2)
    raw_xml = ET.tostring(new_root, encoding='unicode')
    pretty_xml = minidom.parseString(raw_xml).toprettyxml()
    with open(output_annotation_path, 'w') as f:
        f.write(pretty_xml)
    cv2.imwrite(output_image_path, canvas)
main
if __name__ == '__main__':
    voc_dir = './VOC2007'  # root of the VOC-format dataset
    images_dir = os.path.join(voc_dir, 'JPEGImages')
    annotations_dir = os.path.join(voc_dir, 'Annotations')
    images_output_dir = os.path.join(voc_dir, 'JPEGImages_merge')
    annotations_output_dir = os.path.join(voc_dir, 'Annotations_merge')
    # FIX: the output folders must exist before writing — otherwise open()
    # raises FileNotFoundError and cv2.imwrite fails silently.
    os.makedirs(images_output_dir, exist_ok=True)
    os.makedirs(annotations_output_dir, exist_ok=True)
    # FIX: keep only .jpg entries; anything else (hidden files, subfolders)
    # would break the '.jpg' -> '.xml' annotation-path mapping below.
    # sorted() keeps batches deterministic.
    image_files = sorted(f for f in os.listdir(images_dir) if f.endswith('.jpg'))
    # Per-tile target size; the merged mosaic is twice this in each dimension.
    target_size = (800, 800)
    # Process the images four at a time (one mosaic per batch).
    for i in range(0, len(image_files), 4):
        batch = image_files[i:i + 4]
        # Pad a short final batch with None (rendered as black tiles).
        batch += [None] * (4 - len(batch))
        image_paths = [os.path.join(images_dir, name) if name else None for name in batch]
        annotation_paths = [os.path.join(annotations_dir, name.replace('.jpg', '.xml')) if name else None
                            for name in batch]
        output_image_path = os.path.join(images_output_dir, f'merged_{i // 4}.jpg')
        output_annotation_path = os.path.join(annotations_output_dir, f'merged_{i // 4}.xml')
        merge_images_and_annotations(image_paths, annotation_paths, output_image_path,
                                     output_annotation_path, target_size)
    print("所有图片和标注文件已合并完成。")
划分训练集和测试集脚本
import os
import random
def split_dataset(image_files, split_ratio=0.8):
    """
    Randomly split a list of image file names into train and test sets.

    :param image_files: list of image file names (left unmodified)
    :param split_ratio: fraction assigned to the trainval split, default 0.8
    :return: (trainval_files, test_files)
    """
    # FIX: shuffle a copy — the original shuffled the caller's list in
    # place, silently reordering it as a side effect.
    shuffled = list(image_files)
    random.shuffle(shuffled)
    split_index = int(len(shuffled) * split_ratio)
    return shuffled[:split_index], shuffled[split_index:]
def write_split_file(file_list, output_file):
    """
    Write every file name in *file_list* to *output_file*, one per line.

    :param file_list: list of file names
    :param output_file: destination text file path (overwritten)
    """
    with open(output_file, 'w') as f:
        f.writelines(f"{name}\n" for name in file_list)
if __name__ == '__main__':
    voc_dir = 'VOCdevkit/VOC2007'
    images_dir = os.path.join(voc_dir, 'JPEGImages')
    imageset_dir = os.path.join(voc_dir, 'ImageSets/Main')
    os.makedirs(imageset_dir, exist_ok=True)
    # Collect the stems (file names without extension) of merged images.
    image_files = [
        os.path.splitext(name)[0]
        for name in os.listdir(images_dir)
        if name.startswith("merged_") and name.endswith(".jpg")
    ]
    # 80/20 random trainval/test split.
    trainval_files, test_files = split_dataset(image_files, split_ratio=0.8)
    # Persist the split lists in VOC's ImageSets/Main layout.
    write_split_file(trainval_files, os.path.join(imageset_dir, 'trainval.txt'))
    write_split_file(test_files, os.path.join(imageset_dir, 'test.txt'))
    print("数据集划分完成。")
    print(f"训练集样本数: {len(trainval_files)}")
    print(f"测试集样本数: {len(test_files)}")
运行结果