将 VEDAI 遥感图像数据集转为 PASCAL VOC 格式

最新推荐文章于 2023-06-27 16:04:35 发布

dear_jing

最新推荐文章于 2023-06-27 16:04:35 发布

阅读量5.1k

点赞数 9

分类专栏：论文复现调代码　文章标签：自动驾驶、深度学习、pytorch

本文链接：https://blog.csdn.net/dear_jing/article/details/116933053

版权

论文复现调代码专栏收录该内容

5 篇文章 0 订阅

订阅专栏

PASCAL VOC 格式的 VEDAI 数据集（只有JPEGImages和Annotations）：地址（所需积分/C币: 0）
（VEDAI 数据集共有11类，类别和所对应的 label 如下表所示）

类别	car	truck	tractor	camping car	—	—	van	vehicle	pick-up	boat	plane
label	1	2	4	5	7	8	9	10	11	23	31

官方下载地址（里面也有论文和 Devkit）

关于这个数据集，有几个点需要注意，是和其他数据集不一样的：（其中，第2和3点，在网上找不到思路，希望有朋友可以提出比较准确的答案~）

1. 论文中分成了9类，如下图所示；而在官方的 devkit 上，把 other 这个分类换成了 vehicle。并且 label 并不是按 0-8 的顺序依次标注的，具体对应关系可以参考 Devkit/watch_image.m 的第 64-108 行
在这里插入图片描述
2. 理论上是9类，但是在Annotations的txt文件中，会发现还有两类，label分别是7和8。由于不知道该标记为什么，所以就舍弃了这两个。（注意：下面的转换代码并不会直接丢弃这两类的目标，而是把 class_list 中找不到的 label 统一标记为 others。）

3. txt文件中，每一行的各个数字的含义也不明确。论文里说有：中心点坐标、旋转框的角度、4个顶点的坐标、label、是否遮挡、是否被裁剪。但是没有说顺序是怎样的，所以还是按照Devkit/watch_image.m中的取值以及我的推断，将txt中的值从左到右定义为：中心点x坐标、中心点y坐标、旋转框的角度、label、是否遮挡、是否被裁剪、4个顶点的坐标
在这里插入图片描述

1、txt->xml

VEDAI中每张图像都有四个未压缩的彩色通道，包括三个RGB彩色通道和一个近红外通道。这里只用了彩色通道的图像。

import os
from xml.dom.minidom import Document
import numpy as np
import copy
import cv2
import sys

# Add the directory three levels above the current working directory to the
# module search path.
# NOTE(review): nothing in the visible code imports from that location --
# confirm whether this line is still required.
sys.path.append('../../..')


def mkdir(path):
    """Create directory ``path``, including missing parents, if needed.

    Calling this on an existing directory is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
        OSError: If the directory cannot be created.
    """
    # exist_ok=True removes the race between checking for the directory and
    # creating it (another process may create it in between).
    os.makedirs(path, exist_ok=True)


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def save_to_xml(save_path, im_height, im_width, objects_axis, im_depth=0):
    """Write a PASCAL VOC style annotation file with four-corner boxes.

    Each object is stored with its class name, two flags and a ``bndbox``
    holding the corner coordinates as ``x0, y0, x1, y1, x2, y2, x3, y3``.

    Args:
        save_path: Destination path of the XML file; its base name is also
            written into the ``<filename>`` element.
        im_height: Image height written to ``<size>/<height>``.
        im_width: Image width written to ``<size>/<width>``.
        objects_axis: Sequence of rows, one per object. Each row is indexed
            as: ``[0:4]`` the four x coordinates, ``[4:8]`` the four y
            coordinates, ``[8]`` the value written to ``<difficult>``,
            ``[9]`` the value written to ``<truncated>`` and ``[-1]`` the
            class name.
        im_depth: Value written to ``<size>/<depth>``. Defaults to 0, the
            value this function has always written; pass the real channel
            count if a consumer of the XML reads this field.

    Raises:
        OSError: If ``save_path`` cannot be opened for writing.
        IndexError: If a row has fewer than 10 entries.
    """
    doc = Document()

    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'VOC2007')
    # os.path.basename handles the platform's path separator, unlike a
    # manual split on '/'.
    _append_text_element(doc, annotation, 'filename',
                         os.path.basename(save_path))

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'The VOC2007 Database')
    _append_text_element(doc, source, 'annotation', 'PASCAL VOC2007')
    _append_text_element(doc, source, 'image', 'flickr')
    _append_text_element(doc, source, 'flickrid', '322409915')

    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    _append_text_element(doc, owner, 'flickrid', 'knautia')
    _append_text_element(doc, owner, 'name', 'dear_jing')

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', im_width)
    _append_text_element(doc, size, 'height', im_height)
    _append_text_element(doc, size, 'depth', im_depth)

    _append_text_element(doc, annotation, 'segmented', '0')

    for row in objects_axis:
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', row[-1])
        _append_text_element(doc, obj, 'pose', 'Unspecified')
        _append_text_element(doc, obj, 'truncated', row[9])
        _append_text_element(doc, obj, 'difficult', row[8])

        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        # The row stores all x coordinates first, then all y coordinates,
        # so corner k is (row[k], row[k + 4]).
        for corner in range(4):
            _append_text_element(doc, bndbox, 'x%d' % corner, row[corner])
            _append_text_element(doc, bndbox, 'y%d' % corner,
                                 row[corner + 4])

    # The XML declaration emitted by toprettyxml() names no encoding, which
    # means UTF-8 to XML parsers; write the bytes accordingly instead of
    # relying on the platform default encoding.
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write(doc.toprettyxml(indent=''))


# Maps each class name to the numeric label used in the VEDAI annotation
# files. 'others' is the name given to any label that is not listed here.
class_list = {'plane': 31, 'boat': 23, 'camping_car': 5, 'car': 1, 'pick-up': 11, 'tractor': 4, 'truck': 2, 'van': 9,
              'vehicle': 10, 'others':0}


def format_label(txt_list):
    """Parse raw annotation lines into rows suitable for ``save_to_xml``.

    Every line is split on whitespace. Lines with fewer than 14 fields are
    skipped. For the remaining lines, field 3 is the numeric class label,
    fields 4-5 are two integer flags and fields 6-13 are eight coordinates.

    Args:
        txt_list: Iterable of annotation lines (strings).

    Returns:
        ``np.array`` built from one row per kept line, laid out as
        ``[8 coordinates (float), 2 flags (int), class name]``. Labels that
        are not present in ``class_list`` get the name ``'others'``.

    Raises:
        ValueError: If a numeric field of a kept line cannot be parsed.
    """
    # Reverse lookup (label -> name) replaces a linear scan of class_list
    # for every annotation line.
    label_to_name = {label: name for name, label in class_list.items()}

    format_data = []
    for line in txt_list:
        # split() without an argument tolerates tabs, repeated spaces and
        # the trailing newline; the line is split only once.
        fields = line.split()
        if len(fields) < 14:
            continue

        name = label_to_name.get(int(fields[3]), 'others')
        corners = [float(value) for value in fields[6:14]]
        flags = [int(value) for value in fields[4:6]]
        format_data.append(corners + flags + [name])

    return np.array(format_data)


def clip_image(file_idx, image, boxes_all):
    """Save one image and its annotation XML under ``save_dir``.

    Nothing is written when ``boxes_all`` is empty. Otherwise the image goes
    to ``JPEGImages/<6-digit id>.png`` and the annotation to
    ``Annotations/<6-digit id>.xml``; both sub-directories are created on
    demand.

    Args:
        file_idx: Image identifier; converted with ``int()`` and zero-padded
            to six digits for the output file names.
        image: Image array passed to ``cv2.imwrite``; its first two shape
            entries are used as height and width.
        boxes_all: Annotation rows forwarded to ``save_to_xml``.
    """
    if len(boxes_all) == 0:
        return

    image_dir = os.path.join(save_dir, 'JPEGImages')
    mkdir(image_dir)
    image_path = os.path.join(image_dir, "%06d.png" % (int(file_idx)))
    cv2.imwrite(image_path, image)

    annotation_dir = os.path.join(save_dir, 'Annotations')
    mkdir(annotation_dir)
    annotation_path = os.path.join(annotation_dir,
                                   "%06d.xml" % (int(file_idx)))
    rows, cols = image.shape[0], image.shape[1]
    save_to_xml(annotation_path, rows, cols, boxes_all)


# Driver: convert every colour image that has a matching annotation file into
# a VOC-style image/XML pair under save_dir.
print('class_list', len(class_list))
raw_data = 'VEDAI'
raw_images_dir = os.path.join(raw_data, 'Vehicules512')
raw_label_dir = os.path.join(raw_data, 'Annotations512')

save_dir = 'VEDAI/VOC/'

# Only the colour images are converted; they are identified by this suffix.
color_suffix = '_co.png'

# Sorted so that the processing order does not depend on the file system.
images = sorted(i for i in os.listdir(raw_images_dir) if i.endswith(color_suffix))
labels = sorted(i for i in os.listdir(raw_label_dir) if i.endswith('.txt'))

print('find image', len(images))
print('find label', len(labels))

for img in images:
    # Remove the suffix by slicing. str.strip() would treat '_co.png' as a
    # set of characters to strip from both ends, not as a suffix.
    file_idx = img[:-len(color_suffix)]
    label_path = os.path.join(raw_label_dir, file_idx + '.txt')
    if not os.path.exists(label_path):
        continue

    img_data = cv2.imread(os.path.join(raw_images_dir, img))
    if img_data is None:
        # cv2.imread signals an unreadable file by returning None.
        print('skip unreadable image', img)
        continue

    # Context manager so the label file handle is closed promptly.
    with open(label_path, 'r') as label_file:
        txt_data = label_file.readlines()

    box = format_label(txt_data)
    clip_image(file_idx, img_data, box)

2、生成 txt

自己制作 trainval.txt 等文件，里面放的是待训练图片的文件名（不含扩展名）。写入前需要保证 VEDAI/VOC/ImageSets/Main 目录已经存在，否则会报“没有这个文件”的错误。

from sklearn.model_selection import train_test_split
import os

# Driver: split the converted images into trainval/test/val/train lists and
# write one image id per line to the VOC ImageSets/Main directory.
name_path = r'VEDAI/VOC/JPEGImages'
name_list = os.listdir(name_path)
# Image ids are the file names without extension; sorted so the split does
# not depend on the order returned by the file system.
names = sorted(os.path.splitext(i)[0] for i in name_list)

# `shuffle` is a boolean flag. The original passed 10 to it, which looks like
# an intended seed, so the seed is passed as random_state instead.
trainval, test = train_test_split(names, test_size=0.5, shuffle=True, random_state=10)
# train_test_split returns the training part first, then the held-out part.
train, val = train_test_split(trainval, test_size=0.5, shuffle=True, random_state=10)

sets_dir = 'VEDAI/VOC/ImageSets/Main'
# open(..., 'w') does not create missing directories.
os.makedirs(sets_dir, exist_ok=True)

for split_name, split_items in (('trainval', trainval), ('test', test), ('val', val), ('train', train)):
    with open(os.path.join(sets_dir, split_name + '.txt'), 'w') as fw:
        fw.writelines(item + '\n' for item in split_items)

print('done!')