目录
1.数据集下载:
DroneVenicle数据集是由天津大学收集、标注的大型无人机航拍车辆数据集。
DroneVenicle训练集下载地址:https://pan.baidu.com/s/1ptZCJ1mKYqFnMnsgqEyoGg(密码:ngar)
DroneVenicle验证集下载地址:https://pan.baidu.com/s/1e6e9mESZecpME4IEdU8t3Q(密码:jnj6)
DroneVenicle测试集下载地址:https://pan.baidu.com/s/1JlXO4jEUQgkR1Vco1hfKhg(密码:tqwc)
2.数据集介绍:
DroneVehicle数据集由无人机采集的56,878张图像组成,其中一半是RGB图像,其余为红外图像。
作者为这五个类别制作了带有定向边界框的丰富注释。其中,汽车(car)有389,779个RGB图像注释,428,086个红外图像注释,卡车(truck)有22,123个RGB图像注释,25,960个红外图像注释,公共汽车(bus)有15,333个RGB图像注释,红外图像有16,590个注释,面包车(van)有11,935个RGB图像注释,红外图像有12,708个注释,货车(freight car)有13,400个RGB图像注释,和17,173个红外图像注释。
在DroneVehicle中,为了在图像边界处标注对象,作者在每张图像的顶部、底部、左侧和右侧设置了一个宽度为100像素的白色边框,因此下载的图像比例为840 x 712,去除周围的白色边框并将图像比例更改为640 x 512。
3.数据集标签转换
1.DroneVehicle转DOTA
1、 Dronevehicle 数据集中有旋转框和水平框,转换时要进行分类获取坐标
2、其中有个类别为feright car,我把官方的feright car全部改写为feright_car,所以要记得更改数据集配置文件里类别名字。
import os
import xml.etree.ElementTree as ET
import math
import cv2 as cv
import argparse
def voc_to_dota(xml_dir, xml_name, img_dir, savedImg_dir, outputTxt_dir):
txt_name = xml_name[:-4] + '.txt' # txt文件名字:去掉xml 加上.txt
txt_path = outputTxt_dir # txt文件目录:在xml目录下创建的txtl_label文件夹
if not os.path.exists(txt_path):
os.makedirs(txt_path)
txt_file = os.path.join(txt_path, txt_name) # txt完整的含名文件路径
img_name = xml_name[:-4] + '.jpg' # 图像名字
img_path = os.path.join(img_dir, img_name) # 图像完整路径
img = cv.imread(img_path) # 读取图像
xml_file = os.path.join(xml_dir, xml_name)
tree = ET.parse(os.path.join(xml_file)) # 解析xml文件 然后转换为DOTA格式文件
root = tree.getroot()
with open(txt_file, "w+", encoding='UTF-8') as out_file:
# out_file.write('imagesource:null' + '\n' + 'gsd:null' + '\n')
for obj in root.findall('object'):
name = obj.find('name').text
if name == 'feright car':
name = 'feright_car'
else:
name = name
obj_difficult = obj.find('difficult')
if obj_difficult:
difficult = obj_difficult.text
else:
difficult = '0'
# print(name, difficult)
if obj.find('bndbox'):
obj_bnd = obj.find('bndbox')
obj_xmin = obj_bnd.find('xmin').text
obj_ymin = obj_bnd.find('ymin').text
obj_xmax = obj_bnd.find('xmax').text
obj_ymax = obj_bnd.find('ymax').text
# w = obj_xmax-obj_xmin
# h = obj_ymax-obj_ymin
x1 = obj_xmin
y1 = obj_ymin
x2 = obj_xmax
y2 = obj_ymin
x3 = obj_xmax
y3 = obj_ymax
x4 = obj_xmin
y4 = obj_ymax
elif obj.find('polygon'):
obj_bnd = obj.find('polygon')
x1 = obj_bnd.find('x1').text
x2 = obj_bnd.find('x2').text
x3 = obj_bnd.find('x3').text
x4 = obj_bnd.find('x4').text
y1 = obj_bnd.find('y1').text
y2 = obj_bnd.find('y2').text
y3 = obj_bnd.find('y3').text
y4 = obj_bnd.find('y4').text
# robndbox = obj.find('robndbox')
# cx = float(robndbox.find('cx').text)
# cy = float(robndbox.find('cy').text)
# w = float(robndbox.find('w').text)
# h = float(robndbox.find('h').text)
# angle = float(robndbox.find('angle').text)
# print(cx, cy, w, h, angle)
# 找最左上角的点
# 在原图上画矩形 看是否转换正确
# cv.line(img, (int(list_xy[0]), int(list_xy[1])), (int(list_xy[2]), int(list_xy[3])), color=(255, 0, 0),
# thickness=3)
# cv.line(img, (int(list_xy[2]), int(list_xy[3])), (int(list_xy[4]), int(list_xy[5])), color=(0, 255, 0),
# thickness=3)
# cv.line(img, (int(list_xy[4]), int(list_xy[5])), (int(list_xy[6]), int(list_xy[7])), color=(0, 0, 255),
# thickness=2)
# cv.line(img, (int(list_xy[6]), int(list_xy[7])), (int(list_xy[0]), int(list_xy[1])), color=(255, 255, 0),
# thickness=2)
data = str(x1) + " " + str(y1) + " " + str(x2) + " " + str(y2) + " " + \
str(x3) + " " + str(y3) + " " + str(x4) + " " + str(y4) + " "
data = data + name + " " + difficult + "\n"
out_file.write(data)
if not os.path.exists(savedImg_dir):
os.makedirs(savedImg_dir)
out_img = os.path.join(savedImg_dir, xml_name[:-4] + '.jpg')
cv.imwrite(out_img, img)
def find_topLeftPopint(dict):
dict_keys = sorted(dict.keys()) # y值
temp = [dict[dict_keys[0]], dict[dict_keys[1]]]
minx = min(temp)
if minx == temp[0]:
miny = dict_keys[0]
else:
miny = dict_keys[1]
return [minx, miny]
# # 转换成四点坐标
# def rotatePoint(xc, yc, xp, yp, theta):
# xoff = xp - xc
# yoff = yp - yc
# cosTheta = math.cos(theta)
# sinTheta = math.sin(theta)
# pResx = cosTheta * xoff + sinTheta * yoff
# pResy = - sinTheta * xoff + cosTheta * yoff
# # pRes = (xc + pResx, yc + pResy)
# # 保留一位小数点
# return float(format(xc + pResx, '.1f')), float(format(yc + pResy, '.1f'))
# # return xc + pResx, yc + pResy
def parse_args():
parser = argparse.ArgumentParser(description='数据格式转换')
parser.add_argument('--xml_dir', default='./datasets/DroneVehicle/train-original/train/trainlabel', help='original xml file dictionary')
parser.add_argument('--img_dir', default='./datasets/DroneVehicle/train-original/train/trainimg', help='original image dictionary')
parser.add_argument('--outputImg_dir', default='./datasets/DroneVehicle/images/train', help='saved image dictionary after dealing ')
parser.add_argument('--outputTxt_dir', default='./datasets/DroneVehicle/labels/train', help='saved txt dictionary after dealing ')
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_args()
xml_path = args.xml_dir
xmlFile_list = os.listdir(xml_path)
# print(xmlFile_list)
for i in range(0, len(xmlFile_list)):
if ('.xml' in xmlFile_list[i]) or ('.XML' in xmlFile_list[i]):
voc_to_dota(xml_path, xmlFile_list[i], args.img_dir, args.outputImg_dir, args.outputTxt_dir)
print('----------------------------------------{}{}----------------------------------------'
.format(xmlFile_list[i], ' has Done!'))
else:
print(xmlFile_list[i] + ' is not xml file')
2.DOTA转YOLO_OBB
这里使用yolo官方的ultralytics-main/ultralytics/data/converter.py里的
def convert_dota_to_yolo_obb(dota_root_path: str):
需要更改两个地方:1.更改里面的类别名字
2.因为DroneVehicle图片格式是jpg格式,需要更改这里
3.注意此函数要求的文件夹结构
5.数据集标签可视化
import cv2
import numpy as np
img = cv2.imread('./datasets/DroneVehicle/images/train/00002.jpg')
with open('./datasets/DroneVehicle/labels/train/00002.txt', 'r') as f:
for line in f.readlines():
# temp = f.read()
temp = line.split()
x1_n, y1_n, x2_n, y2_n, x3_n, y3_n, x4_n, y4_n = eval(temp[1]), eval(temp[2]), eval(temp[3]), eval(temp[4]), eval(temp[5]), eval(temp[6]), eval(temp[7]), eval(temp[8])
x1 = x1_n * img.shape[1]
y1 = y1_n * img.shape[0]
x2 = x2_n * img.shape[1]
y2 = y2_n * img.shape[0]
x3 = x3_n * img.shape[1]
y3 = y3_n * img.shape[0]
x4 = x4_n * img.shape[1]
y4 = y4_n * img.shape[0]
points = np.array([(x1, y1), (x2, y2), (x3, y3), (x4, y4)], np.int32)
points = points.reshape((-1, 1, 2)) # points.reshape((-1, 1, 2)) 将点的数组形状调整为适合 cv2.polylines() 函数的形式。
'''
image:要绘制矩形的图像。
[points]:一个包含所有顶点坐标的列表。
isClosed=True:指定多边形是否封闭,这里为 True 表示绘制封闭多边形,即矩形。
color=(255, 0, 0):绘制的颜色,这里是蓝色 (BGR 格式)。
thickness=2:线条的粗细,单位为像素。
'''
# 画矩形
cv2.polylines(img, [points], isClosed=True, color=(255, 0, 0), thickness=2)
cv2.imshow('windows', img)
cv2.waitKey(0)
可视化如下图: