概要
DOTA和HRSC2016都是遥感常用数据集,本文旨在将这两个数据集的标注转换为YOLO旋转框标注格式
HRSC2016数据集解读
HRSC2016官网:https://sites.google.com/site/hrsc2016/
百度飞桨HRSC2016数据集:https://aistudio.baidu.com/datasetdetail/54106
提示:HRSC2016官网已经寄了,标注格式说明在网上也找不到,所以写篇博客记录一下.
首先,下载下来的数据集是下面的这个zip压缩文件,无脑解压即可.接下来进入HRSC2016_dataset文件夹中,长这个样子
或许有人看到这里会蒙,为什么有这么多个HRSC2016压缩文件?还标注了part01,part02?其实这是分卷压缩,我们只需要解压其中一个就可以,比如我们解压HRSC2016.part01.rar,系统会自动将02-05也全部解压.解压后长这个样子
分为FullDataSet,ImageSets,Test,Train文件夹.FullDataSet,Test,Train放的是对应的图像和标注文件,结构如下图所示,本文不涉及分割,所以只看前两个文件夹(AllImages,Annotations)即可.其实笔者认为,FullDataSet对于训练是没什么意义的.这个文件夹中包含所有的图像和标签,但是其中很多图像并没有标注!!或许这个数据集就是一个半成品数据集,总之,不要直接使用FullDataSet中的图像和标签,因为它会将很多有目标的图像当做背景图.所以,只看Test,Train和ImageSets文件夹即可.
首先看Train和Test
准确的说Train并不是train,而是train+val,
这个Train文件夹中包含了626个图像及其标注,其中有9张为背景图,这9张背景图的名称为100000624、100000628、100000633、100000639、100000671、100000695、100000702、100000752、10000090.有很多博客甚至博士论文中会认为train+val的有效标注为617张,他们忽略了这9张背景图或者认为这9张背景图为无效标注.笔者认为这是不正确的,至少训练时加入适当的背景图对模型的精度及鲁棒性是有好处的.
Test中有444张图像及对应标签,不包含这9张图像!!!
至于这里为什么使用三个!来强调,请看ImageSets解析.
ImageSets中,有如下文件
说实话,笔者第一次看到这些文件时直接傻了.怎么有这么多???我们仔细查看就知道,其中每个文件中包含的都是图像的名称,对应的数量如下:
冥冥中感觉到它们和Train文件夹、Test文件夹有着某种联系~
test.txt,即test,包含9张背景图
test.txt~ ,即test,不包含9张背景图
trainval.txt,即train+val,不包含9张背景图
trainval.txt~,即train+val,包含9张背景图
val和train分别包含2张和7张背景图,至于具体是哪些,笔者没有细找.如果需要知道的话,写个python脚本可以很快找到.
呵呵,这个划分真的interesting…
终于梳理完了,接下来可以看看标注格式了(也可以参照后面的HRSC2YOLO的代码看).直接上图
HRSC数据集转YOLO旋转框格式
不多说了,直接上代码
import xml.etree.ElementTree as ET
import os
import math
def hrsc_to_yolo_rotated(xml_path, label_path):
    """Convert one HRSC2016 XML annotation into a YOLO rotated-box label file.

    Every ``HRSC_Object`` whose ``difficult`` flag is not 1 is written as one
    line ``class_id cx cy w h angle``.  ``cx``/``w`` are divided by
    ``Img_SizeWidth`` and ``cy``/``h`` by ``Img_SizeHeight``; ``mbox_ang`` is
    multiplied by 180/pi (i.e. it is treated as radians) before being written.

    Args:
        xml_path: path of the HRSC2016 annotation XML to read.
        label_path: path of the label text file to create or overwrite.  An
            annotation without usable objects yields an empty file.
    """
    # Passing the path lets ElementTree open *and close* the file itself;
    # the previous explicit open() was never closed.
    root = ET.parse(xml_path).getroot()

    # The image size is a property of the whole annotation, so read it once
    # instead of once per object.
    image_width = int(root.find('Img_SizeWidth').text)
    image_height = int(root.find('Img_SizeHeight').text)

    with open(label_path, 'w') as f:
        for obj in root.iter('HRSC_Object'):
            difficult = obj.find('difficult').text
            # Raw HRSC class id; remap it here (e.g. ``% 100`` or a constant 0)
            # if your training setup needs small consecutive class indices.
            class_id = int(obj.find('Class_ID').text)
            if int(difficult) == 1:
                continue

            mbox_cx = float(obj.find('mbox_cx').text)
            mbox_cy = float(obj.find('mbox_cy').text)
            mbox_w = float(obj.find('mbox_w').text)
            mbox_h = float(obj.find('mbox_h').text)
            mbox_ang = float(obj.find('mbox_ang').text)

            # NOTE(review): the angle is stored as signed degrees; confirm this
            # matches the angle convention expected by whatever reads the labels.
            yolo_rotated_bbox = [
                class_id,
                mbox_cx / image_width,
                mbox_cy / image_height,
                mbox_w / image_width,
                mbox_h / image_height,
                mbox_ang * (180.0 / math.pi),
            ]
            f.write(" ".join(str(a) for a in yolo_rotated_bbox) + '\n')
# Batch driver: convert every HRSC2016 XML annotation in ``xml_root`` into a
# YOLO rotated-box label file of the same base name inside ``txt_root``.
xml_root = r"你的Annotations文件夹路径"
txt_root = r"你所需要生成的YOLO_labels文件夹路径"
xml_name = os.listdir(xml_root)
# open(..., 'w') does not create missing folders, so make sure it exists.
os.makedirs(txt_root, exist_ok=True)
for xml_file in xml_name:
    # splitext keeps base names that themselves contain dots intact
    # (split('.')[0] would cut them at the first dot).
    stem, ext = os.path.splitext(xml_file)
    if ext.lower() != '.xml':
        # Skip stray non-annotation entries instead of crashing in the parser.
        continue
    xml_path = os.path.join(xml_root, xml_file)
    txt_path = os.path.join(txt_root, stem + '.txt')
    hrsc_to_yolo_rotated(xml_path, txt_path)
如何知道转换得对不对呢?把旋转框画到图像上就可以直观地看出来,绘图代码在后面的"drawed图像绘制"部分
DOTA数据集转YOLO旋转框格式
参考:https://zhuanlan.zhihu.com/p/356416158
其对应代码:https://github.com/hukaixuan19970627/DOTA_devkit_YOLO
这里只展示本人略改过的YOLO_Transform.py(我记得只是加了点注释,修改了下路径)
# -*- coding: utf-8 -*-
import dota_utils as util
import os
import numpy as np
from PIL import Image
import cv2
import random
import shutil
import matplotlib.pyplot as plt
from shapely.geometry import Polygon, MultiPoint # 多边形
import time
import argparse
## trans dota format to format YOLO(darknet) required
def dota2Darknet(imgpath, txtpath, dstpath, extractclassname):
    """Convert DOTA polygon labels into axis-aligned YOLO (darknet) labels.

    ``dstpath`` is deleted (if present) and recreated, then one
    ``<name>.txt`` is written per DOTA label file with lines ``id x y w h``.

    :param imgpath: folder holding the images; ``<name>.png`` is opened to
        read the image size for each label file
    :param txtpath: folder holding the label files in DOTA format
    :param dstpath: output folder for the YOLO-format label files
    :param extractclassname: sequence of class names to keep; the index of a
        name in this sequence becomes its class id
    :return: None
    """
    if os.path.exists(dstpath):
        shutil.rmtree(dstpath)  # start from a clean output folder
    os.makedirs(dstpath)

    # Per the original inline notes, this yields full paths such as
    # '/.../P0005.txt' and each parsed object is a dict with at least
    # 'name' and 'poly' ([(x1, y1), ..., (x4, y4)]).
    for fullname in util.GetFileFromThisRootDir(txtpath):
        objects = util.parse_dota_poly(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        img_fullname = os.path.join(imgpath, name + '.png')

        # Only the size is needed; the context manager releases the file
        # handle right away instead of leaking one per image.
        with Image.open(img_fullname) as img:
            img_w, img_h = img.size

        with open(os.path.join(dstpath, name + '.txt'), 'w') as f_out:
            for obj in objects:
                # bbox = [x y w h]; presumably normalised by the image size
                # inside dots4ToRecC -- TODO confirm against dota_utils.
                bbox = np.array(util.dots4ToRecC(obj['poly'], img_w, img_h))
                # Drop boxes with any component <= 0 or >= 1.
                if (sum(bbox <= 0) + sum(bbox >= 1)) >= 1:
                    continue
                if obj['name'] not in extractclassname:
                    continue
                # Named class_index rather than ``id`` to avoid shadowing the builtin.
                class_index = extractclassname.index(obj['name'])
                outline = str(class_index) + ' ' + ' '.join(list(map(str, bbox)))
                f_out.write(outline + '\n')
## trans dota format to (cls, c_x, c_y, Longest side, short side, angle:[0,179))
def dota2LongSideFormat(imgpath, txtpath, dstpath, extractclassname):
    """Convert DOTA polygon labels into the long-side rotated-box format.

    ``dstpath`` is deleted (if present) and recreated, then one
    ``<name>.txt`` is written per DOTA label file with lines
    ``id c_x c_y longside shortside theta`` where theta is an int in [0, 179].

    :param imgpath: folder holding the images; ``<name>.png`` is opened to
        read the image size for each label file
    :param txtpath: folder holding the label files in DOTA format
    :param dstpath: output folder for the long-side-format label files
    :param extractclassname: sequence of class names to keep; the index of a
        name in this sequence becomes its class id
    """
    if os.path.exists(dstpath):
        shutil.rmtree(dstpath)  # start from a clean output folder
    os.makedirs(dstpath)

    num_classes = len(extractclassname)

    # Per the original inline notes, this yields full paths such as
    # '/.../P0005.txt' and each parsed object is a dict with at least
    # 'name' and 'poly' ([(x1, y1), ..., (x4, y4)]).
    for fullname in util.GetFileFromThisRootDir(txtpath):
        objects = util.parse_dota_poly(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        img_fullname = os.path.join(imgpath, name + '.png')

        # Only the size is needed; close the image file immediately.
        with Image.open(img_fullname) as img:
            img_w, img_h = img.size

        num_gt = 0  # number of boxes actually written for this image
        with open(os.path.join(dstpath, name + '.txt'), 'w') as f_out:
            for i, obj in enumerate(objects):
                poly = np.float32(np.array(obj['poly']))
                # Normalise the four corner points by the image size.
                poly[:, 0] = poly[:, 0] / img_w
                poly[:, 1] = poly[:, 1] / img_h

                # rect = ((c_x, c_y), (w, h), theta)
                # NOTE(review): the helper below only accepts theta in
                # [-90, 0]; the angle range returned by cv2.minAreaRect is
                # reported to differ between OpenCV releases -- confirm with
                # the installed version.
                rect = cv2.minAreaRect(poly)
                c_x = rect[0][0]
                c_y = rect[0][1]
                w = rect[1][0]
                h = rect[1][1]
                theta = rect[-1]

                trans_data = cvminAreaRect2longsideformat(c_x, c_y, w, h, theta)
                if not trans_data:
                    # theta == 90 is treated by the original author as an
                    # incomplete box (a zero side) and is dropped silently.
                    if theta != 90:
                        print('opencv表示法转长边表示法出现异常,已将第%d个box排除,问题出现在该图片中:%s' % (i, img_fullname))
                    continue

                # theta_longside range: [-180, 0)
                c_x, c_y, longside, shortside, theta_longside = trans_data
                bbox = np.array((c_x, c_y, longside, shortside))
                # Require 0 < x, y < 1 and sides > 0.
                if (sum(bbox <= 0) + sum(bbox[:2] >= 1)) >= 1:
                    print('bbox[:2]中有>= 1的元素,bbox中有<= 0的元素,已将第%d个box排除,问题出现在该图片中:%s' % (i, img_fullname))
                    print('出问题的longside形式数据:[%.16f, %.16f, %.16f, %.16f, %.1f]' % (c_x, c_y, longside, shortside, theta_longside))
                    continue

                if obj['name'] not in extractclassname:
                    print('预定类别中没有类别:%s;已将该box排除,问题出现在该图片中:%s' % (obj['name'], fullname))
                    continue
                # Named class_index rather than ``id`` to avoid shadowing the builtin.
                class_index = extractclassname.index(obj['name'])

                # Round to the nearest integer degree in [0, 180], then fold
                # 180 onto 179 so the label range is [0, 179].
                theta_label = int(theta_longside + 180.5)
                if theta_label == 180:
                    theta_label = 179

                # Final sanity checks.  The old messages had one %s for two
                # arguments (TypeError when triggered) and a fixed bound of
                # 15 classes; both are corrected here.
                if class_index >= num_classes or class_index < 0:
                    print('id problems,第%d个box,问题出现在该图片中:%s' % (i, img_fullname))
                    print('出问题的longside形式数据:[%.16f, %.16f, %.16f, %.16f, %.1f]' % (
                        c_x, c_y, longside, shortside, theta_longside))
                if theta_label < 0 or theta_label > 179:
                    print('theta problems,第%d个box,问题出现在该图片中:%s' % (i, img_fullname))
                    print('出问题的longside形式数据:[%.16f, %.16f, %.16f, %.16f, %.1f]' % (
                        c_x, c_y, longside, shortside, theta_longside))

                # outline = 'id x y longside shortside theta'
                outline = str(class_index) + ' ' + ' '.join(list(map(str, bbox))) + ' ' + str(theta_label)
                f_out.write(outline + '\n')
                num_gt += 1

        if num_gt == 0:
            # Nothing is deleted here (the removal calls were disabled), so
            # the message no longer claims that files were removed.
            print('%s 图片对应的txt不存在有效目标(未删除任何文件)' % img_fullname)
    print('已完成文件夹内DOTA数据形式到长边表示法的转换')
def cvminAreaRect2longsideformat(x_c, y_c, width, height, theta):
    """Translate an OpenCV-style rotated box into the long-side format.

    When ``width`` is already the longer (or equal) side the two
    representations coincide; otherwise the sides are exchanged and 90 degrees
    are subtracted from the angle.

    @param x_c: center x
    @param y_c: center y
    @param width: first side of the OpenCV box
    @param height: the other side of the OpenCV box
    @param theta: OpenCV angle, expected in [-90, 0]; 0 is rewritten as -90
                  with the sides exchanged
    @return: ``(x_c, y_c, longside, shortside, theta_longside)`` with
             theta_longside in [-180, 0), or ``False`` when the input or the
             result falls outside the accepted ranges
    """
    # theta == 0 is normalised to the equivalent (-90, sides exchanged) form.
    if theta == 0:
        width, height = height, width
        theta = -90

    # Anything outside [-90, 0) is rejected; theta == 90 is rejected silently.
    if theta > 0 or theta < -90:
        if theta != 90:
            print('θ计算出现异常,当前数据为:%.16f, %.16f, %.16f, %.16f, %.1f;超出opencv表示法的范围:[-90,0)' % (x_c, y_c, width, height, theta))
        return False

    if width == max(width, height):
        # width is the long side (squares included): nothing to exchange.
        longside, shortside, theta_longside = width, height, theta
    else:
        longside, shortside, theta_longside = height, width, theta - 90

    if longside < shortside:
        print('旋转框转换表示形式后出现问题:最长边小于短边;[%.16f, %.16f, %.16f, %.16f, %.1f]' % (x_c, y_c, longside, shortside, theta_longside))
        return False

    if theta_longside < -180 or theta_longside >= 0:
        print('旋转框转换表示形式时出现问题:θ超出长边表示法的范围:[-180,0);[%.16f, %.16f, %.16f, %.16f, %.1f]' % (x_c, y_c, longside, shortside, theta_longside))
        return False

    return x_c, y_c, longside, shortside, theta_longside
def drawLongsideFormatimg(imgpath, txtpath, dstpath, extractclassname, thickness=2):
    """Draw long-side-format rotated boxes onto their images.

    Label line format: classid, x_c_normalized, y_c_normalized,
    longside_normalized, shortside_normalized, theta.  ``dstpath`` is deleted
    (if present) and recreated; each result is saved as ``<name>_.png``.

    :param imgpath: folder holding the images (``<name>.png``)
    :param txtpath: folder holding the label files in long-side format
    :param dstpath: output folder for the drawn images
    :param extractclassname: the selected class names; only its length is
        used here, to build one random colour per class index
    :param thickness: line thickness passed to ``cv2.drawContours``
    """
    if os.path.exists(dstpath):
        shutil.rmtree(dstpath)  # start from a clean output folder
    os.makedirs(dstpath)

    # One random colour triple per class index.
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(extractclassname))]

    # Per the original inline notes, this yields full paths such as '/.../P0005.txt'.
    for fullname in util.GetFileFromThisRootDir(txtpath):
        # objects[i] = [classid, x_c, y_c, longside, shortside, theta]
        objects = util.parse_longsideformat(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        img_fullname = os.path.join(imgpath, name + '.png')
        img_savename = os.path.join(dstpath, name + '_.png')

        # PIL is used only to read the size; release the handle right away.
        with Image.open(img_fullname) as pil_img:
            img_w, img_h = pil_img.size
        img = cv2.imread(img_fullname)  # pixel data for drawing

        for obj in objects:
            class_index = obj[0]
            # Stored theta is in [0, 179]; subtracting 179.9 maps it into
            # (-180, 0) as expected by the conversion helper.
            rect = longsideformat2cvminAreaRect(obj[1], obj[2], obj[3], obj[4], (obj[5] - 179.9))
            # Four corner points, still normalised.
            poly = np.float32(cv2.boxPoints(rect))
            # Back to pixel coordinates, then to integers.
            poly[:, 0] = poly[:, 0] * img_w
            poly[:, 1] = poly[:, 1] * img_h
            # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
            poly = poly.astype(np.intp)
            cv2.drawContours(image=img,
                             contours=[poly],
                             contourIdx=-1,
                             color=colors[int(class_index)],
                             thickness=thickness)
        cv2.imwrite(img_savename, img)
def longsideformat2cvminAreaRect(x_c, y_c, longside, shortside, theta_longside):
    """Translate a long-side-format box back into OpenCV minAreaRect form.

    For theta_longside in [-180, -90) the long side becomes the OpenCV height
    and 90 degrees are added to the angle; otherwise the long side is the
    OpenCV width and the angle is kept.

    @param x_c: center x
    @param y_c: center y
    @param longside: the longer side
    @param shortside: the shorter side
    @param theta_longside: long-side angle, expected in [-180, 0)
    @return: ``((x_c, y_c), (width, height), theta)``; a warning is printed
             (but the value is still returned) when theta is outside [-90, 0)
    """
    long_side_is_height = -180 <= theta_longside < -90
    if long_side_is_height:
        size = (shortside, longside)
        theta = theta_longside + 90
    else:
        size = (longside, shortside)
        theta = theta_longside

    # Warn only; the caller still receives the (possibly out-of-range) rect.
    if theta < -90 or theta >= 0:
        print('当前θ=%.1f,超出opencv的θ定义范围[-90, 0)' % theta)

    return ((x_c, y_c), size, theta)
def delete(imgpath, txtpath):
    """Remove label files that have no matching ``<name>.png`` in ``imgpath``.

    :param imgpath: folder that should contain the images
    :param txtpath: folder whose label files are checked (and possibly removed)
    """
    # Per the original inline notes, this yields full paths such as '/.../P0005.txt'.
    for label_file in util.GetFileFromThisRootDir(txtpath):
        stem, _ = os.path.splitext(os.path.basename(label_file))
        expected_image = os.path.join(imgpath, stem + '.png')
        if os.path.exists(expected_image):
            continue
        # No image for this label: drop the orphaned label file.
        os.remove(label_file)
if __name__ == '__main__':
    # Example: convert the DOTA labels in ./labelTxt (images in ./images)
    # into long-side-format labels under ./yolo_labels.
    dota2LongSideFormat(
        imgpath=r'images',
        txtpath=r'labelTxt',
        dstpath=r'yolo_labels',
        extractclassname=util.wordname_15,
    )

    # Optional second step: draw the converted labels to check them visually.
    # drawLongsideFormatimg(imgpath=r'images',
    #                       txtpath=r'yolo_labels',
    #                       dstpath=r'images_drawed',
    #                       extractclassname=util.wordname_15)
drawed图像绘制
这里用到了DOTA数据集转YOLO旋转框格式中的util,请先到https://github.com/hukaixuan19970627/DOTA_devkit_YOLO下载
下好之后,将其放到绘制脚本相同的目录下,以便调用到其dota_utils等包
如下为绘制代码:
import dota_utils as util
import os
import numpy as np
from PIL import Image
import cv2
import random
import shutil
import matplotlib.pyplot as plt
from shapely.geometry import Polygon, MultiPoint # 多边形
import time
import argparse
import xml.etree.ElementTree as ET
import pickle
from os import listdir, getcwd
from os.path import join
def longsideformat2cvminAreaRect(x_c, y_c, longside, shortside, theta_longside):
    """Translate a long-side-format box back into OpenCV minAreaRect form.

    For theta_longside in [-180, -90) the long side becomes the OpenCV height
    and 90 degrees are added to the angle; otherwise the long side is the
    OpenCV width and the angle is kept.

    @param x_c: center x
    @param y_c: center y
    @param longside: the longer side
    @param shortside: the shorter side
    @param theta_longside: long-side angle, expected in [-180, 0)
    @return: ``((x_c, y_c), (width, height), theta)``; a warning is printed
             (but the value is still returned) when theta is outside [-90, 0)
    """
    long_side_is_height = -180 <= theta_longside < -90
    if long_side_is_height:
        size = (shortside, longside)
        theta = theta_longside + 90
    else:
        size = (longside, shortside)
        theta = theta_longside

    # Warn only; the caller still receives the (possibly out-of-range) rect.
    if theta < -90 or theta >= 0:
        print('当前θ=%.1f,超出opencv的θ定义范围[-90, 0)' % theta)

    return ((x_c, y_c), size, theta)
def delete(imgpath, txtpath):
    """Remove label files that have no matching ``<name>.png`` in ``imgpath``.

    :param imgpath: folder that should contain the images
    :param txtpath: folder whose label files are checked (and possibly removed)
    """
    # Per the original inline notes, this yields full paths such as '/.../P0005.txt'.
    for label_file in util.GetFileFromThisRootDir(txtpath):
        stem, _ = os.path.splitext(os.path.basename(label_file))
        expected_image = os.path.join(imgpath, stem + '.png')
        if os.path.exists(expected_image):
            continue
        # No image for this label: drop the orphaned label file.
        os.remove(label_file)
def drawLongsideFormatimg(imgpath, txtpath, extractclassname, thickness=2, dstpath=r'images_drawed'):
    """Draw the rotated boxes of one image and save the result.

    Label line format: classid, x_c_normalized, y_c_normalized,
    longside_normalized, shortside_normalized, theta.

    :param imgpath: path of the single image to draw on
    :param txtpath: list of label-file paths (long-side format) for that
        image; a single path string is also accepted
    :param extractclassname: the selected class names; only its length is
        used here, to build one random colour per class index
    :param thickness: line thickness passed to ``cv2.drawContours``
    :param dstpath: folder the drawn image is written to, under the same
        file name as ``imgpath``; created if missing
    """
    # One random colour triple per class index.
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(extractclassname))]

    # A bare string would otherwise be iterated character by character.
    filelist = [txtpath] if isinstance(txtpath, str) else txtpath

    # The output folder must exist before writing; the previous hard-coded
    # folder was never created.
    os.makedirs(dstpath, exist_ok=True)
    img_savename = os.path.join(dstpath, os.path.basename(imgpath))

    for fullname in filelist:
        # objects[i] = [classid, x_c, y_c, longside, shortside, theta]
        objects = util.parse_longsideformat(fullname)

        # PIL is used only to read the size; release the handle right away.
        with Image.open(imgpath) as pil_img:
            img_w, img_h = pil_img.size
        img = cv2.imread(imgpath)  # pixel data for drawing

        for obj in objects:
            class_index = obj[0]
            # Stored theta is in [0, 179]; subtracting 179.9 maps it into
            # (-180, 0) as expected by the conversion helper.
            rect = longsideformat2cvminAreaRect(obj[1], obj[2], obj[3], obj[4], (obj[5] - 179.9))
            # Four corner points, still normalised.
            poly = np.float32(cv2.boxPoints(rect))
            # Back to pixel coordinates, then to integers.
            poly[:, 0] = poly[:, 0] * img_w
            poly[:, 1] = poly[:, 1] * img_h
            # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
            poly = poly.astype(np.intp)
            cv2.drawContours(image=img,
                             contours=[poly],
                             contourIdx=-1,
                             color=colors[int(class_index)],
                             thickness=thickness)
        cv2.imwrite(img_savename, img)
        # Alternatively, display instead of saving:
        # cv2.imshow(os.path.basename(imgpath), img)
        # cv2.waitKey(0)
# Batch driver: draw the YOLO rotated-box labels of every image in
# ``img_root`` using the label file of the same base name in ``label_root``.
# os.path.join keeps the paths portable (a literal backslash is only a
# separator on Windows).
img_root = os.path.join("Train", "AllImages")
label_root = os.path.join("Train", "YOLO_labels")
image_name = os.listdir(img_root)
# Placeholder class list: only its length matters (one colour per index).
# NOTE(review): class ids in the label files must be < 100 to index it --
# confirm against the ids your converter writes.
extractclassname = [str(i) for i in range(100)]
for image_file in image_name:
    # splitext keeps base names that themselves contain dots intact.
    stem = os.path.splitext(image_file)[0]
    img_path = os.path.join(img_root, image_file)
    label_path = os.path.join(label_root, stem + '.txt')
    drawLongsideFormatimg(img_path, [label_path], extractclassname)
最后展示一下效果
DOTA
HRSC2016
如有不对,欢迎批评指正!