第一步:清洗数据
import os
import shutil
import xml.etree.ElementTree as ET
import numpy as np
import cv2
# Step-1 cleaning script: for every image id in the original split lists, read its
# oriented-bounding-box XML, drop degenerate / negative-coordinate boxes, write the
# surviving boxes as one text line each, and record the ids that still have boxes.

# Input annotation folder and the two output folders (text labels / kept XML copies).
input_folder = '../dior-r/Annotations-yuan/Oriented_Bounding_Boxes/'
labels_folder = '../dior-r/labels/'
output_folder = '../dior-r/Annotations/'
kk = 0  # annotation files visited
jj = 0  # annotation files kept after cleaning
VOCdevkit_sets = ['train', 'val']
VOCdevkit_path = '../dior-r'

# Corner tags inside <robndbox>, in the order they are written to the label line.
CORNER_TAGS = (
    'x_left_top', 'y_left_top',
    'x_right_top', 'y_right_top',
    'x_right_bottom', 'y_right_bottom',
    'x_left_bottom', 'y_left_bottom',
)

# Make sure every destination exists so the run does not die on the first write.
os.makedirs(labels_folder, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)
os.makedirs(os.path.join(VOCdevkit_path, 'ImageSets/Main'), exist_ok=True)

for image_set in VOCdevkit_sets:
    # Original split list; the original ImageSets folder is expected to have been
    # renamed to "ImageSets-yuan" beforehand.
    source_list = os.path.join(VOCdevkit_path, 'ImageSets-yuan/Main/%s.txt' % (image_set))
    # Cleaned split list written by this script.
    target_list = os.path.join(VOCdevkit_path, 'ImageSets/Main/%s.txt' % (image_set))

    with open(source_list, encoding='utf-8') as f_source:
        image_ids = f_source.read().strip().split()

    # The context manager guarantees the cleaned list is flushed and closed.
    with open(target_list, 'w', encoding='utf-8') as f_target:
        # Visit the XML file of every listed image.
        for image_id in image_ids:
            filename = image_id + '.xml'
            kk = kk + 1
            xml_path = os.path.join(input_folder, filename)
            print(f'convert {filename}')

            # Parse the XML file.
            root = ET.parse(xml_path).getroot()

            # Label lines collected for this image.
            text_data = []
            for obj in root.findall('object'):
                robndbox = obj.find('robndbox')
                if robndbox is None:
                    continue
                corner_texts = [robndbox.find(tag).text for tag in CORNER_TAGS]
                name = obj.find('name').text
                difficult = obj.find('difficult').text
                corners = [float(text) for text in corner_texts]

                # minAreaRect is fed integer points, so coordinates are truncated here.
                poly = np.array(corners, np.int32).reshape(4, 2)
                (x, y), (w, h), angle = cv2.minAreaRect(poly)

                has_negative = any(value < 0 for value in corners)
                if w <= 0.001 or h <= 0.001 or has_negative:
                    # Degenerate (zero-size) box or a corner with a negative coordinate: drop it.
                    print('++++++++++', filename, w, h, poly)
                else:
                    # Keep the original coordinate strings, followed by class name and difficulty.
                    text_line = f"{' '.join(corner_texts)} {name} {difficult}\n"
                    text_data.append(text_line)

            if not text_data:
                # No usable box left: the image is excluded from the cleaned split.
                print('-----------', filename)
            else:
                output_path = os.path.join(labels_folder, filename.replace('.xml', '.txt'))
                with open(output_path, 'w') as output_file:
                    output_file.writelines(text_data)
                jj = jj + 1
                f_target.write(image_id + '\n')
                shutil.copy(xml_path, output_folder + filename)

print("转换完成!", '原始labels总数:', kk, '清洗后的labels总数:', jj)
第二步:移动数据
# -*- coding: utf-8 -*-
import shutil
import os
img_label_path = '../dior-r/'


# When adapting this script, change the split-list path built below and the
# newpath handed in by the caller.
def objFileName(oldpath, sets, newpath, name_strip, k):
    """Copy every file named in a split list from ``oldpath`` into ``newpath``.

    The split list is read from ``img_label_path + "ImageSets/Main/<sets>.txt"``,
    one file stem per line. Blank lines are ignored.

    Args:
        oldpath: Folder holding the source files.
        sets: Split name used to pick the list file (e.g. ``"train"``).
        newpath: Destination folder passed to ``shutil.copy``.
        name_strip: File extension appended to each stem (e.g. ``".jpg"``).
        k: Label used only in the summary message.

    Returns:
        An empty list; nothing is ever appended to it.

    Raises:
        FileNotFoundError: If a listed file does not exist under ``oldpath``.
    """
    local_file_name_list = img_label_path + "ImageSets/Main/{}.txt".format(sets)
    obj_name_list = []
    p = 0  # number of files copied
    with open(local_file_name_list, 'r', encoding='utf-8') as list_file:
        for line in list_file:
            line = line.strip('\n')  # drop the newline
            if not line.strip():
                # A blank line would otherwise produce a bogus "<oldpath>/<ext>" path.
                continue
            dir1, file = oldpath + '/' + line + name_strip, newpath
            print(dir1, file)
            # Explicit check instead of ``assert`` so it still runs under ``python -O``.
            if not os.path.exists(dir1):
                raise FileNotFoundError(dir1)
            shutil.copy(dir1, file)
            p += 1
    print(sets + '的' + k + '数量为:/{}'.format(p))
    return obj_name_list
def copy_img():
    """Distribute images, XML annotations and text labels into per-split folders.

    For each split ("train", "val") the folders ``<split>/images``,
    ``<split>/Annotations`` and ``<split>/labelTxt`` are created under
    ``img_label_path`` when missing, then filled by ``objFileName``.
    """
    # (source folder, file extension, destination sub-folder):
    # files from the first are placed into the last.
    transfers = [
        ("JPEGImages-trainval", ".jpg", "images"),
        ("Annotations", ".xml", "Annotations"),
        ("labels", ".txt", "labelTxt"),
    ]
    for sets in ["train", "val"]:
        for source_name, extension, target_name in transfers:
            # Folder the files are copied from.
            oldpath = img_label_path + source_name
            split_root = img_label_path + sets
            print(oldpath, split_root)

            if not os.path.exists(split_root):
                os.mkdir(split_root)

            target_dir = split_root + '/' + target_name
            if not os.path.exists(target_dir):
                os.mkdir(target_dir)

            objFileName(oldpath, sets, target_dir, extension, target_name)
    print("finally!!")


if __name__ == '__main__':
    copy_img()
第三步:转换数据格式【YOLOv8 专属】
import json
from collections import defaultdict
from pathlib import Path
import cv2
import numpy as np
from ultralytics.utils import LOGGER, TQDM
from ultralytics.utils.files import increment_path
def convert_aod_to_yolo_obb(dota_root_path: str):
    """
    Convert AOD corner-point label files to YOLO OBB (Oriented Bounding Box) format.

    For every ``.png`` image in the ``train2017`` and ``val2017`` image folders, the
    label file with the same stem is read from ``labels-yuan``. Each input line must
    hold eight corner coordinates (x1 y1 x2 y2 x3 y3 x4 y4) followed by the class
    name; shorter lines are skipped. The coordinates are divided by the image width
    (x) or height (y) and written as ``<class_idx> x1 y1 ... x4 y4``.

    Images that OpenCV cannot read are skipped with a warning.

    Args:
        dota_root_path (str): The root directory path of the dataset.

    Raises:
        KeyError: If a label line names a class other than "car" or "airplane".
        FileNotFoundError: If an image has no matching label file.

    Example:
        ```python
        convert_aod_to_yolo_obb('path/to/AOD')
        ```

    Notes:
        The directory structure assumed for the dataset:

            - AOD
                ├─ images
                │   ├─ train2017
                │   └─ val2017
                └─ labels-yuan
                    ├─ train2017
                    └─ val2017

        After execution, the function will organize the labels into:

            - AOD
                └─ labels
                    ├─ train2017
                    └─ val2017
    """
    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {
        "car": 0,
        "airplane": 1,
    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Converts a single image's corner-point annotation to YOLO OBB format and saves it to save_dir."""
        orig_label_path = orig_label_dir / f"{image_name}.txt"
        save_path = save_dir / f"{image_name}.txt"

        with orig_label_path.open("r") as f, save_path.open("w") as g:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue
                class_name = parts[8]
                class_idx = class_mapping[class_name]
                coords = [float(p) for p in parts[:8]]
                # Even positions are x (scaled by width), odd positions are y (scaled by height).
                normalized_coords = [
                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)
                ]
                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ["train2017", "val2017"]:
        image_dir = dota_root_path / "images" / phase
        orig_label_dir = dota_root_path / "labels-yuan" / f"{phase}"
        save_dir = dota_root_path / "labels" / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
            if image_path.suffix != ".png":
                continue
            image_name_without_ext = image_path.stem
            img = cv2.imread(str(image_path))
            if img is None:
                # cv2.imread returns None instead of raising when it cannot read a file.
                LOGGER.warning(f"Skipping unreadable image: {image_path}")
                continue
            h, w = img.shape[:2]
            convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)
def convert_dior_to_yolo_obb(dota_root_path: str):  # clean, convert format, move
    """
    Convert DIOR-R corner-point label files to YOLO OBB (Oriented Bounding Box) format.

    For every ``.jpg`` image in ``<root>/train/images`` and ``<root>/val/images``, the
    label file with the same stem is read from ``<root>/<phase>/labelTxt``. Each input
    line must hold eight corner coordinates (x1 y1 x2 y2 x3 y3 x4 y4) followed by the
    class name; shorter lines are skipped and any further fields are ignored. The
    coordinates are divided by the image width (x) or height (y) and written as
    ``<class_idx> x1 y1 ... x4 y4`` into ``<root>/<phase>/labels``.

    Images that OpenCV cannot read are skipped with a warning.

    Args:
        dota_root_path (str): The root directory path of the dataset.

    Raises:
        KeyError: If a label line names a class missing from the class mapping.
        FileNotFoundError: If an image has no matching label file.

    Example:
        ```python
        convert_dior_to_yolo_obb('path/to/dior-r')
        ```

    Notes:
        The directory structure assumed for the dataset:

            - dior-r
                ├─ train
                │   ├─ images
                │   └─ labelTxt
                └─ val
                    ├─ images
                    └─ labelTxt

        After execution, a ``labels`` folder exists next to ``images`` in each phase.
    """
    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {
        "airplane": 0,
        "airport": 1,
        "baseballfield": 2,
        "basketballcourt": 3,
        "bridge": 4,
        "chimney": 5,
        "Expressway-Service-area": 6,
        "Expressway-toll-station": 7,
        "dam": 8,
        "golffield": 9,
        "groundtrackfield": 10,
        "harbor": 11,
        "overpass": 12,
        "ship": 13,
        "stadium": 14,
        "storagetank": 15,
        "tenniscourt": 16,
        "trainstation": 17,
        "vehicle": 18,
        "windmill": 19,
    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Converts a single image's corner-point annotation to YOLO OBB format and saves it to save_dir."""
        orig_label_path = orig_label_dir / f"{image_name}.txt"
        save_path = save_dir / f"{image_name}.txt"

        with orig_label_path.open("r") as f, save_path.open("w") as g:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue
                class_name = parts[8]
                class_idx = class_mapping[class_name]
                coords = [float(p) for p in parts[:8]]
                # Even positions are x (scaled by width), odd positions are y (scaled by height).
                normalized_coords = [
                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)
                ]
                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ["train", "val"]:
        image_dir = dota_root_path / phase / "images"
        orig_label_dir = dota_root_path / f"{phase}" / "labelTxt"
        save_dir = dota_root_path / phase / "labels"

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
            if image_path.suffix != ".jpg":
                continue
            image_name_without_ext = image_path.stem
            img = cv2.imread(str(image_path))
            if img is None:
                # cv2.imread returns None instead of raising when it cannot read a file.
                LOGGER.warning(f"Skipping unreadable image: {image_path}")
                continue
            h, w = img.shape[:2]
            convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)
# Runs the AOD conversion at import/run time; swap the commented line in to convert DIOR-R instead.
convert_aod_to_yolo_obb('../UCAS_AOD')
# convert_dior_to_yolo_obb('../dior-r')
# The directory layout under the dataobb folder is described in detail further below.
detect.py
import cv2
import torch
import numpy as np
from ultralytics import YOLO
def xywhr2xyxyxyxy(center):
    """Convert rotated boxes from (cx, cy, w, h, angle) to their four corner points.

    Accepts either a NumPy array or a torch tensor whose last axis holds the five
    box values (angle is passed straight to cos/sin). Returns the same kind of
    container with the last axis replaced by two axes of shape (4, 2).
    """
    # reference: https://github.com/ultralytics/ultralytics/blob/v8.1.0/ultralytics/utils/ops.py#L545
    if isinstance(center, np.ndarray):
        cos_fn, sin_fn = np.cos, np.sin

        def join_xy(parts):
            return np.concatenate(parts, axis=-1)

        def pack_corners(points):
            return np.stack(points, axis=-2)
    else:
        cos_fn, sin_fn = torch.cos, torch.sin

        def join_xy(parts):
            return torch.cat(parts, dim=-1)

        def pack_corners(points):
            return torch.stack(points, dim=-2)

    # Width-one slices keep the last axis so the pieces can be concatenated.
    middle = center[..., :2]
    width = center[..., 2:3]
    height = center[..., 3:4]
    theta = center[..., 4:5]
    cos_t = cos_fn(theta)
    sin_t = sin_fn(theta)

    # Half-extent vectors along the box's width and height directions.
    along_w = join_xy([width / 2 * cos_t, width / 2 * sin_t])
    along_h = join_xy([-height / 2 * sin_t, height / 2 * cos_t])

    corners = [
        middle + along_w + along_h,
        middle + along_w - along_h,
        middle - along_w - along_h,
        middle - along_w + along_h,
    ]
    return pack_corners(corners)
def hsv2bgr(h, s, v):
    """Convert an HSV colour to an 8-bit BGR tuple.

    Args:
        h: Hue as a fraction of a full turn. Values outside [0, 1) wrap around,
            so ``h == 1.0`` gives the same colour as ``h == 0.0``.
        s: Saturation, expected in [0, 1].
        v: Value (brightness), expected in [0, 1].

    Returns:
        ``(b, g, r)`` with each channel computed as ``int(channel * 255)``.
    """
    # Hue is cyclic: fold it into one turn so h >= 1 or h < 0 still lands in a sector.
    h = h % 1.0
    scaled = h * 6
    whole = int(scaled)
    f = scaled - whole
    p = v * (1 - s)
    q = v * (1 - f * s)
    t = v * (1 - (1 - f) * s)

    # (r, g, b) for each of the six hue sectors.
    rgb_by_sector = (
        (v, t, p),
        (q, v, p),
        (p, v, t),
        (p, q, v),
        (t, p, v),
        (v, p, q),
    )
    # ``% 6`` guards the case where float rounding pushes the folded hue up to exactly 1.0.
    r, g, b = rgb_by_sector[whole % 6]
    return int(b * 255), int(g * 255), int(r * 255)
def random_color(id):
    """Return a deterministic BGR colour for an integer id.

    Hue and saturation are each derived by shifting the id, XOR-ing it with a
    fixed constant and keeping the result modulo 100 as a fraction; the value
    channel is always 1.
    """
    hue_seed = (id << 2) ^ 0x937151
    saturation_seed = (id << 3) ^ 0x315793
    hue = (hue_seed % 100) / 100.0
    saturation = (saturation_seed % 100) / 100.0
    return hsv2bgr(hue, saturation, 1)
if __name__ == "__main__":
    # Demo: run the OBB model on every image in ./img and save the annotated copies
    # to ./output with a "pre" filename prefix.
    import os

    model = YOLO('ultralytics/cfg/models/v8/yolov8s-obb.yaml').load('runs/obb/train/weights/last.pt')

    # Folder holding the input images
    directory = './img'
    # cv2.imwrite does not create missing folders, so make sure the target exists.
    os.makedirs('./output', exist_ok=True)

    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    box_color = (0, 255, 0)
    # Class/confidence captions are disabled by default; set to True to draw them.
    draw_captions = False

    # Walk over every file in the folder
    for filename in os.listdir(directory):
        # Only handle files with an image extension
        if not filename.endswith(image_suffixes):
            continue
        print(filename)
        path = os.path.join(directory, filename)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None instead of raising when it cannot read a file.
            print(f"skip {filename}: could not be read as an image")
            continue

        results = model(img)[0]
        names = results.names
        boxes = results.obb.data.cpu()
        # Columns 0-4 are passed to xywhr2xyxyxyxy; 5 is read as confidence, 6 as class id.
        confs = boxes[..., 5].tolist()
        classes = list(map(int, boxes[..., 6].tolist()))
        boxes = xywhr2xyxyxyxy(boxes[..., :5])

        for box, confidence, label in zip(boxes, confs, classes):
            cv2.polylines(img, [np.asarray(box, dtype=int)], True, box_color, 2)
            if draw_captions:
                caption = f"{names[label]} {confidence:.2f}"
                w, h = cv2.getTextSize(caption, 0, 1, 2)[0]
                left, top = [int(b) for b in box[0]]
                cv2.rectangle(img, (left - 3, top - 33), (left + w + 10, top), box_color, -1)
                cv2.putText(img, caption, (left, top - 5), 0, 1, (0, 0, 0), 2, 16)

        # cv2.imwrite reports failure through its return value rather than an exception.
        if cv2.imwrite("./output/pre" + filename, img):
            print("save done")
        else:
            print(f"failed to save ./output/pre{filename}")
result.csv数据可视化
import csv
import matplotlib.pyplot as plt
# Plot three loss curves from a training results.csv and save the figure.

# Results file to read
results_file = r"/hy-tmp/ultralytics-main/runs/results.csv"
data = {
    'val_box': [],
    'val_cls': [],
    'val_dfl': [],
}
# CSV column index feeding each series.
# NOTE(review): the series are named val_* but read columns 1-3; confirm these
# indices against the header of the results.csv actually produced.
column_of = {
    'val_box': 2,
    'val_cls': 3,
    'val_dfl': 1,
}

# newline='' is what the csv module asks for when opening files for csv.reader.
with open(results_file, 'r', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row (tolerates an empty file)
    for line in reader:
        if not line:
            # csv.reader yields [] for blank rows; indexing them would fail.
            continue
        for key, column in column_of.items():
            data[key].append(float(line[column]))

# Draw the figure: one panel per series
fig, axs = plt.subplots(1, 3, figsize=(20, 10))
color = 'blue'  # colour of points and lines

# (series key, label used for both the legend entry and the y axis)
panels = [
    ('val_box', 'Angle&HIoU loss'),
    ('val_cls', 'KLD loss'),
    ('val_dfl', 'ProbIoU loss'),
]
for ax, (key, label) in zip(axs, panels):
    series = data[key]
    ax.plot(series, linewidth=2, color=color)
    ax.scatter(range(len(series)), series, s=30, linewidths=0, color=color, label=label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(label)
    ax.legend()

fig.tight_layout()
fig.savefig('/hy-tmp/ultralytics-main/runs/c.jpg')
plt.show()
plt.close()