YOLOv8 OBB 使用DIOR-R

论文拌30天意大利面

已于 2024-05-31 00:42:18 修改

阅读量44

点赞数

文章标签： YOLO

于 2024-03-18 18:04:00 首次发布

原文链接：https://blog.csdn.net/Guo9898/article/details/136945625

版权

第一步：清洗数据

import os
import shutil
import xml.etree.ElementTree as ET
import numpy as np
import cv2

# Step 1 (cleaning): for every image id listed in the original ImageSets,
# read its oriented-box XML, drop degenerate / negative-coordinate boxes,
# write the surviving boxes as text labels, and record which images still
# have at least one valid box.

# Input and output folder paths
input_folder = '../dior-r/Annotations-yuan/Oriented_Bounding_Boxes/'
labels_folder = '../dior-r/labels/'

output_folder = '../dior-r/Annotations/'

kk = 0  # number of annotation files visited
jj = 0  # number of annotation files that kept at least one box
VOCdevkit_sets = ['train',  'val']
VOCdevkit_path = '../dior-r'

# Corner tags inside <robndbox>, in the order they are written to the label line
corner_tags = (
    'x_left_top', 'y_left_top',
    'x_right_top', 'y_right_top',
    'x_right_bottom', 'y_right_bottom',
    'x_left_bottom', 'y_left_bottom',
)

# Create the output locations up front so the first write cannot fail on a missing folder
for out_dir in (labels_folder, output_folder, os.path.join(VOCdevkit_path, 'ImageSets/Main')):
    os.makedirs(out_dir, exist_ok=True)

for image_set in VOCdevkit_sets:
    # Original ImageSets list (the folder name may need adjusting for your copy of the dataset)
    source_list = os.path.join(VOCdevkit_path, 'ImageSets-yuan/Main/%s.txt' % (image_set))
    # Cleaned ImageSets list
    target_list = os.path.join(VOCdevkit_path, 'ImageSets/Main/%s.txt' % (image_set))

    with open(source_list, encoding='utf-8') as f_source:
        image_ids = f_source.read().strip().split()

    # The context manager guarantees the cleaned list is flushed and closed,
    # even if parsing one of the XML files raises.
    with open(target_list, 'w', encoding='utf-8') as f_target:
        # Walk over every XML file named by the image-set list
        for image_id in image_ids:
            filename = image_id + '.xml'
            kk = kk + 1
            xml_path = os.path.join(input_folder, filename)
            print(f'convert {filename}')

            # Parse the XML file
            tree = ET.parse(xml_path)
            root = tree.getroot()

            # Label lines that survive the cleaning for this image
            text_data = []

            # Extract the required fields from each <object>
            for obj in root.findall('object'):
                robndbox = obj.find('robndbox')
                if robndbox is None:
                    continue

                corner_text = [robndbox.find(tag).text for tag in corner_tags]
                name = obj.find('name').text
                difficult = obj.find('difficult').text

                corners = [float(value) for value in corner_text]

                # The int32 cast truncates fractional coordinates before the size check
                poly = np.array(corners, np.int32).reshape(4, 2)
                (x, y), (w, h), angle = cv2.minAreaRect(poly)
                if w <= 0.001 or h <= 0.001 or any(value < 0 for value in corners):
                    # Degenerate box or negative coordinate: report it and drop it
                    print('++++++++++',filename,w,h,poly)
                else:
                    # Keep the original coordinate text so no precision is lost
                    text_line = f"{' '.join(corner_text)} {name} {difficult}\n"
                    text_data.append(text_line)

            if len(text_data) == 0:
                # No valid box left: the image is excluded from the cleaned set
                print('-----------',filename)
            else:
                # Build the output label path
                output_path = os.path.join(labels_folder, filename.replace('.xml', '.txt'))

                # Write the label lines
                with open(output_path, 'w') as output_file:
                    output_file.writelines(text_data)
                jj = jj + 1

                f_target.write(image_id + '\n')
                shutil.copy(xml_path, os.path.join(output_folder, filename))

print("转换完成！",'原始labels总数：',kk,'清洗后的labels总数：',jj)

第二步：移动数据

# -*- coding: utf-8 -*-
import shutil
import os
 
img_label_path = '../dior-r/'


# Adjust img_label_path (used to locate the image-set list) and the newpath argument for your dataset
def objFileName(oldpath,sets,newpath,name_strip,k):
    """Copy every file named in an image-set list from ``oldpath`` into ``newpath``.

    The list is read from ``img_label_path + "ImageSets/Main/<sets>.txt"``, one
    image id per line. For each id the file ``oldpath/<id><name_strip>`` is
    copied into ``newpath``.

    Args:
        oldpath: Folder that currently holds the files.
        sets: Image-set name, used to pick the list file and in the summary line.
        newpath: Destination folder (must already exist).
        name_strip: File extension appended to each id, e.g. ``".jpg"``.
        k: Label for the kind of file being copied; only used in the summary line.

    Returns:
        The list of source paths that were copied, in list-file order.

    Raises:
        FileNotFoundError: If the list file or one of the listed files is missing.
    """
    local_file_name_list = img_label_path + "ImageSets/Main/{}.txt".format(sets)
    obj_name_list = []
    with open(local_file_name_list, 'r', encoding='utf-8') as list_file:
        for line in list_file:
            # strip() also removes a stray '\r' or padding, not just the newline
            image_id = line.strip()
            if not image_id:
                # Blank lines would otherwise turn into the bogus path "<oldpath>/<ext>"
                continue
            source = oldpath + '/' + image_id + name_strip
            print(source, newpath)
            # Explicit check instead of assert, which is skipped under `python -O`
            if not os.path.exists(source):
                raise FileNotFoundError(source)
            shutil.copy(source, newpath)
            obj_name_list.append(source)
        print(sets+'的'+ k + '数量为：/{}'.format(len(obj_name_list)))
    return obj_name_list
 
 
def copy_img():
    """Copy images, XML annotations and text labels into per-split folders.

    For each of the "train" and "val" splits, the files named by that split's
    image-set list are copied from the shared source folders into
    ``<img_label_path>/<split>/<sub-folder>``, creating the split folder and
    the sub-folder when they do not exist yet.
    """
    # (source folder, file extension, destination sub-folder): the former is copied into the latter
    transfers = [
        ("JPEGImages-trainval", ".jpg", "images"),
        ("Annotations", ".xml", "Annotations"),
        ("labels", ".txt", "labelTxt"),
    ]

    for sets in ["train", "val"]:
        for source_name, extension, target_name in transfers:
            # Folder the files are copied from, and the per-split root they go under
            source_dir = img_label_path + source_name
            split_dir = img_label_path + sets
            print(source_dir ,split_dir)

            if not os.path.exists(split_dir):
                os.mkdir(split_dir)

            target_dir = split_dir + '/' + target_name
            if not os.path.exists(target_dir):
                os.mkdir(target_dir)

            objFileName(source_dir, sets, target_dir, extension, target_name)
            print("finally!!")



if __name__ == '__main__':

    copy_img()

第三步：转换数据格式【YOLOv8 专属】

import json
from collections import defaultdict
from pathlib import Path

import cv2
import numpy as np

from ultralytics.utils import LOGGER, TQDM
from ultralytics.utils.files import increment_path


def convert_aod_to_yolo_obb(dota_root_path: str):
    """
    Converts AOD dataset annotations to YOLO OBB (Oriented Bounding Box) format.

    The function processes the ``.png`` images in the 'train2017' and 'val2017' folders. For each image, it reads the
    associated label file from the original labels directory and writes a new label file in YOLO OBB format
    (``class_index x1 y1 x2 y2 x3 y3 x4 y4``, coordinates divided by the image width / height).

    Each input line must hold eight corner coordinates followed by the class name; any further fields are ignored
    and lines with fewer than nine fields are skipped. Images that OpenCV cannot read are skipped with a warning.

    Args:
        dota_root_path (str): The root directory path of the AOD dataset.

    Raises:
        FileNotFoundError: If an image has no matching label file in the original labels directory.
        KeyError: If a label line names a class that is not in the class mapping.

    Example:
        ```python
        convert_aod_to_yolo_obb('path/to/AOD')
        ```

    Notes:
        The directory structure assumed for the AOD dataset:

            - AOD
                ├─ images
                │   ├─ train2017
                │   └─ val2017
                └─ labels-yuan
                    ├─ train2017
                    └─ val2017

        After execution, the function will organize the labels into:

            - AOD
                └─ labels
                    ├─ train2017
                    └─ val2017
    """
    import warnings  # local import: only needed to report unreadable images

    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {
        "car": 0,
        "airplane": 1,
    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Converts a single image's annotation file to YOLO OBB format and saves it to a specified directory."""
        orig_label_path = orig_label_dir / f"{image_name}.txt"
        save_path = save_dir / f"{image_name}.txt"

        with orig_label_path.open("r") as f, save_path.open("w") as g:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue
                class_idx = class_mapping[parts[8]]
                coords = [float(p) for p in parts[:8]]
                # Even positions are x values (scaled by width), odd positions are y values (scaled by height)
                normalized_coords = [
                    value / (image_width if i % 2 == 0 else image_height) for i, value in enumerate(coords)
                ]
                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ["train2017", "val2017"]:
        image_dir = dota_root_path / "images" / phase
        orig_label_dir = dota_root_path / "labels-yuan" / f"{phase}"
        save_dir = dota_root_path / "labels" / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
            # Compare case-insensitively so ".PNG" files are not silently ignored
            if image_path.suffix.lower() != ".png":
                continue
            img = cv2.imread(str(image_path))
            if img is None:
                # cv2.imread signals failure by returning None instead of raising
                warnings.warn(f"Skipping unreadable image: {image_path}")
                continue
            h, w = img.shape[:2]
            convert_label(image_path.stem, w, h, orig_label_dir, save_dir)
            
def convert_dior_to_yolo_obb(dota_root_path: str):  # clean, convert format, move
    """
    Converts DIOR-R dataset annotations to YOLO OBB (Oriented Bounding Box) format.

    The function processes the ``.jpg`` images in the 'train' and 'val' folders. For each image, it reads the
    associated label file from the split's 'labelTxt' directory and writes a new label file in YOLO OBB format
    (``class_index x1 y1 x2 y2 x3 y3 x4 y4``, coordinates divided by the image width / height).

    Each input line must hold eight corner coordinates followed by the class name; any further fields are ignored
    and lines with fewer than nine fields are skipped. Images that OpenCV cannot read are skipped with a warning.

    Args:
        dota_root_path (str): The root directory path of the DIOR-R dataset.

    Raises:
        FileNotFoundError: If an image has no matching label file in the 'labelTxt' directory.
        KeyError: If a label line names a class that is not in the class mapping.

    Example:
        ```python
        convert_dior_to_yolo_obb('path/to/dior-r')
        ```

    Notes:
        The directory structure assumed for the dataset:

            - root
                ├─ train
                │   ├─ images
                │   └─ labelTxt
                └─ val
                    ├─ images
                    └─ labelTxt

        After execution, a 'labels' folder is added next to 'images' in each split.
    """
    import warnings  # local import: only needed to report unreadable images

    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {"airplane":0,
        "airport":1,
        "baseballfield":2,
        "basketballcourt":3,
        "bridge":4,
        "chimney":5,
        "Expressway-Service-area":6,
        "Expressway-toll-station":7,
        "dam":8,
        "golffield":9,
        "groundtrackfield":10,
        "harbor":11,
        "overpass":12,
        "ship":13,
        "stadium":14,
        "storagetank":15,
        "tenniscourt":16,
        "trainstation":17,
        "vehicle":18,
        "windmill":19,
    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Converts a single image's annotation file to YOLO OBB format and saves it to a specified directory."""
        orig_label_path = orig_label_dir / f"{image_name}.txt"
        save_path = save_dir / f"{image_name}.txt"

        with orig_label_path.open("r") as f, save_path.open("w") as g:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue
                class_idx = class_mapping[parts[8]]
                coords = [float(p) for p in parts[:8]]
                # Even positions are x values (scaled by width), odd positions are y values (scaled by height)
                normalized_coords = [
                    value / (image_width if i % 2 == 0 else image_height) for i, value in enumerate(coords)
                ]
                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ["train", "val"]:
        image_dir = dota_root_path  / phase / "images"
        orig_label_dir = dota_root_path / f"{phase}" / "labelTxt"
        save_dir = dota_root_path / phase / "labels"

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
            # Compare case-insensitively so ".JPG" files are not silently ignored
            if image_path.suffix.lower() != ".jpg":
                continue
            img = cv2.imread(str(image_path))
            if img is None:
                # cv2.imread signals failure by returning None instead of raising
                warnings.warn(f"Skipping unreadable image: {image_path}")
                continue
            h, w = img.shape[:2]
            convert_label(image_path.stem, w, h, orig_label_dir, save_dir)
            
# Run the AOD conversion; use the commented-out call below instead to convert DIOR-R.
convert_aod_to_yolo_obb('../UCAS_AOD')
# convert_dior_to_yolo_obb('../dior-r')
# The directory layout under the dataobb folder is explained in detail below

detect.py

import cv2
import torch
import numpy as np
from ultralytics import YOLO

def xywhr2xyxyxyxy(center):
    """Convert rotated boxes from (cx, cy, w, h, angle) to their four corner points.

    Args:
        center: A numpy array or torch tensor whose last axis holds
            ``cx, cy, w, h, angle`` with the angle in radians.

    Returns:
        An array/tensor of the same kind with shape ``(..., 4, 2)`` holding
        the corner points of every box.
    """
    # reference: https://github.com/ultralytics/ultralytics/blob/v8.1.0/ultralytics/utils/ops.py#L545
    if isinstance(center, np.ndarray):
        cos_fn, sin_fn = np.cos, np.sin

        def join(parts):
            return np.concatenate(parts, axis=-1)

        def stack_corners(points):
            return np.stack(points, axis=-2)
    else:
        cos_fn, sin_fn = torch.cos, torch.sin

        def join(parts):
            return torch.cat(parts, dim=-1)

        def stack_corners(points):
            return torch.stack(points, dim=-2)

    # Slices (rather than plain indices) keep the trailing axis for broadcasting
    xy = center[..., :2]
    width = center[..., 2:3]
    height = center[..., 3:4]
    theta = center[..., 4:5]

    cos_t = cos_fn(theta)
    sin_t = sin_fn(theta)

    # Half-extent vectors along the box's width and height directions
    along_w = join([width / 2 * cos_t, width / 2 * sin_t])
    along_h = join([-height / 2 * sin_t, height / 2 * cos_t])

    forward = xy + along_w
    backward = xy - along_w
    corners = [forward + along_h, forward - along_h, backward - along_h, backward + along_h]
    return stack_corners(corners)

def hsv2bgr(h, s, v):
    """Convert an HSV colour to an 8-bit BGR tuple.

    Args:
        h: Hue as a fraction of a full turn. Values outside ``[0, 1)`` wrap
            around, so ``1.0`` gives the same colour as ``0.0``.
        s: Saturation in ``[0, 1]``.
        v: Value (brightness) in ``[0, 1]``.

    Returns:
        A ``(b, g, r)`` tuple of ints, each channel scaled by 255 and truncated.
    """
    sector = int(h * 6)
    f = h * 6 - sector
    p = v * (1 - s)
    q = v * (1 - f * s)
    t = v * (1 - (1 - f) * s)

    # One (r, g, b) arrangement per 60-degree hue sector. Taking the sector
    # modulo 6 makes h == 1.0 land in sector 0 instead of matching no sector
    # and falling through to black.
    r, g, b = (
        (v, t, p),
        (q, v, p),
        (p, v, t),
        (p, q, v),
        (t, p, v),
        (v, p, q),
    )[sector % 6]

    return int(b * 255), int(g * 255), int(r * 255)

def random_color(id):
    """Return a deterministic BGR colour for an integer id.

    The id is shifted and XOR-ed with fixed constants to derive a hue and a
    saturation in steps of 0.01; brightness is always 1.
    """
    hue_step = ((id << 2) ^ 0x937151) % 100
    saturation_step = ((id << 3) ^ 0x315793) % 100
    return hsv2bgr(hue_step / 100.0, saturation_step / 100.0, 1)

if __name__ == "__main__":
    # Run the trained OBB model on every image in ./img and save a copy with
    # the predicted rotated boxes drawn on it to ./output/pre<filename>.

    import os

    model = YOLO('ultralytics/cfg/models/v8/yolov8s-obb.yaml').load('runs/obb/train/weights/last.pt')

    # Folder holding the input images
    directory = './img'
    # Folder the annotated images are written to; created if missing because
    # cv2.imwrite does not create it and only reports failure via its return value
    output_dir = './output'
    os.makedirs(output_dir, exist_ok=True)

    # Set to True to also draw a "<class> <confidence>" caption on each box
    draw_caption = False
    color = (0, 255, 0)

    # Walk over every file in the input folder
    for filename in os.listdir(directory):
        # Only handle files with an image extension (case-insensitive)
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            continue

        print(filename)
        path = os.path.join(directory, filename)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None for files it cannot decode
            print(f"skip unreadable image: {path}")
            continue

        results = model(img)[0]
        names = results.names
        # Columns 0-4 are passed on as the rotated box, column 5 is read as confidence, column 6 as class id
        detections = results.obb.data.cpu()
        confs = detections[..., 5].tolist()
        classes = list(map(int, detections[..., 6].tolist()))
        boxes = xywhr2xyxyxyxy(detections[..., :5])

        for box, confidence, label in zip(boxes, confs, classes):
            cv2.polylines(img, [np.asarray(box, dtype=int)], True, color, 2)
            if draw_caption:
                caption = f"{names[label]} {confidence:.2f}"
                w, h = cv2.getTextSize(caption, 0, 1, 2)[0]
                left, top = [int(b) for b in box[0]]
                cv2.rectangle(img, (left - 3, top - 33), (left + w + 10, top), color, -1)
                cv2.putText(img, caption, (left, top - 5), 0, 1, (0, 0, 0), 2, 16)

        if cv2.imwrite(os.path.join(output_dir, "pre" + filename), img):
            print("save done")
        else:
            print(f"failed to save result for: {filename}")

results.csv 数据可视化

import csv
import matplotlib.pyplot as plt
 
# 读取结果文件
results_file = r"/hy-tmp/ultralytics-main/runs/results.csv"
data = {
    'val_box': [],
    'val_cls': [],
    'val_dfl': [],
}

# Which CSV column feeds which series.
# NOTE(review): confirm that columns 2, 3 and 1 of this results.csv really
# hold the curves named below - the indices are kept exactly as they were.
series_columns = (
    ('val_box', 2),
    ('val_cls', 3),
    ('val_dfl', 1),
)

# Read the results file
with open(results_file, 'r') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for line in reader:
        for series_name, column in series_columns:
            data[series_name].append(float(line[column]))

# Draw one panel per series, side by side
fig, axs = plt.subplots(1, 3, figsize=(20, 10))

color = 'blue'  # colour shared by the points and the lines

# (series, axis/legend label) for each panel, left to right
panels = (
    ('val_box', 'Angle&HIoU loss'),
    ('val_cls', 'KLD loss'),
    ('val_dfl', 'ProbIoU loss'),
)

for ax, (series_name, title) in zip(axs, panels):
    values = data[series_name]
    ax.plot(values, linewidth=2, color=color)
    ax.scatter(range(len(values)), values, s=30, linewidths=0, color=color, label=title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(title)
    ax.legend()

fig.tight_layout()

fig.savefig('/hy-tmp/ultralytics-main/runs/c.jpg')

plt.show()
plt.close()