YoloV8数据增强扩增数据集OBB(旋转、缩放、噪声、模糊、抖动)

本文链接：https://blog.csdn.net/qq_40938217/article/details/144453547

使用 YOLO 训练自己的数据集时，如果数据量较少，可以通过数据增强来扩充已有数据，这是提升训练效果的一种有效方法。

数据增强方案

这里根据以下几种常见的变换和噪声进行随机增强

旋转
平移
缩放

def apply_transform(image: np.ndarray, annotations: List[Tuple[int, List[Tuple[float, float]]]],
                    rotation_angle=0, scale=1.0, translation=(0, 0)) -> Tuple[np.ndarray, List[Tuple[int, List[Tuple[float, float]]]]]:
    """
    Apply one affine transform (rotation, scaling, translation) to an image and its labels.

    :param image: Input image array; its first two shape entries are read as (height, width).
    :param annotations: Labels as [(class_id, [(x1, y1), ..., (x4, y4)])], with coordinates
        normalized by the image width and height.
    :param rotation_angle: Rotation angle in degrees, about the image centre.
    :param scale: Scale factor applied together with the rotation.
    :param translation: Pixel offset (dx, dy) added to the affine matrix.
    :return: The warped image (same size as the input) and the transformed labels.
        Transformed coordinates are not clipped, so they may fall outside [0, 1].
    """
    height, width = image.shape[:2]

    # 2x3 affine matrix: rotate/scale about the image centre, then shift.
    affine = cv2.getRotationMatrix2D((width / 2, height / 2), rotation_angle, scale)
    affine[:, 2] += translation

    warped = cv2.warpAffine(image, affine, (width, height),
                            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)

    def _map_point(x, y):
        # Normalized -> pixel, apply the affine matrix, then pixel -> normalized.
        px, py = affine.dot(np.array([x * width, y * height, 1]))
        return (px / width, py / height)

    moved = [
        (class_id, [_map_point(x, y) for x, y in corners])
        for class_id, corners in annotations
    ]
    return warped, moved

噪声

def add_noise(image, noise_level=10):
    """Return a copy of ``image`` with zero-mean Gaussian noise added.

    NumPy input is first wrapped with ``Image.fromarray``. The noise has
    standard deviation ``noise_level``; the noisy values are clamped to
    [0, 255] and converted to ``uint8`` before being returned via
    ``Image.fromarray``.
    """
    source = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    pixels = np.array(source).astype(float)
    # One independent sample per array element.
    pixels += np.random.normal(0, noise_level, pixels.shape)
    np.clip(pixels, 0, 255, out=pixels)
    as_bytes = pixels.astype(np.uint8)
    return Image.fromarray(as_bytes)

模糊

def add_blur(image, blur_radius=2):
    """Blur ``image`` with a Gaussian filter of radius ``blur_radius``.

    NumPy arrays are wrapped with ``Image.fromarray`` first; any other input
    is used as-is and must provide a ``filter`` method. The return value is
    whatever that ``filter`` call produces.
    """
    source = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    gaussian = ImageFilter.GaussianBlur(blur_radius)
    return source.filter(gaussian)

抖动

def add_jitter(image, intensity=5):
    """Randomly displace every pixel by up to ``intensity`` pixels along each axis.

    Each output pixel is copied from a source pixel whose row and column are
    offset by independent uniform integers in ``[-intensity, intensity]``;
    source positions are clamped to the image border.

    :param image: NumPy array, or any object ``np.array`` can convert, shaped
        (H, W) or (H, W, C).
    :param intensity: Maximum per-axis displacement in pixels.
    :return: The jittered picture wrapped with ``Image.fromarray``.
    """
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    # Only the first two axes are spatial; this also accepts grayscale input.
    h, w = image.shape[:2]

    # Per-pixel random offsets (row offsets are drawn first, then column offsets).
    row_shift = np.random.randint(-intensity, intensity + 1, size=(h, w))
    col_shift = np.random.randint(-intensity, intensity + 1, size=(h, w))

    # Broadcast the base coordinates against the offsets and clamp to the image.
    src_rows = np.clip(np.arange(h)[:, None] + row_shift, 0, h - 1)
    src_cols = np.clip(np.arange(w)[None, :] + col_shift, 0, w - 1)

    # Fancy indexing gathers one source pixel per output location.
    jittered = image[src_rows, src_cols]

    return Image.fromarray(jittered)

旋转、平移、缩放会改变目标在图像中的位置，因此必须对标签坐标做相同的变换，保证变换后的标注与图像保持一致。

本人数据集是 YOLOv8 OBB 格式，坐标归一化到 0-1，每行一个目标，字段之间以空格分隔：
class_id x1 y1 x2 y2 x3 y3 x4 y4
完整代码如下

import os
import cv2
import numpy as np
import random
from typing import List, Tuple
from PIL import Image, ImageFilter, ImageEnhance
import time
def add_jitter(image, intensity=5):
    """Randomly displace every pixel by up to ``intensity`` pixels along each axis.

    Each output pixel is copied from a source pixel whose row and column are
    offset by independent uniform integers in ``[-intensity, intensity]``;
    source positions are clamped to the image border.

    :param image: NumPy array, or any object ``np.array`` can convert, shaped
        (H, W) or (H, W, C).
    :param intensity: Maximum per-axis displacement in pixels.
    :return: The jittered picture wrapped with ``Image.fromarray``.
    """
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    # Only the first two axes are spatial; this also accepts grayscale input.
    h, w = image.shape[:2]

    # Per-pixel random offsets (row offsets are drawn first, then column offsets).
    row_shift = np.random.randint(-intensity, intensity + 1, size=(h, w))
    col_shift = np.random.randint(-intensity, intensity + 1, size=(h, w))

    # Broadcast the base coordinates against the offsets and clamp to the image.
    src_rows = np.clip(np.arange(h)[:, None] + row_shift, 0, h - 1)
    src_cols = np.clip(np.arange(w)[None, :] + col_shift, 0, w - 1)

    # Fancy indexing gathers one source pixel per output location.
    jittered = image[src_rows, src_cols]

    return Image.fromarray(jittered)
def add_noise(image, noise_level=10):
    """Return a copy of ``image`` with zero-mean Gaussian noise added.

    NumPy input is first wrapped with ``Image.fromarray``. The noise has
    standard deviation ``noise_level``; the noisy values are clamped to
    [0, 255] and converted to ``uint8`` before being returned via
    ``Image.fromarray``.
    """
    source = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    pixels = np.array(source).astype(float)
    # One independent sample per array element.
    pixels += np.random.normal(0, noise_level, pixels.shape)
    np.clip(pixels, 0, 255, out=pixels)
    as_bytes = pixels.astype(np.uint8)
    return Image.fromarray(as_bytes)

def add_blur(image, blur_radius=2):
    """Blur ``image`` with a Gaussian filter of radius ``blur_radius``.

    NumPy arrays are wrapped with ``Image.fromarray`` first; any other input
    is used as-is and must provide a ``filter`` method. The return value is
    whatever that ``filter`` call produces.
    """
    source = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    gaussian = ImageFilter.GaussianBlur(blur_radius)
    return source.filter(gaussian)

def read_annotations(label_path: str) -> List[Tuple[int, List[Tuple[float, float]]]]:
    """
    Read a label file with one whitespace-separated record per line:
    ``class_id x1 y1 x2 y2 ...``.

    Blank and whitespace-only lines (for example a trailing empty line) are
    skipped instead of raising ``IndexError``.

    :param label_path: Path of the label file.
    :return: Annotations as [(class_id, [(x1, y1), (x2, y2), ...])], in file order.
    :raises IndexError: If a record ends with an x value that has no matching y.
    :raises ValueError: If a field cannot be parsed as a number.
    """
    annotations = []
    with open(label_path, 'r') as f:
        for line in f:
            # split() with no argument already discards surrounding whitespace.
            parts = line.split()
            if not parts:
                continue
            class_id = int(parts[0])
            # Fields after the class id are consecutive (x, y) pairs.
            points = [(float(parts[i]), float(parts[i + 1])) for i in range(1, len(parts), 2)]
            annotations.append((class_id, points))
    return annotations
def apply_effects(image, jitter=False, jitter_intensity=5,
                  noise=False, noise_level=10,
                  blur=False, blur_radius=2):
    """Run the enabled effects on ``image`` in the order jitter, noise, blur.

    Returns ``(image, name_prefix)``. The prefix starts with ``'damaged_'``
    and gains one ``<effect>_<parameter>_`` segment per effect that was applied.
    """
    name_parts = ['damaged_']

    if jitter:
        image = add_jitter(image, jitter_intensity)
        name_parts.append('jitter_{}_'.format(jitter_intensity))

    if noise:
        image = add_noise(image, noise_level)
        name_parts.append('noise_{}_'.format(noise_level))

    if blur:
        image = add_blur(image, blur_radius)
        name_parts.append('blur_{}_'.format(blur_radius))

    return image, ''.join(name_parts)

def apply_transform(image: np.ndarray, annotations: List[Tuple[int, List[Tuple[float, float]]]],
                    rotation_angle=0, scale=1.0, translation=(0, 0)) -> Tuple[np.ndarray, List[Tuple[int, List[Tuple[float, float]]]]]:
    """
    Apply one affine transform (rotation, scaling, translation) to an image and its labels.

    Labels whose corners all land outside the image are dropped; corners of the
    remaining labels are clipped to [0, 1] so the saved label file never contains
    out-of-range normalized coordinates.

    :param image: Input image array; its first two shape entries are read as (height, width).
    :param annotations: Labels as [(class_id, [(x1, y1), ..., (x4, y4)])], with coordinates
        normalized by the image width and height.
    :param rotation_angle: Rotation angle in degrees, about the image centre.
    :param scale: Scale factor applied together with the rotation.
    :param translation: Pixel offset (dx, dy) added to the affine matrix.
    :return: The warped image (same size as the input) and the transformed labels.
    """
    h, w = image.shape[:2]

    # 2x3 affine matrix: rotate/scale about the image centre, then shift.
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, rotation_angle, scale)
    M[:, 2] += translation

    # Warp the image with the same matrix; the output keeps the input size.
    transformed_image = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)

    transformed_annotations = []
    for class_id, points in annotations:
        # Normalized -> pixel, apply the affine matrix, then pixel -> normalized.
        raw_points = []
        for x, y in points:
            pt = np.dot(M, np.array([x * w, y * h, 1]))
            raw_points.append((pt[0] / w, pt[1] / h))

        # Decide on the unclipped coordinates: once clipped, every corner
        # would look as if it were inside the image.
        if not any(0.0 <= nx <= 1.0 and 0.0 <= ny <= 1.0 for nx, ny in raw_points):
            continue

        # NOTE: per-coordinate clipping can turn a rotated box into a general
        # quadrilateral when it straddles the image border.
        clipped_points = [
            (max(0.0, min(1.0, nx)), max(0.0, min(1.0, ny)))
            for nx, ny in raw_points
        ]
        transformed_annotations.append((class_id, clipped_points))

    return transformed_image, transformed_annotations


def save_annotations(label_path: str, annotations: List[Tuple[int, List[Tuple[float, float]]]]):
    """
    Write annotations to a label file, one record per line.

    Each line is ``class_id x1 y1 x2 y2 ...`` with every coordinate formatted
    to six decimal places. An existing file at ``label_path`` is overwritten.

    :param label_path: Destination path of the label file.
    :param annotations: Annotations as [(class_id, [(x, y), ...])].
    """
    def _record(class_id, points):
        coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in points)
        return f"{class_id} {coords}\n"

    with open(label_path, 'w') as out:
        # The generator formats and writes records one at a time.
        out.writelines(_record(class_id, points) for class_id, points in annotations)


def augment_dataset(image_dir: str, label_dir: str, output_dir: str, num_augmentations: int = 10):
    """
    Augment a dataset offline and write the results to disk.

    For every ``.jpg``/``.bmp`` image (extension matched case-insensitively) in
    ``image_dir`` that has a same-named ``.txt`` label file in ``label_dir``,
    ``num_augmentations`` variants are produced. Each variant gets a random
    rotation, scale and translation, followed by randomly enabled jitter,
    noise and blur. Images go to ``<output_dir>/images`` and labels to
    ``<output_dir>/labels``.

    Images that cannot be decoded, or that have no label file, are reported
    and skipped so that one bad file does not abort the whole run.

    :param image_dir: Directory with the original images.
    :param label_dir: Directory with the original label files.
    :param output_dir: Root output directory; created if missing.
    :param num_augmentations: Number of augmented samples per image.
    """
    images_output = os.path.join(output_dir, "images")
    labels_output = os.path.join(output_dir, "labels")
    # makedirs also creates output_dir itself when it does not exist yet.
    os.makedirs(images_output, exist_ok=True)
    os.makedirs(labels_output, exist_ok=True)

    image_files = [f for f in os.listdir(image_dir) if f.lower().endswith((".jpg", ".bmp"))]
    for image_file in image_files:
        base_name = os.path.splitext(image_file)[0]
        image_path = os.path.join(image_dir, image_file)
        label_path = os.path.join(label_dir, base_name + ".txt")

        if not os.path.isfile(label_path):
            print(f"Skipped: {image_file} (label file not found: {label_path})")
            continue

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipped: {image_file} (image could not be read)")
            continue
        annotations = read_annotations(label_path)

        for i in range(num_augmentations):
            # Random geometric parameters.
            rotation_angle = random.uniform(-20, 20)  # degrees
            scale = random.uniform(0.6, 1.1)
            translation = (random.randint(-15, 15), random.randint(-15, 15))  # pixels

            transformed_image, transformed_annotations = apply_transform(
                image, annotations, rotation_angle, scale, translation
            )

            # Random photometric effects; each one is switched on independently.
            jitter_intensity = random.choice([1, 2])
            noise_level = random.choice([7, 8, 9])
            blur_radius = random.choice([0, 1])
            j_flag = random.choice([0, 1])
            n_flag = random.choice([0, 1])
            b_flag = random.choice([0, 1])
            transformed_image, _ = apply_effects(
                transformed_image,
                jitter=j_flag, jitter_intensity=jitter_intensity,
                noise=n_flag, noise_level=noise_level,
                blur=b_flag, blur_radius=blur_radius,
            )
            # The effects may hand back a PIL image; cv2.imwrite needs an array.
            transformed_image = np.array(transformed_image)

            output_image_name = f"{base_name}_augdzbig_{i:02d}.jpg"
            output_label_name = f"{base_name}_augdzbig_{i:02d}.txt"
            # cv2.imwrite returns False on failure; do not leave an orphan label.
            if not cv2.imwrite(os.path.join(images_output, output_image_name), transformed_image):
                print(f"Failed to write: {output_image_name}")
                continue
            save_annotations(os.path.join(labels_output, output_label_name), transformed_annotations)

            print(f"Saved: {output_image_name}, {output_label_name}")


# Example run: augment the training split, producing 10 variants per image.
image_dir = "/images/train"
label_dir = "/labels/train"
output_dir = "/augx10"
augment_dataset(
    image_dir=image_dir,
    label_dir=label_dir,
    output_dir=output_dir,
    num_augmentations=10,
)

更新 apply_transform 函数：变换后的坐标可能超出 [0, 1] 范围，因此需要把坐标裁剪到该范围内，并丢弃完全越界的标注。

def apply_transform(image: np.ndarray, annotations: List[Tuple[int, List[Tuple[float, float]]]],
                    rotation_angle=0, scale=1.0, translation=(0, 0)) -> Tuple[np.ndarray, List[Tuple[int, List[Tuple[float, float]]]]]:
    """
    Apply one affine transform (rotation, scaling, translation) to an image and its labels.

    Labels whose corners all land outside the image are dropped; corners of the
    remaining labels are clipped to [0, 1].

    :param image: Input image array; its first two shape entries are read as (height, width).
    :param annotations: Labels as [(class_id, [(x1, y1), ..., (x4, y4)])], with coordinates
        normalized by the image width and height.
    :param rotation_angle: Rotation angle in degrees, about the image centre.
    :param scale: Scale factor applied together with the rotation.
    :param translation: Pixel offset (dx, dy) added to the affine matrix.
    :return: The warped image (same size as the input) and the transformed labels.
    """
    h, w = image.shape[:2]

    # 2x3 affine matrix: rotate/scale about the image centre, then shift.
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, rotation_angle, scale)
    M[:, 2] += translation

    # Warp the image with the same matrix; the output keeps the input size.
    transformed_image = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)

    transformed_annotations = []
    for class_id, points in annotations:
        # Normalized -> pixel, apply the affine matrix, then pixel -> normalized.
        raw_points = []
        for x, y in points:
            pt = np.dot(M, np.array([x * w, y * h, 1]))
            raw_points.append((pt[0] / w, pt[1] / h))

        # The bounds check must look at the unclipped coordinates: counting
        # out-of-range corners after clipping always yields zero, so nothing
        # would ever be filtered out.
        num_out_of_bounds = sum(
            not (0.0 <= nx <= 1.0 and 0.0 <= ny <= 1.0) for nx, ny in raw_points
        )

        # Policy: tolerate partially out-of-frame boxes, drop fully out-of-frame ones.
        if num_out_of_bounds < len(points):
            # NOTE: per-coordinate clipping can turn a rotated box into a
            # general quadrilateral when it straddles the image border.
            clipped_points = [
                (max(0.0, min(1.0, nx)), max(0.0, min(1.0, ny)))
                for nx, ny in raw_points
            ]
            transformed_annotations.append((class_id, clipped_points))

    return transformed_image, transformed_annotations