在用 YOLO 训练自己的数据集时，如果数据量较少，可以通过数据增强来扩充已有数据，这是提升训练效果的一种有效方法。
数据增强方案
这里使用以下几种常见的几何变换和噪声，对图像进行随机增强：
- 旋转
- 平移
- 缩放
def apply_transform(image: np.ndarray, annotations: List[Tuple[int, List[Tuple[float, float]]]],
                    rotation_angle=0, scale=1.0, translation=(0, 0)) -> Tuple[np.ndarray, List[Tuple[int, List[Tuple[float, float]]]]]:
    """
    Apply one affine transform (rotation, scaling, translation) to an image and its labels.

    The rotation/scale matrix is built around the image centre and the translation
    is then added to its last column. The same matrix warps the image and moves
    every label point, so the labels stay aligned with the pixels.

    :param image: input image; only its first two shape entries (height, width) are read.
    :param annotations: labels as [(class_id, [(x1, y1), ..., (x4, y4)])], with
        coordinates normalised by image width and height.
    :param rotation_angle: rotation angle in degrees, passed to cv2.getRotationMatrix2D.
    :param scale: isotropic scale factor, passed to cv2.getRotationMatrix2D.
    :param translation: (dx, dy) shift in pixels added to the matrix.
    :return: the warped image (same size as the input) and the transformed labels.
        Points are NOT clipped here, so they may fall outside the [0, 1] range.
    """
    height, width = image.shape[:2]

    # Rotation + scale about the image centre, then the pixel shift on top.
    affine = cv2.getRotationMatrix2D((width / 2, height / 2), rotation_angle, scale)
    affine[:, 2] += translation

    warped = cv2.warpAffine(image, affine, (width, height),
                            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)

    def _map_point(x, y):
        # Normalised -> pixel coordinates, apply the 2x3 matrix, normalise again.
        px, py = np.dot(affine, np.array([x * width, y * height, 1]))
        return px / width, py / height

    moved = [
        (class_id, [_map_point(x, y) for x, y in points])
        for class_id, points in annotations
    ]
    return warped, moved
- 噪声
def add_noise(image, noise_level=10):
    """
    Add zero-mean Gaussian noise to an image.

    :param image: a PIL image or a numpy array (arrays are first wrapped with Image.fromarray).
    :param noise_level: standard deviation of the noise, in pixel-value units.
    :return: a PIL image whose values were clipped to 0..255 and cast to uint8.
    """
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    # Work in float so the noise can push values below 0 / above 255 before clipping.
    pixels = np.array(pil_image).astype(float)
    disturbance = np.random.normal(0, noise_level, pixels.shape)
    clipped = np.clip(pixels + disturbance, 0, 255)
    return Image.fromarray(clipped.astype(np.uint8))
- 模糊
def add_blur(image, blur_radius=2):
    """
    Blur an image with a Gaussian filter.

    :param image: a PIL image or a numpy array (arrays are first wrapped with Image.fromarray).
    :param blur_radius: radius handed to ImageFilter.GaussianBlur.
    :return: the filtered PIL image.
    """
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    gaussian = ImageFilter.GaussianBlur(blur_radius)
    return pil_image.filter(gaussian)
- 抖动
def add_jitter(image, intensity=5):
    """
    Jitter an image by sampling every pixel from a randomly displaced neighbour.

    Each output pixel (row, col) is copied from (row + dr, col + dc), where dr and dc
    are drawn independently per pixel from the integers in [-intensity, intensity].
    Source positions are clamped to the image border.

    :param image: a numpy array or anything np.array() can convert (e.g. a PIL image).
        Both 2-D (grayscale) and 3-D (height, width, channels) arrays are accepted.
    :param intensity: maximum displacement in pixels along each axis.
    :return: the jittered picture as a PIL image.
    """
    if not isinstance(image, np.ndarray):
        image = np.array(image)  # e.g. a PIL image

    # Only the spatial dimensions matter, so grayscale input works as well.
    h, w = image.shape[:2]

    # Per-pixel random offsets: rows first, then columns.
    row_shift = np.random.randint(-intensity, intensity + 1, size=(h, w))
    col_shift = np.random.randint(-intensity, intensity + 1, size=(h, w))

    # Index grids of the destination pixels; "ij" keeps the (row, col) order.
    rows, cols = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
    src_rows = np.clip(rows + row_shift, 0, h - 1)
    src_cols = np.clip(cols + col_shift, 0, w - 1)

    # Fancy indexing gathers the displaced source pixels in one step.
    jittered = image[src_rows, src_cols]
    return Image.fromarray(jittered)
旋转、平移、缩放会改变目标在图像中的位置，因此标签坐标必须随图像做同样的变换；噪声、模糊、抖动则不修改标签。
- 本人的数据集是 YOLOv8 OBB 格式，坐标归一化到 0–1，标签文件每行一个目标，各字段以空格分隔：
classid x1 y1 x2 y2 x3 y3 x4 y4
- 完整代码如下
import os
import cv2
import numpy as np
import random
from typing import List, Tuple
from PIL import Image, ImageFilter, ImageEnhance
import time
def add_jitter(image, intensity=5):
    """
    Jitter an image by sampling every pixel from a randomly displaced neighbour.

    Each output pixel (row, col) is copied from (row + dr, col + dc), where dr and dc
    are drawn independently per pixel from the integers in [-intensity, intensity].
    Source positions are clamped to the image border.

    :param image: a numpy array or anything np.array() can convert (e.g. a PIL image).
        Both 2-D (grayscale) and 3-D (height, width, channels) arrays are accepted.
    :param intensity: maximum displacement in pixels along each axis.
    :return: the jittered picture as a PIL image.
    """
    if not isinstance(image, np.ndarray):
        image = np.array(image)  # e.g. a PIL image

    # Only the spatial dimensions matter, so grayscale input works as well.
    h, w = image.shape[:2]

    # Per-pixel random offsets: rows first, then columns.
    row_shift = np.random.randint(-intensity, intensity + 1, size=(h, w))
    col_shift = np.random.randint(-intensity, intensity + 1, size=(h, w))

    # Index grids of the destination pixels; "ij" keeps the (row, col) order.
    rows, cols = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
    src_rows = np.clip(rows + row_shift, 0, h - 1)
    src_cols = np.clip(cols + col_shift, 0, w - 1)

    # Fancy indexing gathers the displaced source pixels in one step.
    jittered = image[src_rows, src_cols]
    return Image.fromarray(jittered)
def add_noise(image, noise_level=10):
    """
    Add zero-mean Gaussian noise to an image.

    :param image: a PIL image or a numpy array (arrays are first wrapped with Image.fromarray).
    :param noise_level: standard deviation of the noise, in pixel-value units.
    :return: a PIL image whose values were clipped to 0..255 and cast to uint8.
    """
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    # Work in float so the noise can push values below 0 / above 255 before clipping.
    pixels = np.array(pil_image).astype(float)
    disturbance = np.random.normal(0, noise_level, pixels.shape)
    clipped = np.clip(pixels + disturbance, 0, 255)
    return Image.fromarray(clipped.astype(np.uint8))
def add_blur(image, blur_radius=2):
    """
    Blur an image with a Gaussian filter.

    :param image: a PIL image or a numpy array (arrays are first wrapped with Image.fromarray).
    :param blur_radius: radius handed to ImageFilter.GaussianBlur.
    :return: the filtered PIL image.
    """
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    gaussian = ImageFilter.GaussianBlur(blur_radius)
    return pil_image.filter(gaussian)
def read_annotations(label_path: str) -> List[Tuple[int, List[Tuple[float, float]]]]:
    """
    Read a label file with one object per line: ``class_id x1 y1 x2 y2 ...``.

    Fields are separated by whitespace. Blank or whitespace-only lines (for
    example a trailing newline at the end of the file) are skipped.

    :param label_path: path of the label file.
    :return: labels as [(class_id, [(x1, y1), (x2, y2), ...])], in file order.
    :raises ValueError: if a field cannot be parsed as int / float.
    :raises IndexError: if a line holds an odd number of coordinate values.
    """
    annotations = []
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Nothing on this line; indexing parts[0] would fail.
                continue
            class_id = int(parts[0])
            # Coordinates follow the class id as consecutive (x, y) pairs.
            points = [(float(parts[i]), float(parts[i + 1])) for i in range(1, len(parts), 2)]
            annotations.append((class_id, points))
    return annotations
def apply_effects(image, jitter=False, jitter_intensity=5,
                  noise=False, noise_level=10,
                  blur=False, blur_radius=2):
    """
    Run the enabled degradations on an image, in the order jitter -> noise -> blur.

    :param image: image handed to the first enabled effect.
    :param jitter: truthy to apply add_jitter with ``jitter_intensity``.
    :param noise: truthy to apply add_noise with ``noise_level``.
    :param blur: truthy to apply add_blur with ``blur_radius``.
    :return: (image, prefix). The prefix starts with 'damaged_' and gains one
        '<effect>_<value>_' segment per applied effect. With every flag off the
        input image is returned untouched.
    """
    name_parts = ['damaged_']

    if jitter:
        image = add_jitter(image, jitter_intensity)
        name_parts.append('jitter_{}_'.format(jitter_intensity))

    if noise:
        image = add_noise(image, noise_level)
        name_parts.append('noise_{}_'.format(noise_level))

    if blur:
        image = add_blur(image, blur_radius)
        name_parts.append('blur_{}_'.format(blur_radius))

    return image, ''.join(name_parts)
def apply_transform(image: np.ndarray, annotations: List[Tuple[int, List[Tuple[float, float]]]],
                    rotation_angle=0, scale=1.0, translation=(0, 0)) -> Tuple[np.ndarray, List[Tuple[int, List[Tuple[float, float]]]]]:
    """
    Apply one affine transform (rotation, scaling, translation) to an image and its labels.

    The rotation/scale matrix is built around the image centre and the translation
    is then added to its last column. The same matrix warps the image and moves
    every label point, so the labels stay aligned with the pixels.

    :param image: input image; only its first two shape entries (height, width) are read.
    :param annotations: labels as [(class_id, [(x1, y1), ..., (x4, y4)])], with
        coordinates normalised by image width and height.
    :param rotation_angle: rotation angle in degrees, passed to cv2.getRotationMatrix2D.
    :param scale: isotropic scale factor, passed to cv2.getRotationMatrix2D.
    :param translation: (dx, dy) shift in pixels added to the matrix.
    :return: the warped image (same size as the input) and the transformed labels.
        Points are NOT clipped here, so they may fall outside the [0, 1] range.
    """
    height, width = image.shape[:2]

    # Rotation + scale about the image centre, then the pixel shift on top.
    affine = cv2.getRotationMatrix2D((width / 2, height / 2), rotation_angle, scale)
    affine[:, 2] += translation

    warped = cv2.warpAffine(image, affine, (width, height),
                            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)

    def _map_point(x, y):
        # Normalised -> pixel coordinates, apply the 2x3 matrix, normalise again.
        px, py = np.dot(affine, np.array([x * width, y * height, 1]))
        return px / width, py / height

    moved = [
        (class_id, [_map_point(x, y) for x, y in points])
        for class_id, points in annotations
    ]
    return warped, moved
def save_annotations(label_path: str, annotations: List[Tuple[int, List[Tuple[float, float]]]]):
    """
    Write labels to a text file, one object per line.

    Each line is the class id followed by the point coordinates, space separated,
    with every coordinate formatted to six decimal places. An existing file is
    overwritten.

    :param label_path: destination path of the label file.
    :param annotations: labels as [(class_id, [(x, y), ...])].
    """
    def _format_row(class_id, points):
        coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in points)
        return f"{class_id} {coords}\n"

    with open(label_path, 'w') as f:
        # Rows are produced lazily, so each one is written as it is formatted.
        f.writelines(_format_row(class_id, points) for class_id, points in annotations)
def augment_dataset(image_dir: str, label_dir: str, output_dir: str, num_augmentations: int = 10):
    """
    Augment a dataset offline: write randomly transformed copies of every image and its labels.

    For each ``.jpg`` / ``.bmp`` file in ``image_dir`` the label file with the same
    base name is read from ``label_dir``. Each augmented sample gets a random
    rotation, scale and translation (applied to image and labels alike), followed
    by randomly enabled jitter, noise and blur (image only). Results are written
    to ``<output_dir>/images`` and ``<output_dir>/labels``; images are always
    saved with a ``.jpg`` extension.

    Images that have no label file, or that OpenCV cannot read, are reported and
    skipped so that one bad file does not abort the whole run.

    :param image_dir: directory with the original images.
    :param label_dir: directory with the original label files.
    :param output_dir: root output directory (created when missing).
    :param num_augmentations: number of augmented samples generated per image.
    """
    images_output = os.path.join(output_dir, "images")
    labels_output = os.path.join(output_dir, "labels")
    # makedirs also creates output_dir itself when it does not exist yet.
    os.makedirs(images_output, exist_ok=True)
    os.makedirs(labels_output, exist_ok=True)

    image_files = [f for f in os.listdir(image_dir) if f.endswith((".jpg", ".bmp"))]
    for image_file in image_files:
        base_name = os.path.splitext(image_file)[0]
        image_path = os.path.join(image_dir, image_file)
        label_path = os.path.join(label_dir, base_name + ".txt")

        # Check the label first: without it there is nothing to transform.
        if not os.path.isfile(label_path):
            print(f"Skipped: {image_file} (label file not found: {label_path})")
            continue

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipped: {image_file} (image could not be read)")
            continue

        annotations = read_annotations(label_path)

        for i in range(num_augmentations):
            # Random geometric parameters.
            rotation_angle = random.uniform(-20, 20)  # degrees
            scale = random.uniform(0.6, 1.1)
            translation = (random.randint(-15, 15), random.randint(-15, 15))  # pixels

            transformed_image, transformed_annotations = apply_transform(
                image, annotations, rotation_angle, scale, translation
            )

            # Random strengths and on/off switches for the pixel-level effects.
            jitter_intensity = random.choice([1, 2])
            noise_level = random.choice([7, 8, 9])
            blur_radius = random.choice([0, 1])
            j_flag = random.choice([0, 1])
            n_flag = random.choice([0, 1])
            b_flag = random.choice([0, 1])

            # The name prefix returned by apply_effects is not used for the output name.
            transformed_image, _ = apply_effects(
                transformed_image,
                jitter=j_flag, jitter_intensity=jitter_intensity,
                noise=n_flag, noise_level=noise_level,
                blur=b_flag, blur_radius=blur_radius,
            )
            # The effects may return a PIL image; cv2.imwrite needs an array.
            transformed_image = np.array(transformed_image)

            # Save the augmented sample.
            output_image_name = f"{base_name}_augdzbig_{i:02d}.jpg"
            output_label_name = f"{base_name}_augdzbig_{i:02d}.txt"
            cv2.imwrite(os.path.join(images_output, output_image_name), transformed_image)
            save_annotations(os.path.join(labels_output, output_label_name), transformed_annotations)
            print(f"Saved: {output_image_name}, {output_label_name}")
# Example run: 10 augmented samples per training image, written under /augx10.
image_dir, label_dir, output_dir = "/images/train", "/labels/train", "/augx10"
augment_dataset(
    image_dir=image_dir,
    label_dir=label_dir,
    output_dir=output_dir,
    num_augmentations=10,
)
更新 apply_transform 函数，处理变换后标注点越界的问题：
def apply_transform(image: np.ndarray, annotations: List[Tuple[int, List[Tuple[float, float]]]],
                    rotation_angle=0, scale=1.0, translation=(0, 0)) -> Tuple[np.ndarray, List[Tuple[int, List[Tuple[float, float]]]]]:
    """
    Apply one affine transform to an image and its labels, clipping and filtering the labels.

    The rotation/scale matrix is built around the image centre and the translation
    is then added to its last column. The same matrix warps the image and moves
    every label point. Afterwards:

    * a label whose points ALL fall outside the [0, 1] range is dropped
      (as is a label with no points);
    * for every remaining label, each coordinate is clipped to [0, 1].

    The out-of-range test is made on the unclipped coordinates; once the points
    are clipped none of them can be out of range, so testing afterwards would
    never remove anything.

    :param image: input image; only its first two shape entries (height, width) are read.
    :param annotations: labels as [(class_id, [(x1, y1), ..., (x4, y4)])], with
        coordinates normalised by image width and height.
    :param rotation_angle: rotation angle in degrees, passed to cv2.getRotationMatrix2D.
    :param scale: isotropic scale factor, passed to cv2.getRotationMatrix2D.
    :param translation: (dx, dy) shift in pixels added to the matrix.
    :return: the warped image (same size as the input) and the surviving labels.
    """
    h, w = image.shape[:2]

    # Build the transform: rotation + scale about the centre, then the shift.
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, rotation_angle, scale)
    M[:, 2] += translation

    transformed_image = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)

    transformed_annotations = []
    for class_id, points in annotations:
        # Normalised -> pixel coordinates, apply the matrix, normalise again.
        raw_points = []
        for x, y in points:
            pt = np.dot(M, np.array([x * w, y * h, 1]))
            raw_points.append((pt[0] / w, pt[1] / h))

        # Count out-of-frame points BEFORE clipping.
        num_out_of_bounds = sum(
            not (0.0 <= px <= 1.0 and 0.0 <= py <= 1.0) for px, py in raw_points
        )
        if num_out_of_bounds >= len(raw_points):
            # Every point left the frame (or there were none): drop the label.
            continue

        # Partially visible label: keep it, pulling stray points back to the border.
        clipped_points = [
            (max(0.0, min(1.0, px)), max(0.0, min(1.0, py))) for px, py in raw_points
        ]
        transformed_annotations.append((class_id, clipped_points))
    return transformed_image, transformed_annotations
- 新增：将变换后的坐标裁剪到 0–1 范围内
- 新增：如果一个标签的所有点都越界，则直接删除该标签