DVS铝基片数据集操作——根据VOC和YOLO标注模式生成mask标注格式的数据集

七倾城

已于 2024-03-26 18:04:43 修改

阅读量581

点赞数 6

文章标签： yolo 计算机视觉

于 2024-03-26 18:01:12 首次发布

本文链接：https://blog.csdn.net/weixin_52686749/article/details/137053330

版权

DVS铝基片数据集操作

裁剪YOLO标签 txt格式

好的，让我来逐个解释每个函数的作用：

read_darknet_labels(label_path): 这个函数的作用是读取 Darknet 格式的标签文件。它接受一个标签文件的路径作为输入，然后将文件中的内容解析成标签列表，并返回这个列表。每个标签由对象类别、中心点的 x 坐标、中心点的 y 坐标、宽度和高度组成。

首先打开指定路径下的标签文件并逐行读取内容。

每一行代表一个对象的标签信息，通常包括类别ID和四个边界框坐标（中心点坐标、宽度、高度），这些信息按空格分隔。

函数将每一行的内容分割后，将类别ID转换为整数，四个边界框坐标转换为浮点数，并将这些信息封装成一个新的列表结构。

最终返回一个二维列表，其中每个子列表表示一张图像上的一个物体的标签信息。

crop_objects(image_path, label_path, output_dir): 此函数根据Darknet格式的标签信息从原始图像中裁剪出物体区域。

首先读取指定路径的图像文件。

然后调用read_darknet_labels()函数读取与该图像对应的标签信息。
对于标签列表中的每一个物体，根据其边界框坐标计算出实际像素坐标（x1, y1, x2, y2），从而确定裁剪区域。

使用这些坐标从原图中裁剪出物体区域，并保存为新的PNG图片到指定的输出目录下，文件名格式为“原图像名_索引.png”。

denoise_images(image_dir, output_dir): 此函数对指定目录下的所有灰度图像进行去噪处理。

遍历指定目录下的所有文件，对于每个图像文件，使用OpenCV的cv2.imread()函数以灰度模式读取图像数据。

应用基于非局部均值的快速去噪算法cv2.fastNlMeansDenoising()来去除图像噪声。

将去噪后的图像保存到指定的输出目录下，保持原有的文件名。

convert_to_masks(image_dir, output_dir): 这个函数的作用是将指定目录下的灰度图像转换为二值掩模（binary masks）。

同样遍历指定目录下的所有图像文件，读取每个图像为灰度图像。

使用OpenCV的cv2.threshold()函数进行二值化处理，这里设置阈值为127，超过阈值的像素值设为255（白色），其余（小于或等于阈值）的像素值设为0（黑色），生成二值图像或“mask”。

将得到的二值图像（即掩模）保存到指定的输出目录下，同样保持原有的文件名。

这些函数组合起来可以用于处理包含对象标签的图像数据集，包括裁剪对象、去噪和生成二值掩模等操作。

在主程序部分，按照如下三个步骤对数据集进行预处理：
根据Darknet标签裁剪出图像中的物体并保存；
对裁剪出的物体图像进行去噪处理；
将去噪后的物体图像进一步转换为二值掩模。在整个过程中，会自动创建所需的输出目录，并确保不会因为目录已存在而引发错误。

import cv2
import numpy as np
import os

# Function to read Darknet format labels
def read_darknet_labels(label_path):
    """Read a Darknet/YOLO label file into a list of label records.

    Each non-blank line is expected to hold a class id followed by four
    numbers (x centre, y centre, width, height) separated by whitespace.
    Blank lines are ignored.

    Args:
        label_path: Path of the label text file.

    Returns:
        A list with one ``[class_id, x_center, y_center, width, height]``
        entry per labelled line; the class id is an ``int`` and the other
        four values are ``float``.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If a field cannot be converted to a number.
    """
    labels = []
    with open(label_path, 'r') as f:
        for line in f:
            fields = line.split()
            # A blank line (typically a trailing newline at the end of the
            # file) carries no label; indexing it would raise IndexError.
            if not fields:
                continue
            labels.append([
                int(fields[0]),
                float(fields[1]),
                float(fields[2]),
                float(fields[3]),
                float(fields[4]),
            ])
    return labels

# Function to crop objects from images based on Darknet labels
def crop_objects(image_path, label_path, output_dir):
    """Crop every labelled object out of an image and save each crop as PNG.

    The label file is read with ``read_darknet_labels``; its normalised
    centre/size boxes are converted to pixel rectangles, clipped to the
    image, and written to ``output_dir`` as ``<image stem>_<index>.png``.

    Args:
        image_path: Path of the source image.
        label_path: Path of the Darknet label file belonging to the image.
        output_dir: Existing directory that receives the crops.

    Returns:
        None. Unreadable images and boxes that collapse to an empty region
        are skipped.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, so check before touching image.shape.
    if image is None:
        print(f"Failed to read image at path: {image_path}")
        return
    labels = read_darknet_labels(label_path)

    # shape[:2] works for both colour and single-channel arrays.
    img_height, img_width = image.shape[:2]
    # splitext handles extensions of any length, unlike slicing off 4 chars.
    stem = os.path.splitext(os.path.basename(image_path))[0]

    for i, (_obj_class, x_center, y_center, width, height) in enumerate(labels):
        # Normalised [0, 1] coordinates -> pixel coordinates.
        x_center_pixel = int(x_center * img_width)
        y_center_pixel = int(y_center * img_height)
        w_pixel = int(width * img_width)
        h_pixel = int(height * img_height)

        # Clip the rectangle to the image bounds.
        x1 = max(0, x_center_pixel - w_pixel // 2)
        y1 = max(0, y_center_pixel - h_pixel // 2)
        x2 = min(img_width, x_center_pixel + w_pixel // 2)
        y2 = min(img_height, y_center_pixel + h_pixel // 2)

        # A degenerate box yields an empty array, which cannot be encoded.
        if x2 <= x1 or y2 <= y1:
            print(f"Skipping empty crop {i} for image: {image_path}")
            continue

        cropped_img = image[y1:y2, x1:x2]

        # The index is the label's position in the file, so names stay
        # stable even when some boxes are skipped.
        cv2.imwrite(os.path.join(output_dir, f"{stem}_{i}.png"), cropped_img)

# Function to apply denoising to images
def denoise_images(image_dir, output_dir):
    """Denoise every readable image in a directory and save the results.

    Each entry of ``image_dir`` is loaded as a grayscale image, filtered
    with ``cv2.fastNlMeansDenoising`` and written to ``output_dir`` under
    the same file name.

    Args:
        image_dir: Directory whose entries are processed.
        output_dir: Existing directory that receives the denoised images.

    Returns:
        None. Entries that cannot be read as images are reported and skipped.
    """
    for filename in os.listdir(image_dir):
        img_path = os.path.join(image_dir, filename)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        # os.listdir also returns sub-directories and non-image files; for
        # those cv2.imread yields None, which the denoiser cannot accept.
        if image is None:
            print(f"Failed to read image at path: {img_path}")
            continue

        # Positional arguments after dst: h=10, templateWindowSize=7,
        # searchWindowSize=21.
        denoised_img = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)

        cv2.imwrite(os.path.join(output_dir, filename), denoised_img)

# Function to convert images to binary masks
def convert_to_masks(image_dir, output_dir):
    """Threshold every readable image in a directory into a binary mask.

    Each entry of ``image_dir`` is loaded as grayscale and binarised with
    ``cv2.threshold`` (threshold 127, max value 255, ``THRESH_BINARY``);
    the result is written to ``output_dir`` under the same file name.

    Args:
        image_dir: Directory whose entries are processed.
        output_dir: Existing directory that receives the masks.

    Returns:
        None. Entries that cannot be read as images are reported and skipped.
    """
    for filename in os.listdir(image_dir):
        img_path = os.path.join(image_dir, filename)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        # Non-image entries read as None and would make cv2.threshold fail.
        if image is None:
            print(f"Failed to read image at path: {img_path}")
            continue

        # Pixels above 127 become 255, all others 0.
        _, binary_img = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

        cv2.imwrite(os.path.join(output_dir, filename), binary_img)

# Example usage
if __name__ == "__main__":
    # Placeholder locations: the dataset directory holds images and their
    # Darknet .txt labels side by side; all results go below output_dir.
    dataset_dir = "path_to_your_dataset_directory"
    output_dir = "output_directory"
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: for every label file, crop the labelled objects out of the
    # image that shares its base name (assumed to be a .png).
    for filename in os.listdir(dataset_dir):
        if not filename.endswith(".txt"):
            continue
        base_name = filename[:-len(".txt")]
        image_path = os.path.join(dataset_dir, base_name + ".png")
        label_path = os.path.join(dataset_dir, filename)
        crop_objects(image_path, label_path, output_dir)

    # Step 2: denoise the crops into a sub-directory of the output root.
    denoised_output_dir = os.path.join(output_dir, "denoised_images")
    os.makedirs(denoised_output_dir, exist_ok=True)
    denoise_images(output_dir, denoised_output_dir)

    # Step 3: threshold the denoised crops into binary masks.
    mask_output_dir = os.path.join(output_dir, "binary_masks")
    os.makedirs(mask_output_dir, exist_ok=True)
    convert_to_masks(denoised_output_dir, mask_output_dir)

裁剪VOC标签 xml格式

解释与上述yolo相同

import cv2
import numpy as np
import os
import random
import xml.etree.ElementTree as ET

# Function to read XML format labels
def read_xml_labels(xml_path):
    """Read the object boxes from a Pascal-VOC style XML annotation.

    Args:
        xml_path: Path of the XML annotation file.

    Returns:
        A list with one ``[name, xmin, ymin, xmax, ymax]`` entry per
        ``<object>`` element; the four coordinates are ``int``. Coordinates
        written with a fractional part (e.g. ``"12.0"``) are truncated
        towards zero.

    Raises:
        AttributeError: If an object lacks ``name``, ``bndbox`` or one of
            the four coordinate elements.
        ValueError: If a coordinate is not numeric.
    """
    root = ET.parse(xml_path).getroot()

    labels = []
    for obj in root.findall('object'):
        obj_name = obj.find('name').text
        bbox = obj.find('bndbox')
        # Go through float first: int("12.0") raises ValueError, and some
        # annotation tools write coordinates as floats.
        xmin = int(float(bbox.find('xmin').text))
        ymin = int(float(bbox.find('ymin').text))
        xmax = int(float(bbox.find('xmax').text))
        ymax = int(float(bbox.find('ymax').text))
        labels.append([obj_name, xmin, ymin, xmax, ymax])

    return labels

# Function to crop objects from images based on XML labels
def crop_objects(image_path, xml_path, output_dir):
    """Crop every object listed in an XML annotation and save it as PNG.

    Boxes are read with ``read_xml_labels``, clipped to the image, and
    written to ``output_dir`` as ``<image stem>_<index>.png``.

    Args:
        image_path: Path of the source image.
        xml_path: Path of the XML annotation belonging to the image.
        output_dir: Existing directory that receives the crops.

    Returns:
        None. Unreadable images and boxes that are empty after clipping are
        reported and skipped.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to read image at path: {image_path}")
        return
    labels = read_xml_labels(xml_path)

    img_height, img_width = image.shape[:2]
    # splitext handles extensions of any length, unlike slicing off 4 chars.
    stem = os.path.splitext(os.path.basename(image_path))[0]

    for i, (_obj_name, xmin, ymin, xmax, ymax) in enumerate(labels):
        # Clip to the image: a negative start index would otherwise be
        # interpreted by numpy as counting from the far edge.
        xmin = max(0, xmin)
        ymin = max(0, ymin)
        xmax = min(img_width, xmax)
        ymax = min(img_height, ymax)

        # An empty region cannot be encoded as an image.
        if xmax <= xmin or ymax <= ymin:
            print(f"Skipping empty crop {i} for image: {image_path}")
            continue

        cropped_img = image[ymin:ymax, xmin:xmax]

        out_png = os.path.join(output_dir, f"{stem}_{i}.png")
        # Normalise Windows separators to forward slashes.
        out_png = out_png.replace("\\", "/")

        cv2.imwrite(out_png, cropped_img)

# Function to apply denoising to images
def denoise_images(image_dir, output_dir):
    """Denoise every ``.png`` image in a directory and save the results.

    Each ``.png`` entry of ``image_dir`` is loaded as grayscale, filtered
    with ``cv2.fastNlMeansDenoising`` and written to ``output_dir`` under
    the same file name.

    Args:
        image_dir: Directory whose ``.png`` entries are processed.
        output_dir: Existing directory that receives the denoised images.

    Returns:
        None. Non-PNG entries are ignored; unreadable files are reported
        and skipped.
    """
    for filename in os.listdir(image_dir):
        # Everything below must run only for PNG entries. Previously the
        # denoise/write steps sat outside this filter and reused the image
        # of an earlier iteration (or an undefined name) for other entries.
        if not filename.endswith('.png'):
            continue

        img_path = os.path.join(image_dir, filename)
        img_path = img_path.replace("\\", "/")
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        if image is None:
            print(f"Failed to read image at path: {img_path}")
            continue

        # Positional arguments after dst: h=10, templateWindowSize=7,
        # searchWindowSize=21.
        denoised_img = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
        output_path = os.path.join(output_dir, filename)
        output_path = output_path.replace("\\", "/")
        cv2.imwrite(output_path, denoised_img)

# Function to convert images to binary masks
def convert_to_masks(image_dir, output_dir):
    """Threshold every readable image in a directory into a binary mask.

    Each entry of ``image_dir`` is loaded as grayscale and binarised with
    ``cv2.threshold`` (threshold 127, max value 255, ``THRESH_BINARY``);
    the result is written to ``output_dir`` under the same file name.

    Args:
        image_dir: Directory whose entries are processed.
        output_dir: Existing directory that receives the masks.

    Returns:
        None. Entries that cannot be read as images are reported and skipped.
    """
    for filename in os.listdir(image_dir):
        img_path = os.path.join(image_dir, filename)
        img_path = img_path.replace("\\", "/")
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        # Non-image entries read as None and would make cv2.threshold fail.
        if image is None:
            print(f"Failed to read image at path: {img_path}")
            continue

        # Pixels above 127 become 255, all others 0.
        _, binary_img = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

        output_dir1 = os.path.join(output_dir, filename)
        output_dir1 = output_dir1.replace("\\", "/")
        cv2.imwrite(output_dir1, binary_img)



# Function to apply transformations to the mask


# Function to combine disk image and mask
def combine_images(disk_img, mask):
    """Combine a disk image with a mask via a masked bitwise AND.

    Args:
        disk_img: Image array passed as both operands of the AND.
        mask: Mask array forwarded as the ``mask`` argument.

    Returns:
        The result of ``cv2.bitwise_and(disk_img, disk_img, mask=mask)``.
    """
    # AND-ing the image with itself leaves selected pixels unchanged; the
    # mask decides which pixels take part (presumably the rest come out
    # as zero -- confirm against the OpenCV documentation).
    return cv2.bitwise_and(disk_img, disk_img, mask=mask)

# Example usage
if __name__ == "__main__":
    # Source dataset (images with VOC .xml annotations side by side) and
    # the root directory for everything this script generates.
    dataset_dir = "D:/xdu/标签/cxjc/shujuzq/scratch"
    output_dir = "D:/xdu/标签/cxjc/shujuzq/res"
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: for every annotation, crop its objects out of the image that
    # shares its base name (assumed to be a .png). Paths are normalised to
    # forward slashes.
    for filename in os.listdir(dataset_dir):
        if not filename.endswith(".xml"):
            continue
        base_name = filename[:-len(".xml")]
        image_path = os.path.join(dataset_dir, base_name + ".png").replace("\\", "/")
        xml_path = os.path.join(dataset_dir, filename).replace("\\", "/")
        crop_objects(image_path, xml_path, output_dir)

    # Step 2: denoise the crops into a sub-directory of the output root.
    denoised_output_dir = os.path.join(output_dir, "denoised_images").replace("\\", "/")
    os.makedirs(denoised_output_dir, exist_ok=True)
    denoise_images(output_dir, denoised_output_dir)

    # Step 3: threshold the denoised crops into binary masks.
    mask_output_dir = os.path.join(output_dir, "binary_masks").replace("\\", "/")
    os.makedirs(mask_output_dir, exist_ok=True)
    convert_to_masks(denoised_output_dir, mask_output_dir)

将裁切下来的标签结合到正常图片并生成mask

find_largest_circle 函数:
　　该函数接收一个由OpenCV HoughCircles算法得到的圆形列表（每个圆形表示为[x, y, r]，其中x和y是圆心坐标，r是半径）。它遍历这个列表，比较每个圆的半径大小，当找到一个新的更大的半径时，更新最大半径及对应的最大圆。最终返回最大的圆及其半径。

find_smallest_circle_in_largest 函数:
　　这个函数用于在给定的圆形列表中，从位于最大圆（largest_circle参数）内部的所有圆里找出半径最小的那个圆。函数遍历整个圆形列表，对于每一个圆，检查其半径是否同时小于最大圆的半径和当前已知的最小半径，并且是否位于最大圆内（通过调用is_circle_inside函数判断）；如果是，则更新最小半径及其对应的圆。最后返回这个最小的圆及其半径。

is_circle_inside 函数:
　　此函数用来判断一个圆（circle）是否位于另一个较大的圆（larger_circle）内部。计算两个圆心之间的距离平方，并将其与较大圆半径减去较小圆半径之后的差值平方做比较，如果前者小于等于后者，则表明较小的圆在较大的圆内。

detect_largest_circle 函数:
　　该函数主要用于读取、处理图像并检测图像中最大的圆。首先对输入图像进行预处理，包括缩小、转为灰度图、模糊处理等操作，然后使用HoughCircles方法检测图像中的所有圆。接下来查找并返回最大的圆及其半径。

random_transform 函数:
　　此函数用于对输入的图像进行随机变换，包括随机缩放（尺度在0.5至1.5之间）和随机旋转（角度在-30度至30度之间）。

overlay_anomaly 函数:
　　该函数的主要目的是将经过随机变换的异常图像（anomaly_image）叠加到正常图像（normal_image）上，并生成对应的mask。首先对异常图像进行随机变换，然后检测正常图像中的最大圆，依据最大圆的位置和大小确定异常图像放置的位置。在放置过程中，确保异常图像位于最大圆内且不与小圆（假设存在）重叠。如果重叠，则重新调整异常图像的位置。最后，将异常图像叠加到正常图像上，并更新mask，以便记录异常区域。同时，此函数也包含了读取和处理正常图片和异常图片数据集的部分逻辑。

import cv2
import numpy as np
import os


def find_largest_circle(circles):
    """Pick the circle with the greatest radius.

    Args:
        circles: A container whose first element is a sequence of
            ``(x, y, r)`` triples.

    Returns:
        A ``(circle, radius)`` pair for the first circle holding the
        largest radius, or ``(None, 0)`` when no circle has a radius
        greater than zero.
    """
    best_circle, best_radius = None, 0
    for candidate in circles[0]:
        _, _, radius = candidate
        # Strict comparison keeps the earliest circle when radii tie.
        if radius > best_radius:
            best_circle, best_radius = candidate, radius
    return best_circle, best_radius

def find_smallest_circle_in_largest(circles, largest_circle):
    """Find the smallest circle that lies inside the given largest circle.

    Args:
        circles: A container whose first element is a sequence of
            ``(x, y, r)`` triples.
        largest_circle: The ``(x, y, r)`` circle that must contain the result.

    Returns:
        A ``(circle, radius)`` pair for the qualifying circle with the
        smallest radius, or ``(None, np.inf)`` when none qualifies.
    """
    outer_radius = largest_circle[2]
    smallest_circle, smallest_radius = None, np.inf
    for candidate in circles[0]:
        _, _, radius = candidate
        # Cheap radius tests first; the containment test only runs for
        # circles that would actually improve on the current best.
        if not (radius < outer_radius and radius < smallest_radius):
            continue
        if is_circle_inside(candidate, largest_circle):
            smallest_circle, smallest_radius = candidate, radius
    return smallest_circle, smallest_radius

def is_circle_inside(circle, larger_circle):
    """Tell whether ``circle`` lies entirely within ``larger_circle``.

    Args:
        circle: The ``(x, y, r)`` circle being tested.
        larger_circle: The ``(x, y, r)`` circle expected to contain it.

    Returns:
        A truthy value when the centre distance is at most the difference
        of the radii (touching the boundary counts as inside), otherwise a
        falsy value. A circle whose radius exceeds that of
        ``larger_circle`` is never inside.
    """
    x, y, r = circle
    lx, ly, lr = larger_circle
    # Squaring (lr - r) discards its sign, so without this guard a circle
    # bigger than the container would be reported as inside.
    if r > lr:
        return False
    distance_squared = (x - lx) ** 2 + (y - ly) ** 2
    return distance_squared <= (lr - r) ** 2

def detect_largest_circle(img):
    """Detect circles in a BGR image and return the largest one.

    The image is halved in size, converted to grayscale and mean-blurred
    before ``cv2.HoughCircles`` runs; the detected circles are then scaled
    back by 2 so they refer to the coordinates of the input image.

    Args:
        img: BGR image array.

    Returns:
        The ``(circle, radius)`` pair produced by ``find_largest_circle``,
        or ``(None, None)`` when no circle is detected.
    """
    half_size = cv2.resize(img, None, fx=0.5, fy=0.5)
    gray = cv2.cvtColor(half_size, cv2.COLOR_BGR2GRAY)
    gray = cv2.blur(gray, (7, 7))  # mean filter to suppress background noise
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, minDist=20, param1=70, param2=50, minRadius=20,
                               maxRadius=150)

    # The None check must come before the rescale: multiplying None raised
    # TypeError whenever no circle was found.
    if circles is None:
        return None, None

    # Undo the 0.5 downscale for centres and radii alike.
    circles = circles * 2
    return find_largest_circle(circles)




def random_transform(image):
    """Apply a random scale and a random rotation to an image.

    The scale factor is drawn uniformly from [0.5, 1.5) and the rotation
    angle (degrees) uniformly from [-30, 30). The rotation is performed
    about the centre of the scaled image and the output keeps the scaled
    size.

    Args:
        image: Image array; its first two dimensions are height and width.

    Returns:
        The transformed image, sized ``(scaled height, scaled width)``.
    """
    # Random scaling. Clamp to at least one pixel so that shrinking a tiny
    # image never asks cv2.resize for a zero-sized output.
    scale = np.random.uniform(0.5, 1.5)
    h, w = image.shape[:2]
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    image = cv2.resize(image, (new_w, new_h))

    # Random rotation. Centre and canvas use the scaled dimensions; using
    # the pre-scale ones cropped enlarged images and shifted the pivot.
    angle = np.random.uniform(-30, 30)
    center = (new_w // 2, new_h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    image = cv2.warpAffine(image, M, (new_w, new_h))

    return image


def overlay_anomaly(normal_image, anomaly_image, mask):
    """Paste a randomly transformed anomaly crop onto a normal image.

    The anomaly is transformed with ``random_transform``; the largest
    circle of the normal image is located with ``detect_largest_circle``;
    the anomaly is positioned relative to that circle's centre and moved
    away from a square zone of half-width 70 px around the centre.

    Args:
        normal_image: 3-channel image that receives the anomaly.
        anomaly_image: 3-channel anomaly crop.
        mask: Value handed back unchanged whenever the overlay is skipped.

    Returns:
        ``(result_image, background)`` on success, where ``background`` is
        a black image shaped like ``normal_image`` holding only the pasted
        anomaly and ``result_image`` is the element-wise maximum of
        ``normal_image`` and ``background``. ``(None, mask)`` when the
        overlay is skipped (empty anomaly, no circle found, position at
        row/column 0, or the anomaly does not fit inside the image).
    """
    normal_h, normal_w, _ = normal_image.shape

    # Randomly transform the anomaly and measure the result.
    transformed_anomaly = random_transform(anomaly_image)
    anomaly_h, anomaly_w, _ = transformed_anomaly.shape

    # Skip anomalies that ended up empty.
    if anomaly_h == 0 or anomaly_w == 0:
        print("异常图片大小为0，跳过当前异常图片")
        return None, mask

    # Locate the outermost circle of the normal image.
    center, r = detect_largest_circle(normal_image)
    if center is None or r is None:
        print("未检测到同心圆")
        return None, mask

    center_x = center[0]
    center_y = center[1]
    print(center)
    print(r)
    # Hard-coded half-width of the zone around the centre to keep clear.
    # NOTE(review): presumably the inner circle's radius in pixels -- confirm.
    r2 = 70

    # An anomaly larger than the image can never be pasted; previously this
    # made the random-position draw below fail with ValueError.
    if anomaly_w > normal_w or anomaly_h > normal_h:
        print("异常图片大于正常图片，跳过当前异常图片")
        return None, mask

    # Start centred on the circle, then clamp into the image.
    x = int(center_x - anomaly_w / 2)
    y = int(center_y - anomaly_h / 2)
    x = max(0, min(x, normal_w - anomaly_w))
    y = max(0, min(y, normal_h - anomaly_h))

    # NOTE(review): after the clamp above both ends of each range equal the
    # current coordinate, so the draw in the else-branch returns it as is.
    min_x = max(0, x)
    min_y = max(0, y)
    max_x = min(normal_w - anomaly_w, x)
    max_y = min(normal_h - anomaly_h, y)

    # Bounding square of the zone to keep clear.
    inner_circle_x_min = center_x - r2
    inner_circle_x_max = center_x + r2
    inner_circle_y_min = center_y - r2
    inner_circle_y_max = center_y + r2

    if x + anomaly_w >= inner_circle_x_min and y + anomaly_h >= inner_circle_y_min and x <= inner_circle_x_max and y <= inner_circle_y_max:
        # The anomaly overlaps the zone: push it out along both axes.
        if x < center_x:
            x = center_x - r2 - anomaly_w
        if x > center_x:
            x = center_x + r2 + anomaly_w
        if y < center_y:
            y = center_y - r2 - anomaly_h
        if y > center_y:
            y = center_y + r2 + anomaly_h
    else:
        # No overlap with the zone.
        # NOTE(review): the message below says "overlap" although this is
        # the non-overlapping branch; left untouched pending confirmation.
        print("重叠")
        x = np.random.randint(min_x, max_x + 1)
        y = np.random.randint(min_y, max_y + 1)

    x = int(x)
    y = int(y)
    # Black canvas shaped like the normal image.
    background = np.zeros_like(normal_image)

    # Resize to the size measured above (same size as the current image).
    transformed_anomaly = cv2.resize(transformed_anomaly, (anomaly_w, anomaly_h))
    print(transformed_anomaly.shape)
    if transformed_anomaly.shape[0] == 0 or transformed_anomaly.shape[1] == 0 or transformed_anomaly.shape[2] == 0:
        print("异常图片大小为0")
        return None, mask

    # NOTE(review): placements touching row 0 or column 0 are rejected;
    # the reason is not evident from this code.
    if x == 0 or y == 0:
        print("异常图片位置为0")
        return None, mask

    anomaly_h, anomaly_w, _ = transformed_anomaly.shape

    # The push-out step can move the anomaly partly or fully off the image.
    # The slice below would then have a different shape from the anomaly and
    # the in-place add would raise, so skip such placements instead.
    if x < 0 or y < 0 or x + anomaly_w > normal_w or y + anomaly_h > normal_h:
        print("异常图片位置超出正常图片范围，跳过当前异常图片")
        return None, mask

    # Paste the anomaly onto the black canvas.
    background[y:y + anomaly_h, x:x + anomaly_w] += transformed_anomaly

    # Blend: keep the brighter of the normal image and the pasted anomaly.
    result_image = np.maximum(normal_image, background)

    return result_image, background



# Directories used by the synthesis script: defect-free images, denoised
# anomaly crops, and the two output folders (combined images, masks).
NORMAL_DIR = "D:\\xdu\\yjs\\2024_3_11\\VAND-APRIL-GAN\\qxjc\\train\\ok"
ANOMALY_DIR = "D:\\xdu\\yjs\\2024_3_11\\VAND-APRIL-GAN\\qxjc\\train\\mask\\denoised_images"
COMBINED_OUT_DIR = "D:\\xdu\\yjs\\2024_3_11\\VAND-APRIL-GAN\\qxjc\\train\\zh"
MASK_OUT_DIR = "D:\\xdu\\yjs\\2024_3_11\\VAND-APRIL-GAN\\qxjc\\train\\gt"

# Load the normal images, dropping entries cv2.imread cannot decode (it
# returns None for them, which would crash the overlay step later).
normal_images = []
normal_image_paths = os.listdir(NORMAL_DIR)
for image_path in normal_image_paths:
    image = cv2.imread(os.path.join(NORMAL_DIR, image_path))
    if image is None:
        print(f"Failed to read image at path: {os.path.join(NORMAL_DIR, image_path)}")
        continue
    normal_images.append(image)

# Load the anomaly images the same way.
anomaly_images = []
anomaly_image_paths = os.listdir(ANOMALY_DIR)
for image_path in anomaly_image_paths:
    image = cv2.imread(os.path.join(ANOMALY_DIR, image_path))
    if image is None:
        print(f"Failed to read image at path: {os.path.join(ANOMALY_DIR, image_path)}")
        continue
    anomaly_images.append(image)

# Results of every successful overlay.
combined_images = []
masks = []

if normal_images and anomaly_images:
    # Initial single-channel placeholder mask; overlay_anomaly hands it
    # back unchanged whenever it skips an image.
    mask = np.zeros_like(normal_images[0][:, :, 0])

    # Twenty passes over the normal images, each pairing an image with one
    # randomly chosen anomaly.
    for _ in range(20):
        for normal_image in normal_images:
            anomaly_image = anomaly_images[np.random.randint(len(anomaly_images))]

            combined_image, mask = overlay_anomaly(normal_image, anomaly_image, mask)

            if combined_image is not None:
                combined_images.append(combined_image)
                masks.append(mask)
else:
    # An empty dataset previously failed with IndexError/ValueError.
    print("No readable normal or anomaly images found; nothing to generate.")

# Make sure the output folders exist before writing into them.
os.makedirs(COMBINED_OUT_DIR, exist_ok=True)
os.makedirs(MASK_OUT_DIR, exist_ok=True)

# Save each combined image with its mask under a shared running index.
# NOTE(review): masks are written as JPEG, which is lossy; consider PNG if
# exact mask values matter. Left as is to keep the output names stable.
for i, (image, image_mask) in enumerate(zip(combined_images, masks)):
    cv2.imwrite(f"{COMBINED_OUT_DIR}\\{i}.jpg", image)
    cv2.imwrite(f"{MASK_OUT_DIR}\\{i}_mask.jpg", image_mask)