裁剪YOLO标签 txt格式
好的,让我来逐个解释每个函数的作用:
read_darknet_labels(label_path)
: 这个函数的作用是读取 Darknet 格式的标签文件。它接受一个标签文件的路径作为输入,然后将文件中的内容解析成标签列表,并返回这个列表。每个标签由对象类别、中心点的 x 坐标、中心点的 y 坐标、宽度和高度组成。
首先打开指定路径下的标签文件并逐行读取内容。
每一行代表一个对象的标签信息,通常包括类别ID和四个边界框坐标(中心点坐标、宽度、高度),这些信息按空格分隔。
函数将每一行的内容分割后,将类别ID转换为整数,四个边界框坐标转换为浮点数,并将这些信息封装成一个新的列表结构。
最终返回一个二维列表,其中每个子列表表示一张图像上的一个物体的标签信息。
-
crop_objects(image_path, label_path, output_dir)
: 此函数根据Darknet格式的标签信息从原始图像中裁剪出物体区域。首先读取指定路径的图像文件。 然后调用read_darknet_labels()函数读取与该图像对应的标签信息 。 对于标签列表中的每一个物体,根据其边界框坐标计算出实际像素坐标(x1, y1, x2, y2),从而确定裁剪区域。 使用这些坐标从原图中裁剪出物体区域,并保存为新的PNG图片到指定的输出目录下,文件名格式为“原图像名_索引.png”。
-
denoise_images(image_dir, output_dir)
: 此函数对指定目录下的所有灰度图像进行去噪处理。遍历指定目录下的所有文件,对于每个图像文件,使用OpenCV的cv2.imread()函数以灰度模式读取图像数据。 应用基于非局部均值的快速去噪算法cv2.fastNlMeansDenoising()来去除图像噪声。 将去噪后的图像保存到指定的输出目录下,保持原有的文件名。
-
convert_to_masks(image_dir, output_dir)
: 这个函数的作用是将指定目录下的灰度图像转换为二值掩模(binary masks)。同样遍历指定目录下的所有图像文件,读取每个图像为灰度图像。 使用OpenCV的cv2.threshold()函数进行二值化处理,这里设置阈值为127,超过阈值的像素值设为255(白色),小于或等于阈值的像素值设为0(黑色),生成二值图像或“mask”。 将得到的二值图像(即掩模)保存到指定的输出目录下,同样保持原有的文件名。
这些函数组合起来可以用于处理包含对象标签的图像数据集,包括裁剪对象、去噪和生成二值掩模等操作。
在主程序部分,按照如下三个步骤对数据集进行预处理:
根据Darknet标签裁剪出图像中的物体并保存;
对裁剪出的物体图像进行去噪处理;
将去噪后的物体图像进一步转换为二值掩模。在整个过程中,会自动创建所需的输出目录,并确保不会因为目录已存在而引发错误。
import cv2
import numpy as np
import os
# Function to read Darknet format labels
def read_darknet_labels(label_path):
    """Parse a Darknet/YOLO label file into a list of label rows.

    Every non-blank line is split on whitespace and its first five fields
    are read as ``class_id x_center y_center width height``. Fields after
    the fifth are ignored.

    Args:
        label_path: Path of the label text file.

    Returns:
        A list of ``[class_id, x_center, y_center, width, height]`` lists,
        with ``class_id`` converted to ``int`` and the other four values
        converted to ``float``. Blank lines are skipped.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If a field cannot be converted to a number.
    """
    labels = []
    with open(label_path, 'r') as f:
        for line in f:
            fields = line.split()
            # A blank line (typically a trailing newline at the end of the
            # file) carries no label; indexing it would raise IndexError.
            if not fields:
                continue
            labels.append(
                [int(fields[0])] + [float(fields[k]) for k in range(1, 5)]
            )
    return labels
# Function to crop objects from images based on Darknet labels
def crop_objects(image_path, label_path, output_dir):
    """Crop every labelled object out of an image and save each crop as PNG.

    The label file is read with ``read_darknet_labels``; each row holds a box
    centre and size expressed as fractions of the image width/height. The box
    is converted to pixel coordinates, clamped to the image, and written to
    ``output_dir`` as ``<image stem>_<label index>.png``.

    Args:
        image_path: Path of the source image.
        label_path: Path of the Darknet label file belonging to the image.
        output_dir: Existing directory that receives the cropped PNG files.

    Returns:
        None. Nothing is written when the image cannot be read.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports a missing or unreadable file by returning None.
    if image is None:
        print(f"Failed to read image at path: {image_path}")
        return

    # The image size is the same for every label, so look it up once.
    img_height, img_width = image.shape[:2]
    # splitext copes with extensions of any length, unlike slicing [:-4].
    stem = os.path.splitext(os.path.basename(image_path))[0]

    for i, label in enumerate(read_darknet_labels(label_path)):
        _, x_center, y_center, width, height = label

        # Normalised centre/size -> pixel centre/size.
        x_center_pixel = int(x_center * img_width)
        y_center_pixel = int(y_center * img_height)
        w_pixel = int(width * img_width)
        h_pixel = int(height * img_height)

        # Corner coordinates, clamped so the slice never leaves the image.
        x1 = max(0, x_center_pixel - w_pixel // 2)
        y1 = max(0, y_center_pixel - h_pixel // 2)
        x2 = min(img_width, x_center_pixel + w_pixel // 2)
        y2 = min(img_height, y_center_pixel + h_pixel // 2)

        # A degenerate box yields an empty array that cannot be encoded.
        if x2 <= x1 or y2 <= y1:
            continue

        cv2.imwrite(os.path.join(output_dir, f"{stem}_{i}.png"), image[y1:y2, x1:x2])
# Function to apply denoising to images
def denoise_images(image_dir, output_dir):
    """Denoise every readable image in a directory and save the results.

    Each file is loaded as a grayscale image, filtered with
    ``cv2.fastNlMeansDenoising`` (h=10, templateWindowSize=7,
    searchWindowSize=21) and written to ``output_dir`` under the same name.

    Args:
        image_dir: Directory whose files are processed.
        output_dir: Existing directory that receives the denoised images.

    Returns:
        None.
    """
    for filename in os.listdir(image_dir):
        img_path = os.path.join(image_dir, filename)
        # os.listdir also returns sub-directories; the script in this file
        # places the output folder inside the input folder, so skip them.
        if not os.path.isfile(img_path):
            continue
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # Files that are not decodable images come back as None.
        if image is None:
            print(f"Failed to read image at path: {img_path}")
            continue
        denoised_img = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
        cv2.imwrite(os.path.join(output_dir, filename), denoised_img)
# Function to convert images to binary masks
def convert_to_masks(image_dir, output_dir):
    """Threshold every readable image in a directory into a binary mask.

    Each file is loaded as grayscale and passed through
    ``cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)``: pixels above 127
    become 255, all others become 0. The mask is written to ``output_dir``
    under the same file name.

    Args:
        image_dir: Directory whose files are processed.
        output_dir: Existing directory that receives the masks.

    Returns:
        None.
    """
    for filename in os.listdir(image_dir):
        img_path = os.path.join(image_dir, filename)
        # Skip sub-directories returned by os.listdir.
        if not os.path.isfile(img_path):
            continue
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.threshold cannot process the None returned for unreadable files.
        if image is None:
            print(f"Failed to read image at path: {img_path}")
            continue
        _, binary_img = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
        cv2.imwrite(os.path.join(output_dir, filename), binary_img)
# Example usage
if __name__ == "__main__":
    # Placeholder locations; replace them with real paths before running.
    dataset_dir = "path_to_your_dataset_directory"
    output_dir = "output_directory"

    # Create the output directory if it does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: crop objects based on Darknet labels.
    for filename in os.listdir(dataset_dir):
        # Labels are assumed to use the .txt extension.
        if not filename.endswith(".txt"):
            continue
        label_path = os.path.join(dataset_dir, filename)
        # Images are assumed to share the label's stem and end in .png.
        image_path = os.path.join(dataset_dir, filename[:-4] + ".png")
        # Text files without a matching image (e.g. a class-name list) are
        # not label files for any picture, so there is nothing to crop.
        if not os.path.isfile(image_path):
            print(f"No image found for label file: {label_path}")
            continue
        crop_objects(image_path, label_path, output_dir)

    # Step 2: denoise the cropped images.
    denoised_output_dir = os.path.join(output_dir, "denoised_images")
    os.makedirs(denoised_output_dir, exist_ok=True)
    denoise_images(output_dir, denoised_output_dir)

    # Step 3: convert the denoised images to binary masks.
    mask_output_dir = os.path.join(output_dir, "binary_masks")
    os.makedirs(mask_output_dir, exist_ok=True)
    convert_to_masks(denoised_output_dir, mask_output_dir)
裁剪VOC标签 xml格式
各函数的解释与上述 YOLO 部分相同,区别在于标签是通过 read_xml_labels 从 VOC 的 XML 文件中读取的。
import cv2
import numpy as np
import os
import random
import xml.etree.ElementTree as ET
# Function to read XML format labels
def read_xml_labels(xml_path):
    """Read object names and bounding boxes from a VOC-style XML file.

    Every ``<object>`` element under the document root contributes one row
    built from its ``<name>`` text and the ``xmin``/``ymin``/``xmax``/``ymax``
    children of its ``<bndbox>`` element.

    Args:
        xml_path: Path of the XML annotation file.

    Returns:
        A list of ``[name, xmin, ymin, xmax, ymax]`` lists with the four
        coordinates as ``int``. Coordinates written with a decimal part
        (e.g. ``"12.0"``) are accepted and truncated toward zero.

    Raises:
        AttributeError: If an object lacks ``<name>``, ``<bndbox>`` or one of
            the four coordinate elements.
        ValueError: If a coordinate is not numeric.
    """
    root = ET.parse(xml_path).getroot()
    labels = []
    for obj in root.findall('object'):
        obj_name = obj.find('name').text
        bbox = obj.find('bndbox')
        # Go through float() first: int("12.0") raises ValueError, yet
        # float-formatted coordinates do occur in annotation files.
        coords = [
            int(float(bbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        labels.append([obj_name] + coords)
    return labels
# Function to crop objects from images based on XML labels
def crop_objects(image_path, xml_path, output_dir):
    """Crop every annotated object out of an image and save each crop as PNG.

    Boxes come from ``read_xml_labels`` as pixel coordinates. Each box is
    clamped to the image and written to ``output_dir`` as
    ``<image stem>_<label index>.png``.

    Args:
        image_path: Path of the source image.
        xml_path: Path of the XML annotation belonging to the image.
        output_dir: Existing directory that receives the cropped PNG files.

    Returns:
        None. Nothing is written when the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to read image at path: {image_path}")
        return

    img_height, img_width = image.shape[:2]
    # splitext copes with extensions of any length, unlike slicing [:-4].
    stem = os.path.splitext(os.path.basename(image_path))[0]

    for i, label in enumerate(read_xml_labels(xml_path)):
        _, xmin, ymin, xmax, ymax = label

        # A negative start index would be interpreted by NumPy as counting
        # from the end of the axis, silently producing the wrong region.
        x1 = max(0, xmin)
        y1 = max(0, ymin)
        x2 = min(img_width, xmax)
        y2 = min(img_height, ymax)

        # A degenerate box yields an empty array that cannot be encoded.
        if x2 <= x1 or y2 <= y1:
            continue

        out_png = os.path.join(output_dir, f"{stem}_{i}.png")
        out_png = out_png.replace("\\", "/")
        cv2.imwrite(out_png, image[y1:y2, x1:x2])
# Function to apply denoising to images
def denoise_images(image_dir, output_dir):
    """Denoise every ``.png`` file of a directory and save the results.

    Each matching file is loaded as grayscale, filtered with
    ``cv2.fastNlMeansDenoising`` (h=10, templateWindowSize=7,
    searchWindowSize=21) and written to ``output_dir`` under the same name.
    Files that cannot be read are reported and skipped.

    Args:
        image_dir: Directory whose ``.png`` files are processed.
        output_dir: Existing directory that receives the denoised images.

    Returns:
        None.
    """
    for name in os.listdir(image_dir):
        if not name.endswith('.png'):
            continue

        # Paths are normalised to forward slashes before being handed to cv2.
        src_path = os.path.join(image_dir, name).replace("\\", "/")
        gray = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            print(f"Failed to read image at path: {src_path}")
            continue

        cleaned = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
        dst_path = os.path.join(output_dir, name).replace("\\", "/")
        cv2.imwrite(dst_path, cleaned)
# Function to convert images to binary masks
def convert_to_masks(image_dir, output_dir):
    """Threshold every readable image in a directory into a binary mask.

    Each file is loaded as grayscale and passed through
    ``cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)``: pixels above 127
    become 255, all others become 0. The mask is written to ``output_dir``
    under the same file name.

    Args:
        image_dir: Directory whose files are processed.
        output_dir: Existing directory that receives the masks.

    Returns:
        None.
    """
    for filename in os.listdir(image_dir):
        img_path = os.path.join(image_dir, filename)
        img_path = img_path.replace("\\", "/")
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # Same guard as denoise_images: directories and non-image files are
        # returned as None, which cv2.threshold cannot process.
        if image is None:
            print(f"Failed to read image at path: {img_path}")
            continue
        _, binary_img = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
        output_path = os.path.join(output_dir, filename)
        output_path = output_path.replace("\\", "/")
        cv2.imwrite(output_path, binary_img)
# Function to apply transformations to the mask
# Function to combine disk image and mask
def combine_images(disk_img, mask):
    """Apply ``mask`` to ``disk_img`` with a masked bitwise AND.

    The image is AND-ed with itself, so pixels selected by a non-zero mask
    value keep their original value. This is an example combination and can
    be replaced by whatever blending is required.

    Args:
        disk_img: Image to be masked.
        mask: Mask passed to ``cv2.bitwise_and`` as its ``mask`` argument.

    Returns:
        The masked image produced by ``cv2.bitwise_and``.
    """
    return cv2.bitwise_and(disk_img, disk_img, mask=mask)
# Example usage
if __name__ == "__main__":
    # Input dataset and result locations (hard-coded).
    dataset_dir = "D:/xdu/标签/cxjc/shujuzq/scratch"
    output_dir = "D:/xdu/标签/cxjc/shujuzq/res"

    # Make sure the result directory exists.
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: crop the objects described by each XML annotation.
    for filename in os.listdir(dataset_dir):
        # Annotations are assumed to use the .xml extension.
        if not filename.endswith(".xml"):
            continue
        # Images are assumed to share the annotation's stem and end in .png.
        image_path = os.path.join(dataset_dir, filename[:-4] + ".png").replace("\\", "/")
        xml_path = os.path.join(dataset_dir, filename).replace("\\", "/")
        crop_objects(image_path, xml_path, output_dir)

    # Step 2: denoise the cropped images into a sub-folder of the results.
    denoised_output_dir = os.path.join(output_dir, "denoised_images").replace("\\", "/")
    os.makedirs(denoised_output_dir, exist_ok=True)
    denoise_images(output_dir, denoised_output_dir)

    # Step 3: turn the denoised images into binary masks.
    mask_output_dir = os.path.join(output_dir, "binary_masks").replace("\\", "/")
    os.makedirs(mask_output_dir, exist_ok=True)
    convert_to_masks(denoised_output_dir, mask_output_dir)
将裁切下来的标签结合到正常图片并生成mask
find_largest_circle 函数:
该函数接收一个由OpenCV HoughCircles算法得到的圆形列表(每个圆形表示为[x, y, r],其中x和y是圆心坐标,r是半径)。它遍历这个列表,比较每个圆的半径大小,当找到一个新的更大的半径时,更新最大半径及对应的最大圆。最终返回最大的圆及其半径。
find_smallest_circle_in_largest 函数:
这个函数用于在给定的圆形列表中寻找最大的圆(largest_circle参数)内包含的所有圆中,找出半径最小的那个圆。函数同样遍历整个圆形列表,对于每一个圆,检查其是否位于最大圆内(通过调用is_circle_inside函数判断),并且半径小于当前已知的最小半径,如果是,则更新最小半径及其对应的圆。最后返回这个最小的圆及其半径。
is_circle_inside 函数:
此函数用来判断一个圆(circle)是否位于另一个较大的圆(larger_circle)内部。计算两个圆心之间的距离平方,并将其与较大圆半径减去较小圆半径之后的差值平方做比较,如果前者小于等于后者,则表明较小的圆在较大的圆内。
detect_largest_circle 函数:
该函数主要用于读取、处理图像并检测图像中最大的圆。首先对输入图像进行预处理,包括缩小、转为灰度图、模糊处理等操作,然后使用HoughCircles方法检测图像中的所有圆。接下来查找并返回最大的圆及其半径。
random_transform 函数:
此函数用于对输入的图像进行随机变换,包括随机缩放(尺度在0.5至1.5之间)和随机旋转(角度在-30度至30度之间)。
overlay_anomaly 函数:
该函数的主要目的是将经过随机变换的异常图(anomaly_image)叠加到正常图像(normal_image)上,并生成对应的mask。首先对异常图像进行随机变换,然后检测正常图像中的最大圆,依据最大圆的位置和大小确定异常图像放置的位置。在放置过程中,确保异常图像位于最大圆内且不与小圆(假设存在)重叠。如果重叠,则重新调整异常图像的位置。最后,将异常图像叠加到正常图像上,并更新mask,以便记录异常区域。读取和处理正常图片与异常图片数据集的逻辑位于该函数之后的脚本部分。
import cv2
import numpy as np
import os
def find_largest_circle(circles):
    """Return the circle with the greatest radius and that radius.

    Args:
        circles: Nested sequence whose first element is the collection of
            circles to scan; each circle unpacks into ``(x, y, r)``.

    Returns:
        ``(circle, radius)`` for the first circle whose radius is strictly
        greater than every earlier one. ``(None, 0)`` when there is no circle
        with a radius above zero.
    """
    best_circle, best_radius = None, 0
    for candidate in circles[0]:
        _, _, radius = candidate
        # Strict comparison: on ties the earliest circle wins.
        if radius > best_radius:
            best_circle, best_radius = candidate, radius
    return best_circle, best_radius
def find_smallest_circle_in_largest(circles, largest_circle):
    """Find the smallest circle that lies inside ``largest_circle``.

    Args:
        circles: Nested sequence whose first element is the collection of
            circles to scan; each circle unpacks into ``(x, y, r)``.
        largest_circle: The enclosing circle as ``(x, y, r)``.

    Returns:
        ``(circle, radius)`` of the smallest circle whose radius is below the
        radius of ``largest_circle`` and for which ``is_circle_inside``
        holds. ``(None, np.inf)`` when no circle qualifies.
    """
    radius_cap = largest_circle[2]
    best_circle, best_radius = None, np.inf
    for candidate in circles[0]:
        _, _, radius = candidate
        # Cheap radius tests first; the containment test runs only for
        # candidates that would actually improve on the current best.
        if not (radius < radius_cap and radius < best_radius):
            continue
        if not is_circle_inside(candidate, largest_circle):
            continue
        best_circle, best_radius = candidate, radius
    return best_circle, best_radius
def is_circle_inside(circle, larger_circle):
    """Tell whether ``circle`` lies entirely within ``larger_circle``.

    Args:
        circle: The candidate circle as ``(x, y, r)``.
        larger_circle: The enclosing circle as ``(x, y, r)``.

    Returns:
        True when the candidate's radius does not exceed the enclosing
        radius and the distance between the centres is at most the
        difference of the radii (touching from the inside counts as inside);
        False otherwise.
    """
    x, y, r = circle
    lx, ly, lr = larger_circle
    # Without this guard a circle *bigger* than the enclosing one would pass,
    # because squaring (lr - r) discards the sign of the difference.
    if r > lr:
        return False
    distance_squared = (x - lx) ** 2 + (y - ly) ** 2
    return distance_squared <= (lr - r) ** 2
def detect_largest_circle(img):
    """Detect circles in a BGR image and return the largest one.

    The image is shrunk to half size, converted to grayscale and smoothed
    with a 7x7 mean filter before ``cv2.HoughCircles`` is run. The detected
    circles are then multiplied by 2 so that centre and radius refer to the
    coordinates of the full-size input image.

    Args:
        img: Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        ``(circle, radius)`` as produced by ``find_largest_circle``, or
        ``(None, None)`` when ``cv2.HoughCircles`` finds no circle.
    """
    half_size = cv2.resize(img, None, fx=0.5, fy=0.5)
    gray = cv2.cvtColor(half_size, cv2.COLOR_BGR2GRAY)
    # Mean filter to suppress background noise before the Hough transform.
    gray = cv2.blur(gray, (7, 7))
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, minDist=20, param1=70, param2=50, minRadius=20,
                               maxRadius=150)
    # HoughCircles returns None when nothing is found; this must be checked
    # before scaling, otherwise the multiplication raises TypeError.
    if circles is None:
        return None, None
    # Detection ran on the half-size image, so scale x, y and r back up.
    circles = circles * 2
    return find_largest_circle(circles)
def random_transform(image):
    """Apply a random scale and a random rotation to an image.

    The scale factor is drawn uniformly from [0.5, 1.5) and the rotation
    angle (degrees) uniformly from [-30, 30). The rotation is performed
    about the centre of the *scaled* image and the output keeps the scaled
    size, so the result is not cropped or padded back to the input size.

    Args:
        image: Image array whose first two dimensions are height and width.

    Returns:
        The scaled and rotated image; each side is at least 1 pixel long.
    """
    # Random scaling.
    scale = np.random.uniform(0.5, 1.5)
    h, w = image.shape[:2]
    # Never request a zero-sized target, which cv2.resize rejects.
    scaled_w = max(1, int(w * scale))
    scaled_h = max(1, int(h * scale))
    image = cv2.resize(image, (scaled_w, scaled_h))

    # Random rotation, using the dimensions of the scaled image for both the
    # pivot and the output canvas.
    angle = np.random.uniform(-30, 30)
    center = (scaled_w // 2, scaled_h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(image, M, (scaled_w, scaled_h))
def overlay_anomaly(normal_image, anomaly_image, mask):
    """Paste a randomly transformed anomaly patch onto a normal image.

    The patch is transformed with ``random_transform`` and positioned
    relative to the circle returned by ``detect_largest_circle``. It starts
    centred on that circle and, when it intersects the square of half-size
    ``r2`` around the centre, is shifted away from the centre. The patch is
    drawn on a black canvas which is merged into the normal image with a
    per-pixel maximum.

    Args:
        normal_image: 3-dimensional image array that receives the patch.
        anomaly_image: 3-dimensional image array of the anomaly patch.
        mask: Value handed back unchanged whenever the overlay is skipped.

    Returns:
        ``(result_image, background)`` on success, where ``background`` is a
        black array shaped like ``normal_image`` containing only the pasted
        patch. ``(None, mask)`` when the patch is empty, does not fit inside
        the image, lands on row/column 0, or no circle is detected.
    """
    normal_h, normal_w, _ = normal_image.shape

    # Apply the random transform to the anomaly patch.
    transformed_anomaly = random_transform(anomaly_image)
    anomaly_h, anomaly_w, anomaly_c = transformed_anomaly.shape

    # Skip patches that ended up empty.
    if anomaly_h == 0 or anomaly_w == 0 or anomaly_c == 0:
        print("异常图片大小为0,跳过当前异常图片")
        return None, mask

    # A patch larger than the target can never be pasted completely.
    if anomaly_w > normal_w or anomaly_h > normal_h:
        print("异常图片超出图像范围,跳过当前异常图片")
        return None, mask

    # Locate the outer circle of the part.
    center, r = detect_largest_circle(normal_image)
    if r is None or center is None:
        print("未检测到同心圆")
        return None, mask

    center_x = center[0]
    center_y = center[1]
    print(center)
    radius = r
    print(radius)

    # Half-size, in pixels, of the square region around the centre that the
    # patch must stay out of.
    # NOTE(review): presumably the radius of the inner circle - confirm.
    r2 = 70

    # Start with the patch centred on the circle, clamped to the image.
    x = int(center_x - anomaly_w / 2)
    y = int(center_y - anomaly_h / 2)
    x = max(0, min(x, normal_w - anomaly_w))
    y = max(0, min(y, normal_h - anomaly_h))

    # Range used for the random placement below. Because x and y are already
    # clamped, both ends of each range equal the current coordinate.
    min_x = max(0, x)
    min_y = max(0, y)
    max_x = min(normal_w - anomaly_w, x)
    max_y = min(normal_h - anomaly_h, y)

    # Bounds of the keep-out square around the centre.
    inner_circle_x_min = center_x - r2
    inner_circle_x_max = center_x + r2
    inner_circle_y_min = center_y - r2
    inner_circle_y_max = center_y + r2

    if x + anomaly_w >= inner_circle_x_min and y + anomaly_h >= inner_circle_y_min and x <= inner_circle_x_max and y <= inner_circle_y_max:
        # The patch intersects the keep-out square: push it away from the
        # centre along both axes.
        if x < center_x:
            x = center_x - r2 - anomaly_w
        if x > center_x:
            x = center_x + r2 + anomaly_w
        if y < center_y:
            y = center_y - r2 - anomaly_h
        if y > center_y:
            y = center_y + r2 + anomaly_h
    else:
        # No intersection with the keep-out square: pick a position from the
        # range computed above.
        # NOTE(review): the message below says "overlap" although this is
        # the non-overlapping branch - confirm the intended wording.
        print("重叠")
        x = np.random.randint(min_x, max_x + 1)
        y = np.random.randint(min_y, max_y + 1)

    x = int(x)
    y = int(y)
    print(transformed_anomaly.shape)

    # Positions on the first row/column are rejected.
    # NOTE(review): looks like a sentinel for a clamped position - confirm.
    if x == 0 or y == 0:
        print("异常图片位置为0")
        return None, mask

    # The shift above can move the patch partly or fully outside the image.
    # A negative index would wrap around and a clipped slice would no longer
    # match the patch shape, so such placements are skipped.
    if x < 0 or y < 0 or x + anomaly_w > normal_w or y + anomaly_h > normal_h:
        print("异常图片超出图像范围,跳过当前异常图片")
        return None, mask

    # Draw the patch on a black canvas of the same shape as the normal image.
    background = np.zeros_like(normal_image)
    background[y:y + anomaly_h, x:x + anomaly_w] += transformed_anomaly

    # Merge canvas and normal image by taking the brighter value per pixel.
    result_image = np.maximum(normal_image, background)
    return result_image, background
# 读取正常图片数据集
# Dataset locations (hard-coded Windows paths).
NORMAL_DIR = "D:\\xdu\\yjs\\2024_3_11\\VAND-APRIL-GAN\\qxjc\\train\\ok"
ANOMALY_DIR = "D:\\xdu\\yjs\\2024_3_11\\VAND-APRIL-GAN\\qxjc\\train\\mask\\denoised_images"
COMBINED_DIR = "D:\\xdu\\yjs\\2024_3_11\\VAND-APRIL-GAN\\qxjc\\train\\zh"
MASK_DIR = "D:\\xdu\\yjs\\2024_3_11\\VAND-APRIL-GAN\\qxjc\\train\\gt"

# Load the normal images, dropping files that cv2 cannot decode (imread
# returns None for them, which would break the overlay later on).
normal_images = []
normal_image_paths = os.listdir(NORMAL_DIR)
for image_path in normal_image_paths:
    image = cv2.imread(os.path.join(NORMAL_DIR, image_path))
    if image is None:
        print(f"Failed to read image at path: {os.path.join(NORMAL_DIR, image_path)}")
        continue
    normal_images.append(image)

# Load the anomaly patches in the same way.
anomaly_images = []
anomaly_image_paths = os.listdir(ANOMALY_DIR)
for image_path in anomaly_image_paths:
    image = cv2.imread(os.path.join(ANOMALY_DIR, image_path))
    if image is None:
        print(f"Failed to read image at path: {os.path.join(ANOMALY_DIR, image_path)}")
        continue
    anomaly_images.append(image)

# Both datasets are required: the first normal image defines the mask shape
# and a random anomaly is drawn for every overlay.
if not normal_images or not anomaly_images:
    raise SystemExit("No readable normal or anomaly images found")

# Empty single-channel mask shaped like the first normal image.
mask = np.zeros_like(normal_images[0][:, :, 0])

# Overlay a random anomaly on every normal image, 20 passes over the set.
combined_images = []
masks = []
for _ in range(20):
    for normal_image in normal_images:
        # Pick one anomaly patch at random.
        anomaly_image = anomaly_images[np.random.randint(len(anomaly_images))]
        # overlay_anomaly returns (None, mask) when the overlay is skipped.
        combined_image, mask = overlay_anomaly(normal_image, anomaly_image, mask)
        if combined_image is not None:
            combined_images.append(combined_image)
            masks.append(mask)

# cv2.imwrite does not create missing folders, so make sure they exist.
os.makedirs(COMBINED_DIR, exist_ok=True)
os.makedirs(MASK_DIR, exist_ok=True)

# Save every combined image together with its mask.
# NOTE(review): masks are stored as JPEG, which is lossy - confirm that the
# consumer of these files tolerates compression artefacts.
for i, (image, image_mask) in enumerate(zip(combined_images, masks)):
    cv2.imwrite(os.path.join(COMBINED_DIR, "{}.jpg".format(i)), image)
    cv2.imwrite(os.path.join(MASK_DIR, "{}_mask.jpg".format(i)), image_mask)