将YOLO数据集中的大图切分成小图数据集
图片和对应的标签文件同步切分
import os
import cv2
from tqdm import tqdm
def get_imgs_pos(img_w, img_h, cut_w, cut_h, w_stride, h_stride):
    """Compute the crop rectangles that tile an image with a sliding window.

    Tiles are generated column by column (left to right), and top to bottom
    inside each column. A tile that would run past the right or bottom edge
    is shifted back so that it ends exactly on that edge.

    Args:
        img_w: Width of the source image in pixels.
        img_h: Height of the source image in pixels.
        cut_w: Width of each tile in pixels.
        cut_h: Height of each tile in pixels.
        w_stride: Horizontal step between tile origins.
        h_stride: Vertical step between tile origins.

    Returns:
        A list of ``[x0, y0, x1, y1]`` rectangles (top-left and bottom-right
        corners) in source-image pixel coordinates.
    """
    imgs_pos = []
    for beg_w in range(0, img_w, w_stride):
        x0, x1 = beg_w, beg_w + cut_w
        if x1 > img_w:  # tile overruns the right edge: pull it back
            x1 = img_w
            # Clamp so an image narrower than the tile never yields a negative origin.
            x0 = max(img_w - cut_w, 0)
        for beg_h in range(0, img_h, h_stride):
            y0, y1 = beg_h, beg_h + cut_h
            if y1 > img_h:  # tile overruns the bottom edge: pull it back
                y1 = img_h
                y0 = max(img_h - cut_h, 0)
            imgs_pos.append([x0, y0, x1, y1])
            if y1 == img_h:  # this column already reaches the bottom edge
                break
        if x1 == img_w:
            # This column already reaches the right edge; any further column
            # would be shifted back onto the same rectangle and duplicate it.
            break
    return imgs_pos
def save_subimg(cv_img, pos, img_save_dir, img_name, idx):
    """Crop one tile out of an image and write it as a JPEG file.

    The output file is named ``<stem>_<idx:04d>.jpg`` where ``<stem>`` is
    ``img_name`` without its extension.

    Args:
        cv_img: Source image; it is sliced as ``cv_img[y0:y1, x0:x1]``.
        pos: ``[x0, y0, x1, y1]`` rectangle of the tile in source pixels.
        img_save_dir: Directory the tile is written to (created if missing).
        img_name: File name of the source image.
        idx: Index of the tile within the source image.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    x0, y0, x1, y1 = pos
    crop_img = cv_img[y0:y1, x0:x1]
    # splitext handles extensions of any length (".jpeg", ".png", ...),
    # unlike slicing off the last four characters.
    stem = os.path.splitext(img_name)[0]
    os.makedirs(img_save_dir, exist_ok=True)
    out_path = os.path.join(img_save_dir, "{}_{:04d}.jpg".format(stem, idx))
    # imwrite signals failure through its return value instead of raising.
    if not cv2.imwrite(out_path, crop_img):
        raise OSError("Failed to write image tile: {}".format(out_path))
def save_sublabs(sub_labels, label_save_dir, img_name, idx):
    """Write the labels of one tile to a text file, one label per line.

    The output file is named ``<stem>_<idx:04d>.txt`` where ``<stem>`` is
    ``img_name`` without its extension. The file is created even when
    ``sub_labels`` is empty, in which case it is empty.

    Args:
        sub_labels: Iterable of label rows; the values of each row are
            written space-separated.
        label_save_dir: Directory the label file is written to.
        img_name: File name of the source image.
        idx: Index of the tile within the source image.
    """
    # splitext handles extensions of any length, unlike img_name[0:-4].
    stem = os.path.splitext(img_name)[0]
    lab_path = os.path.join(label_save_dir, "{}_{:04d}.txt".format(stem, idx))
    with open(lab_path, 'w', encoding="utf-8") as fw:
        fw.writelines(" ".join(str(num) for num in lab) + "\n" for lab in sub_labels)
def read_labels(txt_path):
    """Read a label text file into a list of float rows.

    Each non-blank line is split on whitespace and every field is converted
    to ``float``. Blank lines are skipped, so trailing newlines, trailing
    spaces and tab separators no longer raise ``ValueError``.

    Args:
        txt_path: Path of the label file.

    Returns:
        A list with one list of floats per non-blank line.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    pos = []
    with open(txt_path, 'r', encoding="utf-8") as file_to_read:
        for line in file_to_read:
            # split() without arguments collapses runs of whitespace and
            # drops the trailing newline.
            fields = line.split()
            if not fields:
                continue
            pos.append([float(i) for i in fields])
    return pos
def _clip_span(center, size, lo, hi):
    """Clip the 1-D interval ``center +/- size / 2`` to ``[lo, hi]``.

    Returns the ``(center, size)`` of the clipped interval. An interval that
    already lies inside ``[lo, hi]`` is returned unchanged.
    """
    start, end = center - size / 2, center + size / 2
    if start >= lo and end <= hi:
        return center, size
    start, end = max(start, lo), min(end, hi)
    return (start + end) / 2, end - start


def get_sublabels(pos, labels, img_w, img_h, cut_w, cut_h):
    """Project the labels of a full image onto one tile.

    A label is kept only when its centre lies strictly inside the tile.
    Kept boxes are clipped to the tile rectangle: both the size and the
    centre are recomputed from the clipped edges, so the resulting box never
    extends past the tile border.

    Args:
        pos: ``[x0, y0, x1, y1]`` rectangle of the tile in source pixels.
        labels: Rows of ``[class, cx, cy, w, h]`` with the box values
            normalised by the source image size.
        img_w: Width of the source image in pixels.
        img_h: Height of the source image in pixels.
        cut_w: Tile width used to re-normalise x and width.
        cut_h: Tile height used to re-normalise y and height.

    Returns:
        A list of ``[class, cx, cy, w, h]`` rows, with ``class`` as ``int``
        and the box values normalised by ``cut_w`` / ``cut_h``.
    """
    x0, y0, x1, y1 = pos
    sub_labs = []
    for lab in labels:
        # Convert the normalised box to source pixels; lab[0] is the class.
        cx, cy = lab[1] * img_w, lab[2] * img_h
        w, h = lab[3] * img_w, lab[4] * img_h
        if not (x0 < cx < x1 and y0 < cy < y1):
            continue  # centre is outside this tile
        cx, w = _clip_span(cx, w, x0, x1)
        cy, h = _clip_span(cy, h, y0, y1)
        # Shift into tile coordinates and re-normalise by the tile size.
        sub_labs.append([int(lab[0]), (cx - x0) / cut_w, (cy - y0) / cut_h, w / cut_w, h / cut_h])
    return sub_labs
if __name__ == '__main__':
    # Split every labelled .jpg/.JPG image in img_dir into overlapping tiles
    # and write one image file and one label file per tile.
    img_dir = r"C:\Users\Administrator\Desktop\stnplad_yolo\images\val"  # source images
    img_save_dir = r"C:\Users\Administrator\Desktop\stnplad_yolo\images\valx"  # tile images are written here
    # Directory of the original label files. Tile label files are written
    # into this same directory; separate them from the originals afterwards.
    label_save_dir = r"C:\Users\Administrator\Desktop\stnplad_yolo\labels\val"
    cut_w = 1920  # tile width in pixels
    cut_h = 1080  # tile height in pixels
    w_stride = 1800  # horizontal step; smaller than cut_w, so tiles overlap by 120 px
    h_stride = 1000  # vertical step; smaller than cut_h, so tiles overlap by 80 px

    # cv2.imwrite does not create missing directories.
    os.makedirs(img_save_dir, exist_ok=True)

    img_list = [file for file in os.listdir(img_dir) if file.endswith((".jpg", ".JPG"))]
    print(img_list)
    for img_name in tqdm(img_list):
        img_path = os.path.join(img_dir, img_name)
        txt_path = os.path.join(label_save_dir, img_name[:-4] + ".txt")  # label file of this image
        print(img_path)
        print(txt_path)
        if not os.path.exists(txt_path):  # images without a label file are skipped
            continue
        labels = read_labels(txt_path)
        cv_img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        if cv_img is None:
            # imread returns None instead of raising when a file cannot be decoded.
            print("Skipping unreadable image:", img_path)
            continue
        img_w, img_h = cv_img.shape[1], cv_img.shape[0]
        print(img_w)
        print(img_h)
        # Only images strictly larger than the tile in both dimensions are split;
        # all other images are skipped and produce no output.
        if img_w > cut_w and img_h > cut_h:
            print(cut_w)
            print(cut_h)
            imgs_pos = get_imgs_pos(img_w, img_h, cut_w, cut_h, w_stride, h_stride)
            print(imgs_pos)
            if imgs_pos:
                print(len(imgs_pos))
                for idx, pos in enumerate(imgs_pos):
                    # Labels whose centre falls inside this tile, in tile coordinates.
                    sub_labels = get_sublabels(pos, labels, img_w, img_h, cut_w, cut_h)
                    save_subimg(cv_img, pos, img_save_dir, img_name, idx)  # write the tile image
                    save_sublabs(sub_labels, label_save_dir, img_name, idx)  # write the tile labels
分割后,有些小图没有对应的标签文件(或标签文件为空),需要把这些小图删除。
先删除标签文件大小为0的小图,再删除这些大小为0的标签文件。
import os
def remove_images_without_labels(image_dir, label_dir, remove_empty_labels=False):
    """Delete images that have no label file and labels that have no image.

    Images and labels are paired by file stem: ``<stem>.jpg`` (extension
    matched case-insensitively) in ``image_dir`` belongs to ``<stem>.txt``
    in ``label_dir``. Files with any other extension, and sub-directories,
    are left untouched.

    Args:
        image_dir: Directory containing the image files.
        label_dir: Directory containing the label files.
        remove_empty_labels: When true, a zero-byte label file counts as
            missing, so both the image and the empty label are deleted.

    Returns:
        A ``(images_deleted, labels_deleted)`` tuple of counts.
    """
    images_deleted = 0
    labels_deleted = 0
    kept_stems = set()  # stems of the images that survive the first pass

    for image_file in os.listdir(image_dir):
        image_path = os.path.join(image_dir, image_file)
        stem, ext = os.path.splitext(image_file)
        # Only real .jpg files are candidates; the previous str.replace-based
        # lookup deleted every file whose name did not contain ".jpg".
        if ext.lower() != ".jpg" or not os.path.isfile(image_path):
            continue
        label_path = os.path.join(label_dir, stem + ".txt")
        has_label = os.path.isfile(label_path)
        if has_label and remove_empty_labels and os.path.getsize(label_path) == 0:
            has_label = False
        if has_label:
            kept_stems.add(stem)
        else:
            os.remove(image_path)
            images_deleted += 1

    for label_file in os.listdir(label_dir):
        label_path = os.path.join(label_dir, label_file)
        stem, ext = os.path.splitext(label_file)
        if ext.lower() != ".txt" or not os.path.isfile(label_path):
            continue
        if stem not in kept_stems:
            os.remove(label_path)
            labels_deleted += 1

    return images_deleted, labels_deleted
# Directories holding the tile images and the tile label files.
image_folder = r"C:\Users\Administrator\Desktop\datuxiaotu\images11"
label_folder = r"C:\Users\Administrator\Desktop\datuxiaotu\labels11"

# Delete images that have no label file and label files that have no image,
# then report how many files of each kind were removed.
deleted_images, deleted_labels = remove_images_without_labels(
    image_dir=image_folder,
    label_dir=label_folder,
)
print("已删除的图像数量:", deleted_images)
print("已删除的标签数量:", deleted_labels)
stnplad数据集分割的效果
标签数量还是2409
1 epochs completed in 0.002 hours.
Optimizer stripped from runs/train/exp24/weights/last.pt, 14.4MB
Optimizer stripped from runs/train/exp24/weights/best.pt, 14.4MB
Validating runs/train/exp24/weights/best.pt...
Fusing layers...
YOLOv5s summary: 157 layers, 7023610 parameters, 0 gradients, 15.8 GFLOPs
Class Images Instances P R mAP50 mAP50-95: 100%|██████████| 3/3 [00:01<00:00, 2.41it/s]
all 151 699 0.000154 0.0426 9.43e-05 1.84e-05
tower 151 65 0 0 0 0
insulator 151 61 0.000315 0.0656 0.00017 4.59e-05
spacer 151 61 0.000455 0.148 0.000301 4.62e-05
damper 151 497 0 0 0 0
plate 151 15 0 0 0 0
Results saved to runs/train/exp24
Validating runs/train/exp25/weights/best.pt...
Fusing layers...
YOLOv5s summary: 157 layers, 7023610 parameters, 0 gradients, 15.8 GFLOPs
Class Images Instances P R mAP50 mAP50-95: 100%|██████████| 6/6 [00:02<00:00, 2.05it/s]
all 377 1710 0.000281 0.0464 0.000158 4.31e-05
tower 377 101 0.000599 0.099 0.000333 9.46e-05
insulator 377 223 0.000605 0.0897 0.000346 0.000103
spacer 377 209 0.000203 0.0431 0.000113 1.81e-05
damper 377 1123 0 0 0 0
plate 377 54 0 0 0 0
Results saved to runs/train/exp25
图片变多效果好了一点
Epoch GPU_mem box_loss obj_loss cls_loss Instances Size
299/299 13.4G 0.0191 0.01417 0.0007366 204 640: 100%|██████████| 12/12 [00:04<00:00, 2.89it/s]
Class Images Instances P R mAP50 mAP50-95: 100%|██████████| 3/3 [00:00<00:00, 4.17it/s]
all 151 699 0.758 0.693 0.7 0.404
300 epochs completed in 0.418 hours.
Optimizer stripped from runs/train/exp26/weights/last.pt, 14.4MB
Optimizer stripped from runs/train/exp26/weights/best.pt, 14.4MB
Validating runs/train/exp26/weights/best.pt...
Fusing layers...
YOLOv5s summary: 157 layers, 7023610 parameters, 0 gradients, 15.8 GFLOPs
Class Images Instances P R mAP50 mAP50-95: 100%|██████████| 3/3 [00:01<00:00, 2.40it/s]
all 151 699 0.791 0.702 0.742 0.411
tower 151 65 0.841 0.323 0.501 0.205
insulator 151 61 0.71 0.672 0.719 0.412
spacer 151 61 0.801 0.79 0.779 0.512
damper 151 497 0.783 0.724 0.764 0.344
plate 151 15 0.818 1 0.947 0.581
Results saved to runs/train/exp26