用于yolov7 yolov5及相同标签格式的数据集的人工检查脚本

最新推荐文章于 2024-04-23 23:05:21 发布

qiong_学海无涯

最新推荐文章于 2024-04-23 23:05:21 发布

阅读量129

点赞数 1

文章标签： YOLO

本文链接：https://blog.csdn.net/m0_73118044/article/details/132721811

版权

由于网上找到的数据集常常包含一些不合理的数据或标签，而我又不想把检查工作外包给别人，所以写了一个可以自己逐张检查数据集的脚本。需要自己修改的地方都带有注释"# 需要自己修改"。代码如下：

import shutil
import numpy as np
import cv2
import torch

#坐标转换，原始存储的是YOLOv5格式
# Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
def xywh2xyxy(lb, w1, h1, img):
    """Draw YOLO-format label boxes on ``img`` and show it until a key is pressed.

    Each row of ``lb`` is ``[class_id, x_center, y_center, width, height]``,
    where the four box values are normalised by the image size. Every row is
    converted to pixel corner coordinates, printed, and drawn onto ``img``.
    The annotated image is then shown in an OpenCV window that stays open
    until any key is pressed.

    Args:
        lb: Iterable of 5-element rows (for example an ``(n, 5)`` float array).
            An empty iterable just shows the image without boxes.
        w1: Width of ``img`` in pixels.
        h1: Height of ``img`` in pixels.
        img: Image array to draw on. It is modified in place.

    Returns:
        None.
    """
    # EDIT ME: class names indexed by class id. Two classes -> ['a', 'b'],
    # three classes -> ['a', 'b', 'c'], and so on.
    labels = ['no mask', 'mask']
    # EDIT ME: one box colour per class id, in the channel order used by
    # OpenCV images (blue, green, red).
    colors = [(0, 255, 0), (255, 255, 40)]
    # Class ids without an entry above are still drawn, in this colour, so an
    # unexpected class in a label file is visible during review instead of
    # being skipped or crashing the session.
    fallback_color = (0, 0, 255)

    for row in lb:
        label, x, y, w, h = row
        class_id = int(label)
        print("原图宽高:\nw1={}\nh1={}".format(w1, h1))

        # Undo the normalisation: scale the box back to pixel units.
        x_t = x * w1
        y_t = y * h1
        w_t = w * w1
        h_t = h * h1
        print("反归一化后输出：\n第一个:{}\t第二个:{}\t第三个:{}\t第四个:{}\t\n\n".format(x_t, y_t, w_t, h_t))

        # Centre/size -> top-left and bottom-right corners.
        top_left_x = x_t - w_t / 2
        top_left_y = y_t - h_t / 2
        bottom_right_x = x_t + w_t / 2
        bottom_right_y = y_t + h_t / 2

        known_class = 0 <= class_id < len(labels)
        print('标签:{}'.format(labels[class_id] if known_class else class_id))
        print("左上x坐标:{}".format(top_left_x))
        print("左上y坐标:{}".format(top_left_y))
        print("右下x坐标:{}".format(bottom_right_x))
        print("右下y坐标:{}".format(bottom_right_y))

        has_color = 0 <= class_id < len(colors)
        color = colors[class_id] if has_color else fallback_color
        # cv2.rectangle needs integer pixel coordinates.
        cv2.rectangle(
            img,
            (int(top_left_x), int(top_left_y)),
            (int(bottom_right_x), int(bottom_right_y)),
            color,
            2,
        )

    cv2.imshow('show', img)
    cv2.waitKey(0)  # block until any key is pressed
    cv2.destroyAllWindows()


import os
# ---- Configuration: edit every path below for your own dataset --------------
image_path = 'E:\\wangpan\\all_mask\\all_mask\\'  # images waiting for review
label_path = 'E:\\wangpan\\all_mask\\all_mask_label\\'  # YOLO .txt labels for those images
save_image = 'E:\\wangpan\\all_mask\\train_my_neet\\images\\'  # accepted images go here
save_label = 'E:\\wangpan\\all_mask\\train_my_neet\\labels\\'  # accepted labels go here
not_save_image = 'E:\\wangpan\\all_mask\\train_not_neet\\images\\'  # rejected images go here
not_save_label = 'E:\\wangpan\\all_mask\\train_not_neet\\labels\\'  # rejected labels go here

# Snapshot the listing first: files are removed from image_path while looping.
image_names = os.listdir(image_path)

# Make sure every destination exists before anything is deleted from the
# source folders; otherwise a missing folder could lose reviewed files.
for folder in (save_image, save_label, not_save_image, not_save_label):
    os.makedirs(folder, exist_ok=True)

for path in image_names:
    # splitext keeps dots inside the name ("a.b.jpg" -> "a.b"), so the label
    # lookup matches the image's full base name.
    label_name = os.path.splitext(path)[0] + ".txt"
    src_image = os.path.join(image_path, path)
    src_label = os.path.join(label_path, label_name)

    # Leave entries that cannot be reviewed where they are instead of crashing
    # in the middle of a review session.
    if not os.path.isfile(src_label):
        print("skip, label file not found: {}".format(src_label))
        continue
    img = cv2.imread(src_image)
    if img is None:
        print("skip, image could not be read: {}".format(src_image))
        continue

    # One label row per line: class_id x_center y_center width height.
    with open(src_label, 'r') as f:
        lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)
    print(lb)

    # Show the image with its boxes; returns once a key is pressed.
    h, w = img.shape[:2]
    xywh2xyxy(lb, w, h, img)
    print("finally")

    answer = input("输入1 并回车 保存    回车不保存")
    keep = answer == "1"
    image_dir, label_dir = (save_image, save_label) if keep else (not_save_image, not_save_label)

    # Copy the original files byte-for-byte (no re-encoding of the image) and
    # delete the sources only after both copies succeeded, so an error never
    # leaves a sample stored nowhere. Removing the sources is what keeps
    # already reviewed samples out of the next run.
    shutil.copyfile(src_image, os.path.join(image_dir, path))
    shutil.copyfile(src_label, os.path.join(label_dir, label_name))
    os.remove(src_image)
    os.remove(src_label)

    print("finally save" if keep else "Not save")

启动后会加载图片并画出检测框供你检查，如图1:

图1-检查窗口

按任意键后会关闭检查窗口，随后终端里会出现输入提示（红圈处），如图2：

图2

之后输入1并回车就可以保存图片和标签到你需要保存的数据集的文件夹：

save_image='E:\\wangpan\\all_mask\\train_my_neet\\images\\'#要保留的数据集的图片文件夹

save_label='E:\\wangpan\\all_mask\\train_my_neet\\labels\\'#要保留的数据集的标签文件夹

不输入内容直接回车，则会把图片和标签保存到你指定的另外两个用于存放"不保留"数据的文件夹：

not_save_image='E:\\wangpan\\all_mask\\train_not_neet\\images\\'#不保留的数据集的图片文件夹

not_save_label='E:\\wangpan\\all_mask\\train_not_neet\\labels\\'#不保留的数据集的标签文件夹

画检测框算法采用此链接文章：Python 读取YOLO标签并在图像上展示_lbp python yolo_JenKinJia的博客-CSDN博客

后来发现保存下来的图片存在问题，使用时会报错"Premature end of JPEG file"，使用以下代码可以解决：

import cv2
import os
 
def read_and_save_images(folder_path, save_folder):
    """Re-encode every readable image in ``folder_path`` into ``save_folder``.

    Each directory entry is decoded with ``cv2.imread`` and written back with
    ``cv2.imwrite`` under the same file name. Entries that cannot be decoded
    or written are reported on stdout and skipped.

    Args:
        folder_path: Directory containing the images to re-save.
        save_folder: Destination directory; created (including parents) when
            it does not exist yet.

    Returns:
        None.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_folder, exist_ok=True)

    for filename in os.listdir(folder_path):
        img_path = os.path.join(folder_path, filename)
        img = cv2.imread(img_path)
        if img is None:
            print(f'Failed to read image: {img_path}')
            continue

        save_path = os.path.join(save_folder, filename)
        # imwrite signals failure through its return value, so only report
        # success when the file was actually written.
        if cv2.imwrite(save_path, img):
            print(f'Saved image: {save_path}')
        else:
            print(f'Failed to write image: {save_path}')
 
# Source folder to read from and destination folder to write to.
# Both values are placeholders: replace them with real paths before running.
folder_path = 'your path'
save_folder = 'your save path'
 
# Re-save every readable image from folder_path into save_folder.
read_and_save_images(folder_path, save_folder)

此代码会把文件夹中能够读取的图片重新编码，并保存到 'your save path' 路径下，从而修复破损的图片；无法读取的图片会被跳过，并打印提示信息。

这段修复代码的出处我已经记不清是哪篇文章了，如果你看到了原文，可以在评论里告诉我，我会把链接补充到本文。