预刷数据脚本（适用于实例分割、目标检测）

unicorn832

已于 2024-08-31 15:47:43 修改

阅读量293

点赞数 5

文章标签：目标检测、人工智能、计算机视觉

于 2024-08-26 17:03:06 首次发布

本文链接：https://blog.csdn.net/weixin_70267235/article/details/141567809

版权

标记数据是一件枯燥且累的事情，但是训练模型需要大量的数据，此时我们可以利用模型预刷一些数据，利用模型迭代数据，再利用数据迭代模型...

ultralytics-main\ultralytics\cfg\default.yaml 中有一个标志位 save_txt，将它设为 True 后，就会把检测到的目标的类别和位置信息保存为 YOLO 格式的 txt 文件。txt 转 xml（用于目标检测）和 txt 转 json（用于实例分割）的脚本如下：

txt2xml

from xml.dom.minidom import Document
import os
import cv2


def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` under ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def makexml(picPath, txtPath, xmlPath):
    """Convert YOLO-format txt label files into VOC-format xml annotation files.

    For every ``<stem>.txt`` file in ``txtPath`` the image ``<stem>.jpg`` is
    read from ``picPath`` to obtain its size, and ``<stem>.xml`` is written to
    ``xmlPath``.

    Args:
        picPath: Folder containing the images.
        txtPath: Folder containing the YOLO txt label files.
        xmlPath: Folder the generated xml files are written to (must exist).

    Raises:
        KeyError: If a label line uses a class id that is not in ``dic``.
        ValueError: If a label line has fewer than five fields or a
            non-numeric coordinate.
    """
    # Maps the YOLO class id (as it appears in the txt file) to the class name.
    dic = {'0': "empty",
           '1': "full"}

    for name in os.listdir(txtPath):
        stem, ext = os.path.splitext(name)
        if ext.lower() != ".txt":
            # Anything that is not a label file is ignored.
            continue

        imgPath = os.path.join(picPath, stem + ".jpg")
        img = cv2.imread(imgPath)
        if img is None:
            # cv2.imread signals a missing/unreadable image by returning None.
            print(f"Skipping {name}: cannot read image {imgPath}")
            continue
        Pheight, Pwidth = img.shape[:2]
        Pdepth = img.shape[2] if img.ndim == 3 else 1

        with open(os.path.join(txtPath, name), 'r') as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", "driving_annotation_dataset")
        _append_text_element(xmlBuilder, annotation, "filename", stem + ".jpg")

        size = xmlBuilder.createElement("size")
        _append_text_element(xmlBuilder, size, "width", Pwidth)
        _append_text_element(xmlBuilder, size, "height", Pheight)
        _append_text_element(xmlBuilder, size, "depth", Pdepth)
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()
            if not oneline:
                # Blank line (e.g. trailing newline at the end of the file).
                continue

            # Only the first four values after the class id are used:
            # normalized center x, center y, width, height.
            cx, cy, bw, bh = (float(v) for v in oneline[1:5])

            obj = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj, "name", dic[oneline[0]])
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")

            # Normalized center/size -> pixel corners; the center is shifted
            # by one pixel before the half extent is subtracted/added.
            bndbox = xmlBuilder.createElement("bndbox")
            _append_text_element(xmlBuilder, bndbox, "xmin",
                                 int((cx * Pwidth + 1) - bw * 0.5 * Pwidth))
            _append_text_element(xmlBuilder, bndbox, "ymin",
                                 int((cy * Pheight + 1) - bh * 0.5 * Pheight))
            _append_text_element(xmlBuilder, bndbox, "xmax",
                                 int((cx * Pwidth + 1) + bw * 0.5 * Pwidth))
            _append_text_element(xmlBuilder, bndbox, "ymax",
                                 int((cy * Pheight + 1) + bh * 0.5 * Pheight))
            obj.appendChild(bndbox)

            annotation.appendChild(obj)

        xmlFilePath = os.path.join(xmlPath, stem + ".xml")
        # The file encoding must match the encoding named in the XML declaration.
        with open(xmlFilePath, 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')


if __name__ == "__main__":
    # Fill in the three folders below before running. A trailing slash is not
    # required because the paths are combined with os.path.join.
    picPath = ""  # folder containing the images
    txtPath = ""  # folder containing the YOLO txt label files
    xmlPath = ""  # folder the xml files are written to
    os.makedirs(xmlPath, exist_ok=True)
    makexml(picPath, txtPath, xmlPath)

txt2json

import os
import json
import cv2

# Class names, indexed by the YOLO class id, and the reverse name -> id mapping
class_names = ['ice']
class_to_id = {name: idx for idx, name in enumerate(class_names)}

# Input and output folders (fill in before running)
label_path = ''
image_path = ''
output_path = ''

# Create the output folder
os.makedirs(output_path, exist_ok=True)

# Collect all image files
image_files = [f for f in os.listdir(image_path) if f.endswith('.jpg')]

# Convert the label file of every image
for image_file in image_files:
    # Path of the YOLO label file that belongs to this image
    label_file = os.path.splitext(image_file)[0] + '.txt'
    label_file_path = os.path.join(label_path, label_file)

    # Images whose label file is missing or unreadable are skipped
    try:
        with open(label_file_path, 'r') as f:
            lines = f.readlines()
    except OSError:
        continue

    # Read the image to get its size; cv2.imread returns None on failure
    image = cv2.imread(os.path.join(image_path, image_file))
    if image is None:
        print(f"Skipping {image_file}: cannot read image")
        continue
    height, width = image.shape[:2]

    # JSON structure written for this image
    json_data = {
        "version": "5.4.1",
        "flags": {},
        "shapes": [],
        "imagePath": image_file,
        "imageData": None,
        "imageHeight": height,
        "imageWidth": width
    }

    # One polygon per label line
    for line in lines:
        data = line.split()
        if not data:
            # Blank or whitespace-only line
            continue
        class_name = class_names[int(data[0])]
        coords = list(map(float, data[1:]))
        # Pair up (x, y) values and scale the normalized coordinates to pixels
        points = [(int(x * width), int(y * height))
                  for x, y in zip(coords[0::2], coords[1::2])]

        shape = {
            "label": class_name,
            "points": points,
            "group_id": None,
            "description": "",
            "shape_type": "polygon",
            "flags": {},
            "mask": None
        }
        json_data["shapes"].append(shape)

    # Write the JSON file next to the other outputs
    json_file = os.path.splitext(image_file)[0] + '.json'
    json_file_path = os.path.join(output_path, json_file)
    with open(json_file_path, 'w') as f:
        json.dump(json_data, f, indent=4)

print("Conversion completed.")

为了验证转完之后的结果正确与否，我们可以打开labelimg或者labelme，如果不想通过这种方式，也可以用下面的脚本直接在IDE中进行可视化：

vis_txt

import cv2
import os
import numpy as np

# Class names (indexed by YOLO class id) and the color used to draw each class
classes = ['empty', 'full']

colors = [(0, 0, 255), (0, 255, 0)]

# Class name -> color, in the same order as `classes`
YOLO_LABELS = {class_name: colors[index] for index, class_name in enumerate(classes)}


def draw_boxes(image_path, label_path, output_dir, thickness=10):
    """Draw the boxes of a YOLO label file onto an image and save the result.

    The annotated image is written to ``output_dir`` under the input image's
    base name with a ``.jpg`` extension.

    Args:
        image_path (str): Path of the image file.
        label_path (str): Path of the YOLO-format label file.
        output_dir (str): Folder the annotated image is written to; created
            if it does not exist.
        thickness (int): Line thickness of the drawn rectangles.

    Raises:
        OSError: If the image cannot be read, or the label file cannot be opened.
        ValueError: If a label line has fewer than five fields or a
            non-numeric value.
        IndexError: If a class id has no entry in ``YOLO_LABELS``.
    """
    os.makedirs(output_dir, exist_ok=True)

    img_name = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(output_dir, img_name + '.jpg')

    # cv2.imread returns None instead of raising when the image is unreadable
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"cannot read image: {image_path}")
    img_height, img_width = img.shape[:2]

    # Class names in class-id order
    label_names = list(YOLO_LABELS)

    with open(label_path, 'r') as f:
        for line in f:
            line_split = line.split()
            if not line_split:
                # Blank line
                continue
            label_id = int(line_split[0])
            # Only the first four values after the class id are used, so
            # lines with extra trailing columns are tolerated.
            x_center, y_center, w, h = map(float, line_split[1:5])
            x_min = int((x_center - w / 2) * img_width)
            y_min = int((y_center - h / 2) * img_height)
            x_max = int((x_center + w / 2) * img_width)
            y_max = int((y_center + h / 2) * img_height)

            color = YOLO_LABELS[label_names[label_id]]
            cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    cv2.imwrite(output_path, img)


# Example usage: fill in the three folders, then run the script.
image_folder = ''
label_folder = ''
output_folder = ''
os.makedirs(output_folder, exist_ok=True)

for image_file in os.listdir(image_folder):
    if not image_file.endswith(('.jpg', '.png')):
        continue
    image_path = os.path.join(image_folder, image_file)
    label_file = os.path.splitext(image_file)[0] + '.txt'
    label_path = os.path.join(label_folder, label_file)
    if not os.path.isfile(label_path):
        # An image without a label file has nothing to draw; skip it
        # instead of letting the whole run fail.
        continue
    draw_boxes(image_path, label_path, output_folder)

print("Annotation process completed.")

vis_xml

import xml.etree.ElementTree as ET
import cv2
import os


# Color used to draw the boxes of each class name
colors = dict(
    empty=(0, 0, 255),
    full=(0, 255, 0),
)

def visualize_xml_labels(xml_file, image_file, output_image_file):
    """Draw the boxes of an xml annotation file onto an image and save it.

    Only objects whose ``name`` has an entry in ``colors`` are drawn; other
    objects are ignored.

    Args:
        xml_file: Path of the xml annotation file.
        image_file: Path of the image the annotation belongs to.
        output_image_file: Path the annotated image is written to.

    Raises:
        OSError: If the image cannot be read.
    """
    # Parse the xml file
    root = ET.parse(xml_file).getroot()

    # cv2.imread returns None instead of raising when the image is unreadable
    image = cv2.imread(image_file)
    if image is None:
        raise OSError(f"cannot read image: {image_file}")

    for obj in root.findall('object'):
        name = obj.find('name').text
        color = colors.get(name)
        if color is None:
            # Class without a configured color: not drawn
            continue

        # int(float(...)) also accepts coordinates stored as e.g. "12.0"
        bndbox = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)

    # Save the annotated image
    cv2.imwrite(output_image_file, image)


if __name__ == '__main__':

    # Folders to fill in before running
    xml_folder = ''
    image_folder = ''
    output_folder = ''
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    xml_names = (entry for entry in os.listdir(xml_folder) if entry.endswith('.xml'))
    for xml_name in xml_names:
        # The image is assumed to share the xml file's stem and end in .jpg
        stem, _ = os.path.splitext(xml_name)
        jpg_name = stem + '.jpg'

        # Annotate one xml/image pair; the output keeps the image's file name
        visualize_xml_labels(
            os.path.join(xml_folder, xml_name),
            os.path.join(image_folder, jpg_name),
            os.path.join(output_folder, jpg_name),
        )

    print("Visualization complete.")

vis_json

import os
import cv2
import json
import numpy as np

# Folder holding both the images and their json files (fill in before running)
base_path = ''
# Stems of all json files; splitext keeps dots inside the file name intact
path_list = [os.path.splitext(i)[0] for i in os.listdir(base_path) if i.endswith('.json')]

for path in path_list:
    image_path = os.path.join(base_path, path + '.jpg')
    # cv2.imread returns None instead of raising when the image is unreadable
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping {path}: cannot read image {image_path}")
        continue
    h, w = image.shape[:2]
    label = np.zeros((h, w), dtype=np.uint8)

    with open(os.path.join(base_path, path + '.json'), encoding='utf-8') as f:
        shapes = json.load(f)['shapes']

    # Fill every polygon into one binary mask
    for shape in shapes:
        points = np.array(shape['points'], dtype=np.int32)
        if len(points) == 0:
            # Shape without points: nothing to fill
            continue
        label = cv2.fillPoly(label, [points], color=255)

    # Keep only the pixels covered by the mask and save next to the input
    image = cv2.bitwise_and(image, image, mask=label)
    cv2.imwrite(os.path.join(base_path, path + '_vis.jpg'), image)