Easydata标注的数据集格式转为Cityscapes数据集格式（个人记录）

阿兹卡班首席

已于 2024-03-15 18:20:16 修改

阅读量953

点赞数 21

分类专栏：数据集转格式　文章标签：python vscode

于 2024-03-15 11:30:33 首次发布

本文链接：https://blog.csdn.net/weixin_61044335/article/details/136733340

版权

数据集转格式专栏收录该内容

1 篇文章 0 订阅

订阅专栏

1.获取数据集

在 EasyData智能数据服务平台中，注册账号，标注数据，导出标注好的文件

2.将其中的jpg和json文件分类为两个文件夹

因easydata导出的文件jpg和json是混在一起的，因此要先将它们分类

在同路径下新建easydata_jpg、easydata_json、labelme_json三个文件夹

import os
import shutil

def move_files(source_dir, jpg_dest_dir, json_dest_dir):
    """Sort the ``.jpg`` and ``.json`` files of one folder into two folders.

    Despite the name, the files are copied (``shutil.copyfile``), so
    ``source_dir`` is left untouched. Destination folders are created when
    they do not exist yet. Entries with any other extension are ignored.

    :param source_dir: folder holding the mixed EasyData export
    :param jpg_dest_dir: folder that receives the ``.jpg`` files
    :param json_dest_dir: folder that receives the ``.json`` files
    """
    destinations = {".jpg": jpg_dest_dir, ".json": json_dest_dir}
    # copyfile does not create missing parent folders, so make them first.
    for dest_dir in destinations.values():
        os.makedirs(dest_dir, exist_ok=True)

    for file_name in os.listdir(source_dir):
        for suffix, dest_dir in destinations.items():
            if file_name.endswith(suffix):
                shutil.copyfile(
                    os.path.join(source_dir, file_name),
                    os.path.join(dest_dir, file_name),
                )
                break

# Folder with the mixed EasyData export and the two folders to sort it into.
source_directory = "/home/lxy/Desktop/test/2024.3.15/easydata/"
jpg_destination_directory = "/home/lxy/Desktop/test/2024.3.15/easydata_jpg"
json_destination_directory = "/home/lxy/Desktop/test/2024.3.15/easydata_json"

# Sort the exported files into the jpg / json folders.
move_files(
    source_dir=source_directory,
    jpg_dest_dir=jpg_destination_directory,
    json_dest_dir=json_destination_directory,
)

3.将Easydata格式的json文件转为labelme格式的json文件

在代码最后的easydata_dir、json_folder、out_dir中修改文件路径为自己路径

在同路径下新建labelme_png、labelme_txt两个文件夹


import argparse
import json
import pycocotools.mask as mask_util
import cv2
import numpy as np
from imantics import Mask
import os
from tqdm import tqdm

# easyData数据,平台格式默认格式json，转换成labelme的json

# class DecodeError(Exception):
#     pass

def mask2polygons(mask):
    """Turn a binary mask into the vertex list of its first polygon.

    The outlines come from ``Mask(mask).polygons().points``. Each outline is
    thinned by keeping every ``step``-th point, where ``step`` grows with the
    outline length, and the last point of the outline is appended again.
    Only the first outline is returned, as a list of ``[x, y]`` pairs.
    """
    contours = Mask(mask).polygons().points

    sampled = []
    for contour in contours:
        n_points = len(contour)
        # Longer outlines are sampled more sparsely.
        if n_points > 300:
            step = 15
        elif n_points > 200:
            step = 10
        elif n_points > 100:
            step = 5
        elif n_points > 50:
            step = 2
        else:
            step = 1

        thinned = np.concatenate((contour[::step], contour[-1:]))
        sampled.append(thinned.tolist())

    # Single-region conversion: any further outlines are dropped.
    return sampled[0]


def easydata2labelme(img_path, json_path, out_dir):
    """Convert one EasyData annotation file into a labelme-style JSON file.

    :param img_path: path of the image the annotation belongs to
    :param json_path: path of the JSON file exported by EasyData
    :param out_dir: folder that receives the converted JSON file (created
        when missing); the output keeps the file name of ``json_path``
    :return: None. Nothing is written when the image or the JSON file is
        missing, or when the image cannot be decoded.
    """
    if not os.path.exists(img_path):
        print(img_path + " is not exists!")
        return
    if not os.path.exists(json_path):
        print(json_path + " is not exists!")
        return
    os.makedirs(out_dir, exist_ok=True)

    with open(json_path, 'r', encoding='utf8') as fp:
        results = json.load(fp)

    # Only the image size is needed. cv2.imread returns None for a file it
    # cannot decode, which must not reach ``.shape``.
    ori_img = cv2.imread(img_path)
    if ori_img is None:
        print(img_path + " can not be read as an image!")
        return
    height, width = ori_img.shape[:2]

    # EasyData label name -> label written to the labelme file.
    # Names that are not listed fall back to '1'.
    labels = {
        'grass': 'grass',
        'ignore': '1',
        'shrubs': '2',
        'weed': '3',
        'hay': '4'
    }

    data = {
        # Should match the labelme version installed in the environment.
        "version": "5.0.1",
        "flags": {},
        "shapes": [],
    }

    for item in results['labels']:
        label = labels.get(item['name'], '1')
        shape_type = item['shape']
        points = []

        if shape_type == "brush":
            # The brush mask is RLE encoded; decoding may raise, which the
            # caller is expected to handle.
            rle_obj = {"counts": item['mask'],
                       "size": [height, width]}
            mask = mask_util.decode(rle_obj)
            # Single-region conversion: one polygon per brush mask.
            points = mask2polygons(mask)
            # The points now describe a polygon, so store it as one.
            # NOTE(review): "brush" is not among labelme's shape types -
            # confirm against the installed labelme version.
            shape_type = "polygon"
        elif shape_type == "polygon":
            # meta.points is a list of {"x": ..., "y": ...} dicts; clamp
            # every vertex to the image rectangle.
            points = [
                [min(max(pt["x"], 0), width), min(max(pt["y"], 0), height)]
                for pt in item["meta"]["points"]
            ]
        elif shape_type == "circle":
            # Stored as two points: the centre and one point on the circle.
            meta = item["meta"]
            radius = meta["radius"]
            center_x = meta["center"]["x"]
            center_y = meta["center"]["y"]
            points = [[center_x, center_y], [center_x + radius, center_y]]

        data["shapes"].append({
            "label": label,
            "points": points,
            "group_id": None,
            "shape_type": shape_type,
            "flags": {},
        })

    # Set once, outside the loop, so it is present even without any label.
    data["imagePath"] = img_path
    data["imageData"] = None
    data["imageHeight"] = height
    data["imageWidth"] = width

    out_path = os.path.join(out_dir, os.path.basename(json_path))
    with open(out_path, 'w') as f:
        json.dump(data, f)


def main():
    """Convert every image/annotation pair of the EasyData export.

    Edit the three folder paths below to match your own layout. For each
    non-JSON entry of ``easydata_dir`` the annotation with the same base
    name is looked up in ``json_folder`` and converted into ``out_dir``.
    """
    easydata_dir = r"/home/lxy/Desktop/test/2024.3.15/easydata_jpg"
    json_folder = r"/home/lxy/Desktop/test/2024.3.15/easydata_json"
    out_dir = r"/home/lxy/Desktop/test/2024.3.15/labelme_json"

    for path in tqdm(os.listdir(easydata_dir)):
        # splitext keeps dots inside the base name ("a.b.jpg" -> "a.b").
        stem, ext = os.path.splitext(path)
        if ext == '.json':
            continue
        img_path = os.path.join(easydata_dir, path)
        json_path = json_folder + '/' + stem + '.json'
        try:
            easydata2labelme(img_path, json_path, out_dir)
        except ValueError as e:
            # A mask that cannot be decoded: report it, go on with the rest.
            print("{}解码错误{}:".format(img_path, e))


if __name__ == '__main__':
    main()

4.将labelme.json格式转为mask.png和mask.txt文件，分别存入png和txt文件夹中

import json,os
import numpy as np
import cv2,tqdm
from tqdm import tqdm
import labelme
from PIL import Image, ImageDraw, ImagePath


#配合文件'1_easyData2labelMe.py'使用。
#将labelme格式json转为mask.png和mask.txt文件，分别存入png和txt文件夹中


# Annotation name -> label string stored in the labelme JSON files.
label_name = {
    'grass': 'grass',
    'ignore': '1',
    'shrubs': '2',
    'weed': '3',
    'hay': '4',
}
# Alternative label set (disabled):
#   grass_normal -> '0', grass_shadow -> '1', hay_normal -> '2',
#   hay_shadow -> '3', green -> '4', other -> '5'

# Label string -> pixel value painted into the mask image. Values are
# shifted by one so that 0 is not used by any label.
labels = {
    "grass": 1,
    "1": 2,
    "2": 3,
    "3": 4,
    "4": 5,
    "5": 6,
}



def gray2bgr(mask):
    """Return ``mask`` converted with ``cv2.COLOR_GRAY2BGR``."""
    bgr = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    return bgr

def run(jsons,jpg,png):
    """Render every labelme JSON file in ``jsons`` as a mask image.

    For ``name.json`` the image ``name.jpg`` in ``jpg`` supplies the mask
    size; every shape is filled with the pixel value of its label (module
    level ``labels``) and the result, multiplied by 80, is written to
    ``name.png`` in ``png``. Entries that are not ``.json`` files and
    annotations whose image cannot be read are skipped.

    :param jsons: folder with labelme JSON files
    :param jpg: folder with the matching ``.jpg`` images
    :param png: existing folder that receives the mask images
    """
    for file in tqdm(os.listdir(jsons)):
        if not file.endswith(".json"):
            continue
        stem = os.path.splitext(file)[0]

        with open(os.path.join(jsons, file), 'r') as f:
            data = json.load(f)

        # The image is only read to get the shape of the mask.
        image_file = os.path.join(jpg, stem + ".jpg")
        image = cv2.imread(image_file)
        if image is None:
            print(image_file + " can not be read, skipped")
            continue

        mask = np.zeros_like(image, dtype=np.uint8)
        for d in data['shapes']:
            if len(d['points']) == 0:
                # Nothing to draw for a shape without points.
                continue
            value = labels[d['label']]
            # fillPoly needs int32 point coordinates.
            points = np.array(d['points'], dtype=np.int32)
            cv2.fillPoly(mask, [points], (value, value, value))

        # Scaled by 80 so the classes are visible when the PNG is viewed.
        # NOTE(review): mask is uint8, so pixel values of 4 and above wrap
        # around after the multiplication (4 * 80 = 320 -> 64) - confirm
        # that this is acceptable.
        cv2.imwrite(os.path.join(png, stem + ".png"),
                    np.array(mask * 80, dtype=np.uint8))

def jsontotxt(jsons,txt):
    """Write one text file per labelme JSON file listing its grass polygons.

    Every shape whose label is one of the grass labels of ``label_name``
    becomes one line: the class id ``grass`` followed by the polygon's
    ``x y`` pairs, each divided by the image width / height and clamped to
    [0, 1], separated by spaces. Shapes with other labels are skipped, so
    a file without grass shapes produces an empty text file.

    :param jsons: folder with labelme JSON files
    :param txt: existing folder that receives the ``.txt`` files
    """
    # Only keys that exist in label_name count; a missing key must not
    # turn into the string 'None' and match a label of that name.
    grass_keys = ('grass_normal', 'grass_shadow', 'grass')
    grass_labels = {str(label_name[key]) for key in grass_keys if key in label_name}

    json_files = [name for name in os.listdir(jsons) if name.endswith('.json')]
    for json_file in tqdm(json_files):
        with open(os.path.join(jsons, json_file), 'r') as f:
            data = json.load(f)
        width = data['imageWidth']
        height = data['imageHeight']

        output_file = os.path.join(txt, os.path.splitext(json_file)[0] + ".txt")
        with open(output_file, "w") as f:
            for d in data['shapes']:
                if d['label'] not in grass_labels:
                    continue
                # Only grass is trained at the moment, under the id "grass".
                fields = ["grass"]
                for point in d['points']:
                    # Coordinates relative to the image size, kept in [0, 1].
                    x = max(0, min(point[0] / width, 1))
                    y = max(0, min(point[1] / height, 1))
                    fields.append(f"{x} {y}")
                # Each line ends with a space before the newline.
                f.write(" ".join(fields) + " \n")

# Recolour every pixel inside a labelled polygon.
def polygon_to_pixels(image, points, rgb_values):
    """Recolour, in place, all pixels of ``image`` that lie in a polygon.

    Each pixel ``(r, g, b)`` inside the polygon (outline included) becomes
    ``(b, 0, r)``. The polygon is rasterised into a separate one-channel
    mask, so pixels outside it are never touched, whatever their colour.

    :param image: PIL image, modified in place.
        NOTE(review): pixels are unpacked as three values, so a
        three-channel image is assumed - confirm against the callers.
    :param points: polygon vertices as ``(x, y)`` pairs; truncated to int
    :param rgb_values: unused, kept so existing calls keep working
    :return: the modified ``image``
    """
    integer_points = [(int(x), int(y)) for x, y in points]

    # Rasterise the polygon into its own mask instead of painting it white
    # into a copy of the picture, which would also match white pixels that
    # were already in the picture.
    mask = Image.new("L", image.size, 0)
    ImageDraw.Draw(mask).polygon(integer_points, outline=255, fill=255)

    bbox = mask.getbbox()
    if bbox is None:
        # The polygon covers no pixel of the image.
        return image

    mask_pixels = mask.load()
    pixels = image.load()
    left, top, right, bottom = bbox
    # Only the bounding box of the polygon has to be visited.
    for x in range(left, right):
        for y in range(top, bottom):
            if mask_pixels[x, y]:
                r, g, b = pixels[x, y]
                pixels[x, y] = (b, 0, r)

    return image

def change_image(jsons, jpg, jpg_output):
    """Blank out or recolour the labelled regions of every annotated image.

    For ``name.json`` in ``jsons`` the image ``name.jpg`` in ``jpg`` is
    loaded; polygons labelled hay / weed / other are filled with black,
    polygons labelled green / shrubs / ignore are passed to
    ``polygon_to_pixels``, all other labels are left alone. The result is
    saved under the same name in ``jpg_output``. Annotations without a
    matching image are skipped.

    :param jsons: folder with labelme JSON files
    :param jpg: folder with the matching ``.jpg`` images
    :param jpg_output: folder that receives the edited images (created
        when missing)
    """
    def known_labels(*keys):
        # Only keys present in label_name count; a missing key must not
        # become the string 'None' and match a label of that name.
        return {str(label_name[key]) for key in keys if key in label_name}

    blackout_labels = known_labels('hay_normal', 'hay_shadow', 'hay', 'other', 'weed')
    recolor_labels = known_labels('green', 'shrubs', 'ignore')

    os.makedirs(jpg_output, exist_ok=True)

    for file in tqdm(os.listdir(jsons)):
        if not file.endswith(".json"):
            continue

        image_name = file.replace(".json", ".jpg")
        image_file = os.path.join(jpg, image_name)
        if not os.path.exists(image_file):
            continue

        with open(os.path.join(jsons, file), 'r') as f:
            data = json.load(f)

        image = Image.open(image_file)
        draw = ImageDraw.Draw(image)

        for d in data['shapes']:
            label = d['label']
            points = d['points']
            if label in blackout_labels:
                # Fill colour for these regions; change it here if needed.
                draw.polygon(tuple(map(tuple, points)), fill=(0, 0, 0))
            elif label in recolor_labels:
                # Recolours the pixels of ``image`` in place.
                polygon_to_pixels(image, points, (255, 255, 255))

        image.save(os.path.join(jpg_output, image_name), quality=100)


def main():
    """Create the mask PNGs and the polygon text files from the labelme JSON files."""
    jsons = r"/home/lxy/Desktop/test/2024.3.15/labelme_json"
    # NOTE(review): this image folder sits under "easy/", unlike the
    # easydata_jpg folder used in the earlier steps - confirm the path.
    jpg = r"/home/lxy/Desktop/test/2024.3.15/easy/easydata_jpg"
    png = r"/home/lxy/Desktop/test/2024.3.15/labelme_png"
    txt = r"/home/lxy/Desktop/test/2024.3.15/labelme_txt"
    # Only needed for the optional change_image step mentioned below.
    jpg_output = jpg + "_output"

    # (output folder, conversion step, message printed when it is done)
    steps = (
        (png, lambda: run(jsons, jpg, png), 'make png finish'),
        (txt, lambda: jsontotxt(jsons, txt), 'make txt finish'),
    )
    for out_dir, step, message in steps:
        os.makedirs(out_dir, exist_ok=True)
        step()
        print(message)

    # Optional third step (disabled): create jpg_output, call
    # change_image(jsons, jpg, jpg_output), print 'change_jpg finish'.


if __name__ == "__main__":
    main()

5.新建一个文件夹before，将labelme格式的json文件、easydata原始jpg图像、class_name.txt放在一起

class_name.txt中放标注类别

6.安装labelme

(1) Anaconda Prompt中创建一个环境

conda create --name=labelImg python=3.6

(2) 激活进入刚建立的新环境，

conda activate labelImg

(3) 安装界面支持pyqt5包

pip install pyqt5 -i https://pypi.douban.com/simple/

(4) 下载安装labelme，例如：pip install labelme（第3步代码中 data["version"] 写的是 5.0.1，应与这里安装的labelme版本一致）

7.在before路径下打开终端，用labelme自带的labelme_json_to_dataset命令处理json文件；然后新建一个output文件夹，把生成的xxx_json文件夹放进去

labelme_json_to_dataset image_1709744593840734000.json

批量生成：

import os
import subprocess

before_dir = "/home/lxy/Desktop/test/2024.3.15/before"

# Run labelme_json_to_dataset on every JSON file in before_dir.
for filename in os.listdir(before_dir):
    if not filename.endswith(".json"):
        continue
    json_file_path = os.path.join(before_dir, filename)
    # The full path is passed as its own argument, so the script works from
    # any working directory and with file names that contain spaces.
    # NOTE(review): the <name>_json result folder is expected next to the
    # JSON file - confirm for the installed labelme version.
    result = subprocess.run(["labelme_json_to_dataset", json_file_path])
    if result.returncode != 0:
        print("labelme_json_to_dataset failed for " + json_file_path)

8.用其中的label.png文件（局部文件），生成全局标签文件

get_jpg_and_png.py

import os
from PIL import Image
import numpy as np

def main(base_dir="/home/lxy/Desktop/test/2024.03.14.2/make_dataset"):
    """Rewrite the per-image label PNGs so they use the global class ids.

    Expected layout below ``base_dir``:

    * ``before/`` - the ``.jpg`` images and ``class_name.txt`` (one global
      class name per line; the line number is the global class id)
    * ``output/<name>_json/`` - ``label.png`` and ``label_names.txt`` for
      every image (ids in ``label.png`` index into ``label_names.txt``)

    Each image is re-saved into ``jpg/`` and its label image, translated
    from local to global class ids, is saved as ``png/<name>.png``. Both
    folders are created when missing.

    :param base_dir: folder that contains ``before`` and ``output``
    :raises ValueError: when a local class name is not in class_name.txt
    """
    before_dir = os.path.join(base_dir, "before")
    output_dir = os.path.join(base_dir, "output")
    jpg_dir = os.path.join(base_dir, "jpg")
    png_dir = os.path.join(base_dir, "png")
    os.makedirs(jpg_dir, exist_ok=True)
    os.makedirs(png_dir, exist_ok=True)

    # Global classes, e.g. ["bk", "cat", "dog"].
    with open(os.path.join(before_dir, "class_name.txt"), "r") as class_txt:
        class_name = class_txt.read().splitlines()

    for file_name in os.listdir(before_dir):
        if not file_name.endswith(".jpg"):
            continue
        stem = os.path.splitext(file_name)[0]
        json_dir = os.path.join(output_dir, stem + "_json")

        with Image.open(os.path.join(before_dir, file_name)) as img:
            img.save(os.path.join(jpg_dir, file_name))

        with Image.open(os.path.join(json_dir, "label.png")) as label_img:
            # Read the stored class ids directly. Converting a palette
            # image to "L" would replace the ids by the brightness of the
            # palette colours.
            # NOTE(review): label.png is assumed to be a palette ("P") or
            # 8-bit grey image - confirm for the installed labelme version.
            if label_img.mode not in ("P", "L"):
                label_img = label_img.convert("L")
            local_ids = np.array(label_img)

        # Classes present in this image, e.g. ["bk", "dog"].
        with open(os.path.join(json_dir, "label_names.txt"), "r") as f:
            names = f.read().splitlines()

        # Example: "dog" has local id 1 but global id 2, so every pixel
        # that is 1 in label.png becomes 2 in the final label image.
        global_ids = np.zeros(local_ids.shape, dtype=np.uint8)
        for index_json, name in enumerate(names):
            global_ids[local_ids == index_json] = class_name.index(name)

        # Saved with three identical channels, as before.
        new = Image.fromarray(np.stack([global_ids] * 3, axis=-1))
        new.save(os.path.join(png_dir, stem + ".png"))
        # The maximum is the highest global class id present, the minimum
        # is normally 0 (background).
        print(np.max(global_ids), np.min(global_ids))


if __name__ == '__main__':
    main()

9.运行moveSrcMasksImage.py

会在output所在文件夹的上一级目录（即output的上两级目录，例如output在make_dataset下时就是make_dataset的上一级）下新建JPEGImages和SegmentationClass两个文件夹，分别储存各个_json文件夹里的img.png和label.png

#########################
## moveSrcMasksImage.py ##
#########################
 
import os
import os.path as osp
import shutil
 
def moveSrcMasksImage(json_dir):
    """Collect ``img.png`` / ``label.png`` of every ``*_json`` folder.

    For each non-empty sub-folder ``<name>_json`` of ``json_dir`` the file
    ``img.png`` is copied to ``JPEGImages/<name>.png`` and ``label.png`` to
    ``SegmentationClass/<name>.png``. The source folders are left
    unchanged. Both target folders are created two directory levels above
    ``json_dir`` (one level when ``json_dir`` ends with a path separator).

    :param json_dir: folder that contains the ``*_json`` folders
    :return: ``(img_dir, mask_dir)``, the two target folders
    """
    pre_dir = os.path.abspath(os.path.dirname(os.path.dirname(json_dir)))

    img_dir = osp.join(pre_dir, "JPEGImages")
    mask_dir = osp.join(pre_dir, "SegmentationClass")
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(mask_dir, exist_ok=True)

    count = 0  # number of folders processed
    for dirs in os.listdir(json_dir):
        dir_name = osp.join(json_dir, dirs)
        if not osp.isdir(dir_name):
            continue
        if not dirs.endswith('_json'):
            continue
        if not os.listdir(dir_name):
            continue
        count += 1

        # A *_json folder holds img.png, label.png, label_names.txt and
        # label_viz.png; the first two are stored under the folder's name.
        new_name = dirs.rsplit('_', 1)[0] + '.png'

        # Copy straight to the target. Going through a temporary file
        # named new_name inside the source folder breaks for folders
        # called img_json / label_json, where it is img.png / label.png.
        shutil.copy(osp.join(dir_name, 'img.png'), osp.join(img_dir, new_name))
        print('{} ====> {}'.format(new_name, "JPEGImages"))
        shutil.copy(osp.join(dir_name, 'label.png'), osp.join(mask_dir, new_name))
        print('{} ====> {}'.format(new_name, "SegmentationClass"))

    print('共整理、移动{}张图像'.format(count))

    return img_dir, mask_dir


if __name__ == "__main__":
    path = '/home/lxy/Desktop/test/2024.03.14.2/make_dataset/output'
    moveSrcMasksImage(path)

10.然后运行get_gray.py把SegmentationClass文件夹下的图片变成灰度图

import cv2
import os
input_dir = '/home/lxy/Desktop/test/2024.03.14.2/SegmentationClass'      # folder with the .png files saved by the previous step
out_dir = '/home/lxy/Desktop/test/2024.03.14.2/SegmentationClass_8'

# No earlier step creates the output folder.
os.makedirs(out_dir, exist_ok=True)

for name in os.listdir(input_dir):
    img = cv2.imread(os.path.join(input_dir, name))
    if img is None:
        # Not a readable image (e.g. a sub-folder): skip it.
        print(name + " can not be read, skipped")
        continue
    # cv2.imread delivers the channels in BGR order.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # imencode + tofile writes the encoded PNG bytes to the target path.
    cv2.imencode('.png', gray)[1].tofile(os.path.join(out_dir, name))

11.运行train_val_test.py将上一步得到的图片划分为训练集，验证集，测试集

# train_val_test.py
 
'''
将数据分为train val test
'''
 
import os
import random
import shutil
 
image_path = '/home/lxy/Desktop/test/2024.03.14.2/JPEGImages'
label_path = '/home/lxy/Desktop/test/2024.03.14.2/SegmentationClass_8'

SPLIT_NAMES = ('train', 'val', 'test')


def _prepare_split_dirs(root):
    """Make sure root/train, root/val and root/test exist and are empty.

    Files left there by an earlier run are moved back into ``root`` rather
    than deleted: the split moves the files, so the sub-folders may hold
    the only copy of the data.
    """
    for split in SPLIT_NAMES:
        split_dir = os.path.join(root, split)
        if os.path.isdir(split_dir):
            for name in os.listdir(split_dir):
                shutil.move(os.path.join(split_dir, name), os.path.join(root, name))
        os.makedirs(split_dir, exist_ok=True)


def split_dataset(image_path, label_path, train_ratio=0.7, val_ratio=0.2):
    """Randomly split the images and their labels into train / val / test.

    Every ``.png`` file directly inside ``image_path`` and the label file
    of the same name in ``label_path`` are moved into the ``train``,
    ``val`` or ``test`` sub-folder of their folder. The sizes of the four
    lists (total, train, val, test) are printed.

    :param image_path: folder with the images
    :param label_path: folder with the label images
    :param train_ratio: share of the files that goes to ``train``
    :param val_ratio: share that goes to ``val``; the rest goes to ``test``
    :return: dict mapping each split name to its list of file names
    :raises FileNotFoundError: when an image has no label file; nothing
        has been assigned to a split at that point
    """
    _prepare_split_dirs(image_path)
    _prepare_split_dirs(label_path)

    total_list = sorted(
        name for name in os.listdir(image_path)
        if name.endswith('.png') and os.path.isfile(os.path.join(image_path, name))
    )
    missing = [name for name in total_list
               if not os.path.isfile(os.path.join(label_path, name))]
    if missing:
        raise FileNotFoundError('no label file for: ' + ', '.join(missing))

    total_size = len(total_list)
    train_size = int(total_size * train_ratio)
    val_size = int(total_size * val_ratio)

    # One random permutation, cut into three consecutive parts.
    shuffled = random.sample(total_list, total_size)
    split_files = {
        'train': shuffled[:train_size],
        'val': shuffled[train_size:train_size + val_size],
        'test': shuffled[train_size + val_size:],
    }

    print(total_size)
    for split in SPLIT_NAMES:
        print(len(split_files[split]))

    for split, names in split_files.items():
        for name in names:
            shutil.move(os.path.join(image_path, name),
                        os.path.join(image_path, split, name))
            shutil.move(os.path.join(label_path, name),
                        os.path.join(label_path, split, name))

    return split_files


if __name__ == '__main__':
    split_dataset(image_path, label_path)

12.将得到的JPEGImages和SegmentationClass_8放在一个新建的data文件夹下

13.运行train_val_test_txt.py得到对应的train,val,test的txt文件

####################
## train_val_test_txt.py ##
####################
 
 
import os
import tqdm
 
DATA_ROOT = '/home/lxy/Desktop/test/2024.03.14.2/data'


def write_split_list(data_root, split):
    """Write ``<data_root>/<split>.txt`` listing the image/label pairs.

    One line is written per file found in ``<data_root>/JPEGImages/<split>``:
    ``JPEGImages/<split>/<file>,SegmentationClass_8/<split>/<stem>.png``.
    The files are listed in sorted order so the output is reproducible.

    :param data_root: folder that holds JPEGImages and SegmentationClass_8
    :param split: name of the split, e.g. ``train``, ``val`` or ``test``
    :return: path of the written list file
    """
    image_dir = os.path.join(data_root, 'JPEGImages', split)
    lines = []
    for image_name in sorted(os.listdir(image_dir)):
        # splitext keeps dots inside the base name ("a.1.png" -> "a.1").
        stem = os.path.splitext(image_name)[0]
        lines.append(
            'JPEGImages/' + split + '/' + image_name + ','
            + 'SegmentationClass_8/' + split + '/' + stem + '.png' + '\n'
        )

    list_path = os.path.join(data_root, split + '.txt')
    with open(list_path, "w", encoding='utf-8') as f:
        f.writelines(lines)
    return list_path


if __name__ == '__main__':
    for split_name in ('train', 'test', 'val'):
        write_split_list(DATA_ROOT, split_name)