【LogoDetection 数据集处理】（3）将训练集按照类别划分为多个文件夹

最新推荐文章于 2023-06-22 08:30:08 发布

ctrl A_ctrl C_ctrl V

最新推荐文章于 2023-06-22 08:30:08 发布

阅读量520

点赞数

分类专栏： # LogoDetection 数据集处理 　文章标签： python tensorflow 开发语言

本文链接：https://blog.csdn.net/qq_43799400/article/details/125190017

版权

LogoDetection 数据集处理专栏收录该内容

6 篇文章 1 订阅

订阅专栏

参考：

【LogoDetection 数据集处理】（1）将数据集切分为训练集和验证集

【LogoDetection 数据集处理】（2）画出训练集图片的标注框

原始数据集将所有类别的图片都放在同一个文件夹中，不方便查看，因此有必要将训练集按照类别划分到多个文件夹中。该数据集共有50个类别，所以我们创建50个文件夹，并把每个类别的图片复制到对应的文件夹中（如果一张图片包含多个类别的标注，它会被复制到每个对应类别的文件夹里）。

代码如下：

"""
Split the dataset into 50 classes according to the JSON annotation file,
placing the images of each class into its own folder.
"""


import os
import json
import shutil
from tqdm import tqdm


# The category names in the annotation JSON contain slashes ("/") and Chinese
# characters, which cause problems when used in paths and file names, so the
# categories are renamed here. Ids are assigned from 1 in the order listed.
categories_list = [
    {"id": category_id, "name": category_name}
    for category_id, category_name in enumerate(
        (
            "bingdundun",
            "sanyo",
            "Eifini",
            "PSALTER",
            "Beaster",
            "ON",
            "BYREDO",
            "Ubras",
            "Eternelle",
            "PerfectDiary",
            "huaxizi",
            "Clarins",
            "Loccitane",
            "Versace",
            "Mizuno",
            "Lining",
            "DoubleStar",
            "YONEX",
            "ToryBurch",
            "Gucci",
            "LouisVuitton",
            "CARTELO",
            "JORDAN",
            "KENZO",
            "UNDEFEATED",
            "BoyLondon",
            "TREYO",
            "carhartt",
            "jierou",
            "Blancpain",
            "GXG",
            "ledin",
            "Diadora",
            "TUCANO",
            "Loewe",
            "GraniteGear",
            "DESCENTE",
            "OSPREY",
            "Swatch",
            "erke",
            "MassimoDutti",
            "PINKO",
            "PALLADIUM",
            "origins",
            "Trendiano",
            "yiner",
            "MonsterGuardians",
            "fuerjia",
            "IPSA",
            "Schwarzkopf",
        ),
        start=1,
    )
]


# Locations of the raw training data: the root folder, the images folder and
# the annotation JSON file underneath it.
data_path = "dataset/fewshotlogodetection_round1_train_202204/train"
images_path = os.path.join(data_path, "images")
annoations_path = os.path.join(data_path, "annotations/instances_train2017.json")


# Parse the annotation JSON and pull out the two top-level lists used below:
# the image records and the annotation records.
with open(annoations_path, mode='r', encoding='utf-8') as f:
    annoations_dict = json.load(f)
images_list, annotations_list = (
    annoations_dict["images"],
    annoations_dict["annotations"],
)


# Create the top-level output folder that will hold one sub-folder per class.
# exist_ok=True makes this a no-op when the folder is already there, without
# the check-then-create race of a separate os.path.exists() test.
trainset_dir = "TrainSet_50Classes"
os.makedirs(trainset_dir, exist_ok=True)


# Create one sub-folder per category and build class_dict, which maps each
# category id to the path of its folder.
class_dict = {}
for category in categories_list:
    dir_name_path = os.path.join(trainset_dir, category["name"])
    # Key by the category's own id rather than its position in the list, so
    # the mapping stays correct if the list is reordered or ids have gaps.
    class_dict[category["id"]] = dir_name_path
    # NOTE: an existing class folder is deleted first, so every run starts
    # from an empty folder and previous results are discarded.
    if os.path.exists(dir_name_path):
        shutil.rmtree(dir_name_path)
    os.makedirs(dir_name_path, exist_ok=True)


# Copy (not move) every image into the folder of each category it is annotated
# with. The source images are left untouched.

# Lookup tables built once up front, replacing a linear scan of images_list
# and annotations_list for every file on disk.
# file name -> ids of the image records carrying that file name
image_ids_by_file_name = {}
for image_info in images_list:
    image_ids_by_file_name.setdefault(image_info["file_name"], []).append(image_info["id"])

# image id -> set of category ids annotated on that image. A set is used so an
# image with several boxes of the same category is copied only once per folder.
category_ids_by_image_id = {}
for annotation in annotations_list:
    category_ids_by_image_id.setdefault(annotation["image_id"], set()).add(annotation["category_id"])

images_name = os.listdir(images_path)
for image_name in tqdm(images_name, desc="process"):
    image_path = os.path.join(images_path, image_name)
    # Files with no image record, and images with no annotations, are skipped.
    for image_id in image_ids_by_file_name.get(image_name, ()):
        for cls_id in category_ids_by_image_id.get(image_id, ()):
            # Raises KeyError if an annotation uses a category id that has no
            # folder in class_dict.
            shutil.copy(image_path, class_dict[cls_id])

划分结果如下：

在这里插入图片描述

ctrl A_ctrl C_ctrl V

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
打赏
0
评论
【LogoDetection 数据集处理】（3）将训练集按照类别划分为多个文件夹

原始的数据集是将所有类别的图片都放在一个文件夹中，不方便查看。所以有必要将训练集按照类别划分为多个文件夹，这里数据集中有50个类别，所以我们划分为50个文件夹，每个类别的图片放在一个文件夹中。...
复制链接

扫一扫