【LogoDetection 数据集处理】(3)将训练集按照类别划分为多个文件夹

参考:

【LogoDetection 数据集处理】(1)将数据集切分为训练集和验证集

【LogoDetection 数据集处理】(2)画出训练集图片的标注框

原始的数据集是将所有类别的图片都放在一个文件夹中,不方便查看。所以有必要将训练集按照类别划分为多个文件夹,这里数据集中有50个类别,所以我们划分为50个文件夹,每个类别的图片放在一个文件夹中。

代码如下:

"""
Split the dataset into 50 classes according to the JSON annotation file, placing the images of each class in a folder of its own.
"""""


import os
import json
import shutil
from tqdm import tqdm


# The category names stored in the JSON annotations contain slashes ("/") and
# Chinese characters, which cause problems for paths and file names, so the
# categories are renamed here.  Ids are assigned in listing order, from 1.
_category_names = (
    "bingdundun",
    "sanyo",
    "Eifini",
    "PSALTER",
    "Beaster",
    "ON",
    "BYREDO",
    "Ubras",
    "Eternelle",
    "PerfectDiary",
    "huaxizi",
    "Clarins",
    "Loccitane",
    "Versace",
    "Mizuno",
    "Lining",
    "DoubleStar",
    "YONEX",
    "ToryBurch",
    "Gucci",
    "LouisVuitton",
    "CARTELO",
    "JORDAN",
    "KENZO",
    "UNDEFEATED",
    "BoyLondon",
    "TREYO",
    "carhartt",
    "jierou",
    "Blancpain",
    "GXG",
    "ledin",
    "Diadora",
    "TUCANO",
    "Loewe",
    "GraniteGear",
    "DESCENTE",
    "OSPREY",
    "Swatch",
    "erke",
    "MassimoDutti",
    "PINKO",
    "PALLADIUM",
    "origins",
    "Trendiano",
    "yiner",
    "MonsterGuardians",
    "fuerjia",
    "IPSA",
    "Schwarzkopf",
)
categories_list = [
    {"id": category_id, "name": category_name}
    for category_id, category_name in enumerate(_category_names, start=1)
]


# Location of the original training data, its image folder and its
# annotation file.
data_path = "dataset/fewshotlogodetection_round1_train_202204/train"
images_path = os.path.join(data_path, "images")
annoations_path = os.path.join(data_path, "annotations/instances_train2017.json")


# Parse the annotation JSON file and pull out its "images" and "annotations"
# sections.
with open(annoations_path, encoding='utf-8') as annotation_file:
    annoations_dict = json.loads(annotation_file.read())
images_list, annotations_list = (
    annoations_dict["images"],
    annoations_dict["annotations"],
)


# Create the top-level output folder.  makedirs(exist_ok=True) replaces the
# separate os.path.exists() check followed by os.mkdir(), so there is no
# window between the check and the creation.
trainset_dir = "TrainSet_50Classes"
os.makedirs(trainset_dir, exist_ok=True)


# Create one sub-folder per category and build the dictionary that maps a
# category id to the path of its folder.  The key is the id declared in the
# category entry rather than the entry's position in the list, so the mapping
# stays correct if categories_list is reordered or its ids are not contiguous.
class_dict = {}
for category in categories_list:
    dir_name_path = os.path.join(trainset_dir, category["name"])
    class_dict[category["id"]] = dir_name_path
    # Always start from an empty folder: anything left in it by a previous
    # run is deleted before the folder is recreated.
    if os.path.exists(dir_name_path):
        shutil.rmtree(dir_name_path)
    os.makedirs(dir_name_path, exist_ok=True)


# Copy every image into the folder of each category it is annotated with.
# The files are copied, not moved, so the source image folder is left intact.
#
# Two lookup tables are built once up front so that each file is resolved
# with dictionary lookups instead of rescanning images_list and
# annotations_list for every file found in the image folder.

# image id -> set of category ids that appear in its annotations
category_ids_by_image_id = {}
for annotation in annotations_list:
    category_ids_by_image_id.setdefault(annotation["image_id"], set()).add(
        annotation["category_id"]
    )

# file name -> set of category ids; a set per name also covers the case of
# several image records sharing one file name.
category_ids_by_file_name = {}
for image_info in images_list:
    category_ids_by_file_name.setdefault(image_info["file_name"], set()).update(
        category_ids_by_image_id.get(image_info["id"], ())
    )

images_name = os.listdir(images_path)
for image_name in tqdm(images_name, desc="process"):
    image_path = os.path.join(images_path, image_name)
    # Files that have no record in the annotation file are skipped.  Because
    # the category ids are held in a set, an image with several boxes of the
    # same category is copied into that folder only once.
    for cls_id in category_ids_by_file_name.get(image_name, ()):
        shutil.copy(image_path, class_dict[cls_id])

划分结果如下:

在这里插入图片描述

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

ctrl A_ctrl C_ctrl V

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值