转化成txt文件

大可爱就是我

已于 2023-07-13 11:29:46 修改

阅读量4.5k

点赞数 2

文章标签： json python 开发语言

于 2023-03-30 15:21:29 首次发布

本文链接：https://blog.csdn.net/tinglutang/article/details/129858220

版权

第一种方法

# 处理同一个数据集下多个json文件时，仅运行一次class_txt即可
import json
import os


"存储标签与预测框到txt文件中"
def _coco_bbox_to_yolo(bbox, img_width, img_height):
    """Convert a COCO pixel box to a normalised YOLO box.

    Args:
        bbox: ``[x_min, y_min, width, height]`` in pixels (COCO layout).
        img_width: Width of the image the box belongs to, in pixels.
        img_height: Height of the image the box belongs to, in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the matching
        image dimension and rounded to six decimals.
    """
    x_min, y_min, box_w, box_h = bbox
    return (
        round((x_min + box_w / 2) / img_width, 6),
        round((y_min + box_h / 2) / img_height, 6),
        round(box_w / img_width, 6),
        round(box_h / img_height, 6),
    )


def json_txt(json_path, txt_path):
    """Write one YOLO label file per image listed in a COCO-style JSON file.

    For every entry of the JSON's ``images`` list a file named
    ``<image id>.txt`` is (re)created inside ``txt_path``. Each annotation of
    that image becomes one line ``class x_center y_center width height``.
    Images without annotations get an empty file.

    Args:
        json_path: Path of the COCO-style JSON file to read.
        txt_path: Directory that receives the label files. It is created,
            including missing parents, when it does not exist.

    Raises:
        KeyError: If an annotated image has no ``width``/``height`` entry, or
            an annotation lacks ``bbox``/``category_id``.
    """
    os.makedirs(txt_path, exist_ok=True)

    with open(json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)
    images = coco.get("images") or []
    annotations = coco.get("annotations") or []

    # Group the annotations once instead of rescanning the full list for
    # every image.
    boxes_by_image = {}
    for annotation in annotations:
        boxes_by_image.setdefault(annotation.get("image_id"), []).append(annotation)

    for image in images:
        image_id = image.get("id")
        label_file = os.path.join(txt_path, str(image_id) + '.txt')
        # 'w' truncates leftovers from an earlier run; the with-block makes
        # sure the handle is closed even for images that have no boxes.
        with open(label_file, 'w') as out:
            for annotation in boxes_by_image.get(image_id, ()):
                yolo_box = _coco_bbox_to_yolo(
                    annotation["bbox"], image["width"], image["height"]
                )
                # Category ids in the JSON start at 1 while YOLOv5 expects
                # class indices starting at 0, hence the "- 1".
                print(annotation["category_id"] - 1, *yolo_box, file=out)


"将id对应的标签存储在class.txt中"
def class_txt(json_path, class_txt_path):
    """Append the category table of a COCO-style JSON file to a text file.

    json_path: path of the JSON file to read.
    class_txt_path: path of the text file that receives one "id name" line
        per entry of the JSON's "categories" list. The file is opened in
        append mode, so existing content is kept.
    """
    with open(json_path, 'r') as source:
        coco = json.load(source)

    categories = coco.get("categories")

    with open(class_txt_path, 'a') as sink:
        for category in categories:
            sink.write("{} {}\n".format(category.get("id"), category.get('name')))


# Convert the test-split annotations into label files under test_annotations/.
json_txt(json_path="instances_test.json", txt_path="test_annotations/")
# Uncomment to also export the category id/name table (needed only once per
# dataset, even when several JSON files are processed):
# class_txt("eval.json", "id_categories.txt")

1.Json文件中categories的id是从1开始的，而YOLOV5的要求标签是从0开始的，所以需要进行一个“-1”的操作。

2.避免了图片名称和txt文件名称不一致的问题。

3.如果直接利用bbox里面的值则会造成误差偏大，所以需要对bbox里的值进行归一化处理。

第二种方法：适用于txt标签文件需要以图片文件名命名的情况。

#处理同一个数据集下多个json文件时，仅运行一次class_txt即可
import json
import os
 
 
"存储标签与预测框到txt文件中"
def _coco_bbox_to_yolo(bbox, img_width, img_height):
    """Convert a COCO pixel box to a normalised YOLO box.

    Args:
        bbox: ``[x_min, y_min, width, height]`` in pixels (COCO layout).
        img_width: Width of the image the box belongs to, in pixels.
        img_height: Height of the image the box belongs to, in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the matching
        image dimension and rounded to six decimals.
    """
    x_min, y_min, box_w, box_h = bbox
    return (
        round((x_min + box_w / 2) / img_width, 6),
        round((y_min + box_h / 2) / img_height, 6),
        round(box_w / img_width, 6),
        round(box_h / img_height, 6),
    )


def json_txt(json_path, txt_path,
             image_dir='./images/test', label_dir='./labels/test/'):
    """Write YOLO label files named after the images of a COCO-style JSON.

    Steps, in order:
      1. every file found in ``image_dir`` gets an empty
         ``<file stem>.txt`` in ``label_dir`` (its stem is printed);
      2. every entry of the JSON's ``images`` list gets an empty
         ``<image id>.txt`` placeholder in ``txt_path`` and the number of
         images is printed;
      3. each annotation is appended to ``label_dir/<file_name stem>.txt`` as
         ``class x_center y_center width height``.

    Args:
        json_path: Path of the COCO-style JSON file to read.
        txt_path: Directory for the id-named placeholder files (created if
            missing).
        image_dir: Directory whose file names decide which label files are
            created up front. Defaults to the test split.
        label_dir: Directory that receives the label files (created if
            missing). Defaults to the test split.

    Raises:
        FileNotFoundError: If ``image_dir`` does not exist.
        KeyError: If an annotated image has no ``width``/``height`` entry, or
            an annotation lacks ``bbox``/``category_id``.
    """
    os.makedirs(txt_path, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    with open(json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)
    images = coco.get("images") or []
    annotations = coco.get("annotations") or []

    # Start every image on disk with an empty label file; this also clears
    # labels left over from a previous run before lines are appended below.
    for entry in os.listdir(image_dir):
        # splitext drops only the extension, so "a.b.jpg" maps to "a.b.txt".
        stem = os.path.splitext(os.path.basename(entry))[0]
        print(stem)
        with open(os.path.join(label_dir, stem + '.txt'), 'w'):
            pass

    # Id-named placeholder files in txt_path, kept for backward compatibility.
    for image in images:
        with open(os.path.join(txt_path, str(image.get("id")) + '.txt'), 'w'):
            pass
    print(len(images))

    # Group the annotations once instead of rescanning the full list for
    # every image.
    boxes_by_image = {}
    for annotation in annotations:
        boxes_by_image.setdefault(annotation.get("image_id"), []).append(annotation)

    for image in images:
        boxes = boxes_by_image.get(image.get("id"))
        if not boxes:
            continue
        stem = os.path.splitext(image.get('file_name'))[0]
        with open(os.path.join(label_dir, stem + '.txt'), 'a') as out:
            for annotation in boxes:
                yolo_box = _coco_bbox_to_yolo(
                    annotation["bbox"], image["width"], image["height"]
                )
                # Category ids in the JSON start at 1 while YOLO class
                # indices start at 0, hence the "- 1".
                print(annotation["category_id"] - 1, *yolo_box, file=out)
 
 
 
"将id对应的标签存储在class.txt中"
 
def class_txt(json_path, class_txt_path):
    """Dump the "categories" of a COCO-style JSON file as "id name" lines.

    json_path: path of the JSON file to read.
    class_txt_path: path of the text file to extend; it is opened in append
        mode, so every call adds the lines after any existing content.
    """
    with open(json_path, 'r') as handle:
        parsed = json.load(handle)
    category_entries = parsed.get("categories")

    with open(class_txt_path, 'a') as target:
        for entry in category_entries:
            category_id = entry.get("id")
            category_name = entry.get('name')
            print(category_id, category_name, file=target)
 
 

# Convert the test split, then the train split.
# NOTE(review): json_txt reads images from ./images/test and writes labels to
# ./labels/test/ by default, so the train call below also targets the test
# folders - confirm that this is intended.
json_txt(json_path="instances_test.json", txt_path="test_img/")
json_txt(json_path='instances_train.json', txt_path='train_img/')
# Uncomment to also export the category id/name table (once per dataset):
#class_txt("eval.json", "class.txt")

这次主要遇到了两个坑，一个是图片名称和txt文件名称不一致，另外一个是标签的类别要从0开始。

查看标签种类labels

import os
from pycocotools.coco import COCO
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

# COCO-format annotation file to inspect.
json_path = "instances_train.json"
#img_path = "E:\Pycharm\PycharmProjects\datasets\DUO\images\train"

# Parse the annotation file with pycocotools.
coco = COCO(annotation_file=json_path)

# Sorted ids of every image indexed by the annotation file.
ids = sorted(coco.imgs)
image_count = len(ids)
print("number of images: {}".format(image_count))

# Category id -> category name for every class declared in the file.
coco_classes = {category["id"]: category["name"] for category in coco.cats.values()}
print("classes: {}".format(coco_classes))