场景:
从网上下载了阿里云算法大赛图像识别的数据集,提供的是json格式,计划通过yolo算法进行练习,所以需要改造标注文件格式,记录一下以备以后使用。
原json格式数据集如下:
[
{
"name": "223_89_t20201125085855802_CAM3.jpg",
"image_height": 3500,
"image_width": 4096,
"category": 4,
"bbox": [
1702.79,
2826.53,
1730.79,
2844.53
]
},
{
"name": "235_2_t20201127123021723_CAM2.jpg",
"image_height": 6000,
"image_width": 8192,
"category": 5,
"bbox": [
1876.06,
998.04,
1883.06,
1004.04
]
},
{
"name": "235_2_t20201127123021723_CAM2.jpg",
"image_height": 6000,
"image_width": 8192,
"category": 5,
"bbox": [
1655.06,
1094.04,
1663.06,
1102.04
]
},
......
1、改造为yolov5需要的txt格式
yolov5输入的txt是相对坐标,需要进行计算。第一步:按图片名称把json文件中的标注拆分并转换为单独的txt文件(每张图片一个txt),代码如下:
# Convert JSON annotations to the txt format required by yolov5
import os
import json

json_dir = 'train_annos.json'  # path to the json annotation file
out_dir = 'output/'            # directory that receives the generated txt files


def main(json_path=None, output_dir=None):
    """Convert every annotation record in the JSON file to yolov5 txt lines.

    Each record appends one line "<category> <cx> <cy> <w> <h>" to
    output_dir/<image-stem>.txt, where center and size are normalized by the
    image dimensions.  Records sharing an image name accumulate in one file.

    json_path / output_dir default to the module-level json_dir / out_dir, so
    the original no-argument call keeps working.
    """
    json_path = json_dir if json_path is None else json_path
    output_dir = out_dir if output_dir is None else output_dir
    with open(json_path, 'r') as load_f:
        content = json.load(load_f)
    for t in content:
        stem = t['name'].split('.')[0]
        filename = output_dir + stem + '.txt'
        # Factors that normalize pixel values into [0, 1].
        dw = 1. / t['image_width']
        dh = 1. / t['image_height']
        # bbox is [xmin, ymin, xmax, ymax] in pixels; yolo wants center + size.
        x = ((t['bbox'][0] + t['bbox'][2]) / 2.0 - 1) * dw
        y = ((t['bbox'][1] + t['bbox'][3]) / 2.0 - 1) * dh
        # BUG FIX: the original computed w = x * dw instead of w * dw.
        w = (t['bbox'][2] - t['bbox'][0]) * dw
        h = (t['bbox'][3] - t['bbox'][1]) * dh
        # Round all four values consistently (the original rounded only w).
        file_str = ' '.join([str(t['category']),
                             str(round(x, 6)), str(round(y, 6)),
                             str(round(w, 6)), str(round(h, 6))])
        # Append mode creates the file if missing; prepend a newline only
        # when the file already has content, matching the original layout.
        first = not os.path.exists(filename) or os.path.getsize(filename) == 0
        with open(filename, mode="a", encoding="utf-8") as fp:
            fp.write(file_str if first else '\n' + file_str)


if __name__ == '__main__':
    main()
第二步:拆分训练集、测试集和验证集,先拆lables
# Split the label txt files into train / val sets
import os
import random
import glob
import shutil
xmlfilepath='/home/conda_work/conda_hsz/yolov5-master-aliyun/output/'
saveBasePath="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/labels/train/"
saveBasePath1="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/labels/val/"
trainval_percent=1    # fraction of files used at all (train + val)
train_percent=0.8     # fraction of the used files that become training data
# Keep only the .txt label files.
total_xml = [f for f in os.listdir(xmlfilepath) if f.endswith(".txt")]
num=len(total_xml)
# NOTE(review): `list` shadows the builtin; the name is kept because the
# image-split script below reuses `list`, `train` and `total_xml`.
list=range(num)
tv=int(num*trainval_percent)
tr=int(tv*train_percent)
trainval= random.sample(list,tv)
train=random.sample(trainval,tr)
# Set lookup is O(1) per file instead of scanning the train list each time.
_train_set = set(train)
for i in list:
    name=total_xml[i]
    dest = saveBasePath if i in _train_set else saveBasePath1
    shutil.copy(xmlfilepath + name, dest + name)
拆分图片
# Split the image files to mirror the label split above.
xmlfilepath2="/home/conda_work/conda_hsz/yolov5-master-aliyun/train_imgs/"
saveBasePath2="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/images/train/"
saveBasePath3="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/images/val/"
# `list`, `train` and `total_xml` are defined by the label-split script above.
# Hoist the membership test into a set: O(1) lookup instead of O(n) per file.
_train_idx = set(train)
for i in list:
    name = total_xml[i].split(".")[0] + ".jpg"
    dest = saveBasePath2 if i in _train_idx else saveBasePath3
    shutil.copy(xmlfilepath2 + name, dest + name)
2、改造为yolov4需要的txt格式
yolov4需要的txt是真实坐标,第一步,拆分为单个txt文件,代码如下:
import codecs
import json
import glob
# import cv2
import shutil
from sklearn.model_selection import train_test_split
import os
# Write one txt file per image, with absolute-pixel boxes, for yolov4.
def conver(json_dir, out_dir):
    """Convert JSON annotation records to per-image txt files.

    Each record appends "xmin,ymin,xmax,ymax,category " (absolute pixel
    coordinates, note the trailing space) to out_dir/<image-stem>.txt.
    Records sharing an image name accumulate in the same file.
    """
    with open(json_dir, 'r') as load_f:
        content = json.load(load_f)
    for t in content:
        stem = t['name'].split('.')[0]
        filename = out_dir + stem + '.txt'
        # yolov4 uses absolute coordinates, so no normalization is needed
        # (the original comment wrongly claimed relative coordinates).
        box = [float(v) for v in t['bbox']]
        file_str = ','.join(str(v) for v in box) + ',' + str(t['category']) + ' '
        # Append mode creates the file when missing.  The original's if/else
        # wrote the identical string in both branches, so it collapses here.
        with open(filename, mode="a", encoding="utf-8") as fp:
            fp.write(file_str)
# Run the conversion
json_dir="/home/conda_work/conda_hsz/yolov4-keras-master_416/train_annos.json" # location of the json annotation file
out_dir = "VOCdevkit/VOC2007/Annotations/"
conver(json_dir,out_dir)
第二步,整合到一个txt文件。
# Merge the per-image txt files into a single yolov4 training-list file.
import glob
out_dir = "VOCdevkit/VOC2007/Annotations/"
org_txt_files = sorted(glob.glob(os.path.join(out_dir, '*.txt')))
# os.path.basename/splitext work on every platform; the original split on
# "\\" (Windows-only) and compensated later with split("/").
org_txt_file_names = [os.path.splitext(os.path.basename(p))[0] for p in org_txt_files]
img_path="/home/conda_work/conda_hsz/yolov4-keras-master-aliyun/train_imgs/"
org_img_files = sorted(glob.glob(os.path.join(img_path, '*.jpg')))
org_img_file_names = [os.path.splitext(os.path.basename(p))[0] for p in org_img_files]
# Set lookup instead of an O(n) list scan per txt file.
_img_name_set = set(org_img_file_names)
with open('./2007_train.txt', 'w') as list_file:
    for i, txt_file in enumerate(org_txt_files):
        # One output line per image: "<image path> <x1,y1,x2,y2,cat ...>".
        with open(txt_file, "r", encoding="utf-8") as fh:
            box_txt = fh.readline()
        name = org_txt_file_names[i]
        if name in _img_name_set:
            list_file.write(os.path.join(img_path, name + '.jpg') + ' ' + box_txt)
            list_file.write('\n')
3、改造yolov4中kmeans_for_anchors.py文件
主要是改造文件中的load_data方法,原来是基于xml读取标注数据,改造后基于json格式读取标注数据。原始及改造后函数内容如下:
# Original method: collect normalized (w, h) box sizes from VOC-style xml files.
def load_data1(path):
    """Scan every *.xml file under *path* and return an (N, 2) array of box
    width/height values, each normalized by the image size."""
    sizes = []
    for xml_file in glob.glob('{}/*xml'.format(path)):
        tree = ET.parse(xml_file)
        img_h = int(tree.findtext('./size/height'))
        img_w = int(tree.findtext('./size/width'))
        # Every <object> contributes one normalized (width, height) pair.
        for obj in tree.iter('object'):
            # int(float(...)) truncates fractional pixel values before scaling.
            left = np.float64(int(float(obj.findtext('bndbox/xmin'))) / img_w)
            top = np.float64(int(float(obj.findtext('bndbox/ymin'))) / img_h)
            right = np.float64(int(float(obj.findtext('bndbox/xmax'))) / img_w)
            bottom = np.float64(int(float(obj.findtext('bndbox/ymax'))) / img_h)
            sizes.append([right - left, bottom - top])
    return np.array(sizes)
改造后
def load_data(path):
    """Read the JSON annotation file at *path* and return an (N, 2) array of
    box width/height values, each normalized by its image's size."""
    with open(path, 'r') as fh:
        records = json.load(fh)
    sizes = []
    for rec in records:
        img_w = rec['image_width']
        img_h = rec['image_height']
        # Normalize each corner first — same arithmetic order as the
        # xml-based version, so the float results are identical.
        left = np.float64(float(rec['bbox'][0]) / img_w)
        top = np.float64(float(rec['bbox'][1]) / img_h)
        right = np.float64(float(rec['bbox'][2]) / img_w)
        bottom = np.float64(float(rec['bbox'][3]) / img_h)
        sizes.append([right - left, bottom - top])
    return np.array(sizes)