python实现tsinghua-tencent 100000数据集的json文件转为xml文件

最新推荐文章于 2024-08-27 21:53:33 发布
小布米
最新推荐文章于 2024-08-27 21:53:33 发布
阅读量301
点赞数
分类专栏：神经网络与深度学习
本文链接：https://blog.csdn.net/xiaobumi123/article/details/114886368
版权
神经网络与深度学习专栏收录该内容
59 篇文章
订阅专栏
算法解决问题：
1，从json文件中抽取数据集中的小目标pne、p11、i5、w57四种交通标志数据集
2.小目标判断标注像素范围在[32,32]
3.转为对应的xml文件
4.标出选出的类别的个数
import os
import time
import json
import cv2


xml_head = '''<annotation>
    <folder>traffic-sign</folder>
    <!--文件名-->
    <filename>{}</filename>  
    <size>
        <width>2048</width>
        <height>2048</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <category>{}</category>
    '''
xml_obj = '''
    <object>     
        <!--图片中目标类别-->   
        <name>{}</name>
        <!--是否被裁减，0表示完整，1表示不完整-->
        <truncated>0</truncated>
        <!--是否容易识别，0表示容易，1表示困难-->
        <difficult>0</difficult>
        <bndbox>
            <xmin>{}</xmin>
            <ymin>{}</ymin>
            <xmax>{}</xmax>
            <ymax>{}</ymax>
        </bndbox>
    </object>
    '''
xml_end = '''
</annotation>'''
# json文件路径和用于存放xml文件的路径
anno = '/home/xiaobumidm/Yolo_mark-master/annotations.json'
xml_dir = '/home/xiaobumidm/Yolo_mark-master/jsonxml/'
labels = ['pne','p11','i5','w57']  # label for datasets
print("labels类型：",type(labels))
# 读取json文件内容,返回字典格式
with open(anno,'r',encoding='utf8')as fp:
    json_data = json.load(fp)
    #imgs数据，字典类型
    json_imgs=json_data['imgs']
    #记录小目标总数目,共2908
    count=0
    #count_pne的小目标数目
    count_pne=0
    #count_p11的小目标数目
    count_p11=0
    #count_i5的小目标数目
    count_i5=0
    #count_w57的小目标数目
    count_w57=0
    #count_pne_trian训练文件夹中pne数量
    count_pne_trian=0
    # count_p11_trian训练文件夹中p11数量
    count_p11_trian = 0
    # count_i5_trian训练文件夹中i5数量
    count_i5_trian = 0
    # count_w57_trian训练文件夹中w57数量
    count_w57_trian = 0
    #训练pne数量
    count_img_pne_train=0
    #训练p11数量
    count_img_p11_train=0
    #训练i5数量
    count_img_i5_train=0
    #训练w57数量
    count_img_w57_train=0
    # 测试pne数量
    count_img_pne_test=0
    # 测试p11数量
    count_img_p11_test=0
    # 测试i5数量
    count_img_i5_test=0
    # 测试w57数量
    count_img_w57_test=0

    #图片总数
    count_image=0;
    table=['pne']
    for item in json_imgs.items():
        #item是tuple类型 ,item元组中共有两个元素，一开始的ID和图片详细信息
        #detail_info为图片详细信息，是字典类型，含有path，objects，id三个key值
        detail_info=item[1]
        #path图片路径
        image_path=detail_info['path']
        if image_path.find('train')!=-1 or image_path.find('test')!=-1:
            #图片含有的目标,list类型
            image_objects=detail_info['objects']
            # 图片的id
            image_id = detail_info['id']
            # flag_category记录category的标记
            flag_category = ''
            #obj表示一个图片中目标信息
            obj=''
            for info in range(len(image_objects)):
                # object_info是object的详细信息,数据类型为字典，包含category，objects
                object_info = image_objects[info]
                #object_category是object的category,字符串
                object_category = object_info['category']
                # object_bbox为字典类型，存放目标信息xmin,ymin,xmax,ymax
                object_bbox = object_info['bbox']
                #x_rang目标的x间的大小，xmax-xmin
                x_rang=object_bbox['xmax']-object_bbox['xmin']
                #目标的y间的大小，ymax-ymin
                y_rang=object_bbox['ymax']-object_bbox['ymin']
                #flag=0
                if x_rang <= 32 and y_rang <= 32:
                    if object_category == 'pne':
                        if flag_category == '':
                            flag_category = object_category
                        obj+=xml_obj.format(labels[0],str(object_bbox['xmin']),str(object_bbox['ymin']),
                                            str(object_bbox['xmax']),str(object_bbox['ymax']))
                        if image_path.find('train') != -1:
                            count_pne_trian+=1
                        count_pne += 1
                        count += 1
                    if object_category == 'p11':
                        if flag_category == '':
                            flag_category = object_category
                        obj += xml_obj.format(labels[1], str(object_bbox['xmin']),
                                              str(object_bbox['ymin']),
                                              str(object_bbox['xmax']), str(object_bbox['ymax']))
                        if image_path.find('train') != -1:
                            count_p11_trian+=1
                        count_p11 += 1
                        count += 1
                    if object_category == 'i5':
                        if flag_category == '':
                            flag_category = object_category
                        obj += xml_obj.format(labels[2], str(object_bbox['xmin']),
                                              str(object_bbox['ymin']),
                                              str(object_bbox['xmax']), str(object_bbox['ymax']))
                        if image_path.find('train') != -1:
                            count_i5_trian+=1
                        count_i5 += 1
                        count += 1
                    if object_category == 'w57':
                        if flag_category == '':
                            flag_category = object_category
                        obj += xml_obj.format(labels[3], str(object_bbox['xmin']),
                                              str(object_bbox['ymin']),
                                              str(object_bbox['xmax']), str(object_bbox['ymax']))
                        if image_path.find('train') != -1:
                            count_w57_trian+=1
                        count_w57 += 1
                        count += 1
                    print('x_rang:', x_rang)
                    print('y_range:', y_rang)
            #表示有合适类别的小目标的时候开始创建xml文件,存放在了test和train文件夹下面
            if flag_category!='' :
                if image_path.find('train') != -1:
                    if flag_category=='pne':
                        count_img_pne_train+=1
                        xml_name = os.path.join(xml_dir + 'trains/pne/', str(image_id) + '.xml')
                    if flag_category=='p11':
                        count_img_p11_train+=1
                        xml_name = os.path.join(xml_dir + 'trains/p11/', str(image_id) + '.xml')
                    if flag_category=='i5':
                        count_img_i5_train+=1
                        xml_name = os.path.join(xml_dir + 'trains/i5/', str(image_id) + '.xml')
                    if flag_category=='w57':
                        count_img_w57_train+=1
                        xml_name = os.path.join(xml_dir + 'trains/w57/', str(image_id) + '.xml')
                else :
                    if flag_category=='pne':
                        count_img_pne_test+=1
                        xml_name = os.path.join(xml_dir + 'tests/pne/', str(image_id) + '.xml')
                    if flag_category=='p11':
                        count_img_p11_test+=1
                        xml_name = os.path.join(xml_dir + 'tests/p11/', str(image_id) + '.xml')
                    if flag_category=='i5':
                        count_img_i5_test+=1
                        xml_name = os.path.join(xml_dir + 'tests/i5/', str(image_id) + '.xml')
                    if flag_category=='w57':
                        count_img_w57_test+=1
                        xml_name = os.path.join(xml_dir + 'tests/w57/', str(image_id) + '.xml')
                count_image+=1
                print("xml_name:"+xml_name)
                print("image_id:",image_id)
                #xml的头部信息传入参数值，文件名字和类别
                head=xml_head.format(str(xml_name),str(flag_category))
                #xml的尾部信息
                end=xml_end
                #向xml文件中写入内容
                with open(xml_name, 'w') as f:
                    f.write(head+obj+end)
    print('count:', count)

    print('count_pne:', count_pne)
    print('count_pne_trian:',count_pne_trian)
    print('count_pne_test:',count_pne - count_pne_trian)

    print('count_p11:', count_p11)
    print('count_p11_trian:', count_p11_trian)
    print('count_p11_test:', count_p11 - count_p11_trian)

    print('count_i5:', count_i5)
    print('count_i5_trian:', count_i5_trian)
    print('count_i5_test:', count_i5 - count_i5_trian)

    print('count_w57:', count_w57)
    print('count_w57_trian:', count_w57_trian)
    print('count_w57e_test:', count_w57 - count_w57_trian)

    print('count_img_pne_train:',count_img_pne_train)
    print('count_img_p11_train:',count_img_p11_train)
    print('count_img_i5_train:',count_img_i5_train)
    print('count_img_w57_train:',count_img_w57_train)

    print('count_img_pne_test:',count_img_pne_test)
    print('count_img_p11_test:',count_img_p11_test)
    print('count_img_i5_test:',count_img_i5_test)
    print('count_img_w57_test:',count_img_w57_test)
    print('count_image:', count_image)
count值：