【自用代码】yolov5数据预处理代码:python批量修改xml某字段,Coco格式转VOC格式

情景1:使用labelimg标注时,标签名写错了,需要批量改名,代码如下:

'''
task:改变name字段由WuLongTea-sandeliwww 改为WuLongTea-sandeli
date:2022-4-15
author:epiphany
ref:https://blog.csdn.net/weixin_42384743/article/details/113982063
'''
import os
import os.path
import xml.dom.minidom
path="H:\\Acolab\\goods\\WuLongTea-sandeli\\annotations"  # directory holding the XML files (use \\ on Windows)
files=os.listdir(path)                                    # bare file names inside `path`
#print(files)

for xmlFile in files:
    # os.listdir returns bare names, so the path must be joined BEFORE the
    # isdir check; testing the bare name would be relative to the current
    # working directory and would never actually skip sub-directories.
    full_path = os.path.join(path, xmlFile)
    if not os.path.isdir(full_path):                       # skip sub-directories; parse plain files only

        print(xmlFile)
        # Open the document and parse it into a DOM tree.
        dom=xml.dom.minidom.parse(full_path)
        root=dom.documentElement
        #print(dom)

        # Rewrite the text of every <name> element to the corrected label.
        name = root.getElementsByTagName('name')
        for i in range(len(name)):
            print(name[i].firstChild.data)                 # old label, for the log
            name[i].firstChild.data='WuLongTea-sandeli'    # assign the corrected label
            print(name[i].firstChild.data)                 # new label, for the log
        # Write the modified DOM back over the original file; the context
        # manager guarantees the handle is closed even if writexml raises.
        with open(full_path,'w') as fh:
            dom.writexml(fh)
            print("ok!!!!")

情景2:公开数据集RPC官方给定的为COCO,需要改为VOC格式,才可以训练yolov5模型,代码如下:

'''
一个coco的json文件-->N个voc的xml
'''
import os
import json
import cv2
from lxml import etree
import xml.etree.cElementTree as ET
import time
import pandas as pd
from tqdm import tqdm
from xml.dom.minidom import Document
# anno:    path to the COCO-style annotation JSON of the RPC dataset
# xml_dir: output directory for the generated Pascal-VOC XML files
anno = "H:/GOODSDATASET/dataset/Annotations/instances_val2019.json"
xml_dir = "H:/GOODSDATASET/dataset/voc/annotations"
# dttm = time.strftime("%Y%m%d%H%M%S", time.localtime())
# if os.path.exists(xml_dir):
#     os.rename(xml_dir,xml_dir+dttm)
# os.mkdir(xml_dir)
import json  # NOTE(review): duplicate import — json is already imported above



# Load the whole COCO JSON once.  The annotations go into a DataFrame so they
# can be filtered per image id later; `imgs` keeps the raw image records.
with open(anno, 'r') as load_f:
    f = json.load(load_f)
df_anno = pd.DataFrame(f['annotations'])
imgs = f['images']
cata={}  # category_id -> category name, filled by createCate()
# The 200 RPC category names allowed in the output; annotations whose category
# name is not in this list are skipped by json2xml().
nameList=[ '1_puffed_food', '2_puffed_food', '3_puffed_food', '4_puffed_food', '5_puffed_food', '6_puffed_food', '7_puffed_food',
           '8_puffed_food', '9_puffed_food', '10_puffed_food', '11_puffed_food', '12_puffed_food', '13_dried_fruit', '14_dried_fruit', '15_dried_fruit',
           '16_dried_fruit', '17_dried_fruit', '18_dried_fruit', '19_dried_fruit', '20_dried_fruit', '21_dried_fruit', '22_dried_food', '23_dried_food',
           '24_dried_food', '25_dried_food', '26_dried_food', '27_dried_food', '28_dried_food', '29_dried_food', '30_dried_food', '31_instant_drink',
           '32_instant_drink', '33_instant_drink', '34_instant_drink', '35_instant_drink', '36_instant_drink', '37_instant_drink', '38_instant_drink',
           '39_instant_drink', '40_instant_drink', '41_instant_drink', '42_instant_noodles', '43_instant_noodles', '44_instant_noodles',
           '45_instant_noodles', '46_instant_noodles', '47_instant_noodles', '48_instant_noodles', '49_instant_noodles', '50_instant_noodles',
           '51_instant_noodles', '52_instant_noodles', '53_instant_noodles', '54_dessert', '55_dessert', '56_dessert', '57_dessert', '58_dessert',
           '59_dessert', '60_dessert', '61_dessert', '62_dessert', '63_dessert', '64_dessert', '65_dessert', '66_dessert', '67_dessert', '68_dessert',
           '69_dessert', '70_dessert', '71_drink', '72_drink', '73_drink', '74_drink', '75_drink', '76_drink', '77_drink', '78_drink', '79_alcohol',
           '80_alcohol', '81_drink', '82_drink', '83_drink', '84_drink', '85_drink', '86_drink', '87_drink', '88_alcohol', '89_alcohol', '90_alcohol',
           '91_alcohol', '92_alcohol', '93_alcohol', '94_alcohol', '95_alcohol', '96_alcohol', '97_milk', '98_milk', '99_milk', '100_milk', '101_milk',
           '102_milk', '103_milk', '104_milk', '105_milk', '106_milk', '107_milk', '108_canned_food', '109_canned_food', '110_canned_food',
           '111_canned_food', '112_canned_food', '113_canned_food', '114_canned_food', '115_canned_food', '116_canned_food', '117_canned_food',
           '118_canned_food', '119_canned_food', '120_canned_food', '121_canned_food', '122_chocolate', '123_chocolate', '124_chocolate', '125_chocolate',
           '126_chocolate', '127_chocolate', '128_chocolate', '129_chocolate', '130_chocolate', '131_chocolate', '132_chocolate', '133_chocolate', '134_gum',
           '135_gum', '136_gum', '137_gum', '138_gum', '139_gum', '140_gum', '141_gum', '142_candy', '143_candy', '144_candy', '145_candy', '146_candy',
           '147_candy', '148_candy', '149_candy', '150_candy', '151_candy', '152_seasoner', '153_seasoner', '154_seasoner', '155_seasoner', '156_seasoner',
           '157_seasoner', '158_seasoner', '159_seasoner', '160_seasoner', '161_seasoner', '162_seasoner', '163_seasoner', '164_personal_hygiene',
           '165_personal_hygiene', '166_personal_hygiene', '167_personal_hygiene', '168_personal_hygiene', '169_personal_hygiene', '170_personal_hygiene',
           '171_personal_hygiene', '172_personal_hygiene', '173_personal_hygiene', '174_tissue', '175_tissue', '176_tissue', '177_tissue', '178_tissue',
           '179_tissue', '180_tissue', '181_tissue', '182_tissue', '183_tissue', '184_tissue', '185_tissue', '186_tissue', '187_tissue', '188_tissue',
           '189_tissue', '190_tissue', '191_tissue', '192_tissue', '193_tissue', '194_stationery', '195_stationery', '196_stationery', '197_stationery',
           '198_stationery', '199_stationery', '200_stationery']
flag=0  # per-image marker: set to 1 when an image has at least one valid object, gating the XML write
def createCate():
    """Fill the global ``cata`` mapping (category_id -> category name).

    Reads the ``categories`` section of the loaded COCO JSON (global ``f``)
    and records one entry per category.
    """
    global cata
    for entry in f['categories']:
        cata[entry['id']] = entry['name']

def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` under ``parent`` and return the new element."""
    node = doc.createElement(tag)
    parent.appendChild(node)
    node.appendChild(doc.createTextNode(text))
    return node

def json2xml():
    """Convert the loaded COCO annotations into one Pascal-VOC XML per image.

    For every record in the global ``imgs`` list, its annotations are selected
    from the global ``df_anno`` DataFrame by ``image_id`` and assembled into a
    VOC ``<annotation>`` document: ``filename``, ``size`` (width/height) and
    one ``<object>`` per bounding box.  Annotations whose category name (from
    the global ``cata`` map) is not in ``nameList`` are skipped; if an image
    ends up with no valid object, no file is written for it.

    COCO bboxes are ``[x, y, w, h]``; VOC wants corner coordinates, so
    xmax/ymax are derived as x+w and y+h (truncated to int).
    """
    global cata
    global flag
    for im in imgs:
        flag = 0  # becomes 1 once this image has at least one valid object
        filename = im['file_name']
        height = im['height']
        img_id = im['id']
        width = im['width']

        doc = Document()
        annotation = doc.createElement('annotation')
        doc.appendChild(annotation)
        _append_text_element(doc, annotation, "filename", filename)

        size = doc.createElement("size")
        annotation.appendChild(size)
        _append_text_element(doc, size, "width", str(width))
        _append_text_element(doc, size, "height", str(height))

        # All COCO annotations belonging to this image.
        annos = df_anno[df_anno["image_id"].isin([img_id])]
        for index, row in annos.iterrows():
            bbox = row["bbox"]                 # COCO format: [x, y, w, h]
            category_id = row["category_id"]
            cate_name = cata[category_id]
            if cate_name not in nameList:
                print(cate_name+",don`t in namelis")
                continue
            flag = 1

            # `obj` instead of the original `object`, which shadowed the builtin.
            obj = doc.createElement('object')
            annotation.appendChild(obj)
            _append_text_element(doc, obj, 'name', cate_name)
            _append_text_element(doc, obj, 'pose', 'Unspecified')
            _append_text_element(doc, obj, 'truncated', '0')
            _append_text_element(doc, obj, 'difficult', '0')

            bndbox = doc.createElement('bndbox')
            obj.appendChild(bndbox)
            _append_text_element(doc, bndbox, 'xmin', str(int(bbox[0])))
            _append_text_element(doc, bndbox, 'ymin', str(int(bbox[1])))
            _append_text_element(doc, bndbox, 'xmax', str(int(bbox[0]+bbox[2])))
            _append_text_element(doc, bndbox, 'ymax', str(int(bbox[1]+bbox[3])))

        if flag==1:
            # splitext handles any image extension (the original
            # str.replace('.jpg', ...) only worked for .jpg names).
            xml_path = os.path.join(xml_dir, os.path.splitext(filename)[0] + '.xml')
            # Context manager closes the handle even if the write raises; it
            # also avoids reusing the name `f`, which is the module-level
            # JSON dict.
            with open(xml_path, "w") as out:
                out.write(doc.toprettyxml(indent="  "))

# Driver: build the category-id -> name lookup first, then emit one VOC XML
# file per image.
createCate()

json2xml()
#print('imagenum:',imageSum)
#print(nameNum)

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值