情景1:使用labelimg标注时,标签名写错了,需要批量改名,代码如下:
'''
Task: batch-rename the <name> field in labelImg XML annotations
      from "WuLongTea-sandeliwww" to "WuLongTea-sandeli".
Date: 2022-4-15
Author: epiphany
Ref: https://blog.csdn.net/weixin_42384743/article/details/113982063
'''
import os
import os.path
import xml.dom.minidom

# Directory holding the XML annotation files (use \\ separators on Windows).
path = "H:\\Acolab\\goods\\WuLongTea-sandeli\\annotations"
# Value every <name> tag is rewritten to (was hard-coded inline).
NEW_NAME = 'WuLongTea-sandeli'

for xmlFile in os.listdir(path):
    full_path = os.path.join(path, xmlFile)
    # BUG FIX: the original tested os.path.isdir(xmlFile), i.e. relative to
    # the current working directory, so sub-directories inside `path` were
    # never actually skipped and would crash the XML parser.
    if os.path.isdir(full_path):
        continue
    print(xmlFile)
    # Parse the annotation file into a DOM tree.
    dom = xml.dom.minidom.parse(full_path)
    root = dom.documentElement
    # Rewrite every <name> element's text content.
    for node in root.getElementsByTagName('name'):
        print(node.firstChild.data)   # old label
        node.firstChild.data = NEW_NAME
        print(node.firstChild.data)   # new label
    # Write the modified tree back over the original file.
    with open(full_path, 'w', encoding='utf-8') as fh:
        dom.writexml(fh)
print("ok!!!!")
情景2:公开数据集RPC官方给定的为COCO,需要改为VOC格式,才可以训练yolov5模型,代码如下:
'''
One COCO-format JSON annotation file --> N Pascal-VOC XML files (one per image).
'''
import os
import json
import cv2                            # NOTE(review): unused in this script
from lxml import etree                # NOTE(review): unused in this script
import xml.etree.cElementTree as ET   # NOTE(review): unused in this script
import time                           # only needed by the commented-out backup code below
import pandas as pd
from tqdm import tqdm                 # NOTE(review): unused in this script
from xml.dom.minidom import Document
# Input COCO annotation JSON and output directory for the generated VOC XMLs.
anno = "H:/GOODSDATASET/dataset/Annotations/instances_val2019.json"
xml_dir = "H:/GOODSDATASET/dataset/voc/annotations"
# Commented-out: archive any previous output directory under a timestamped name.
# dttm = time.strftime("%Y%m%d%H%M%S", time.localtime())
# if os.path.exists(xml_dir):
#     os.rename(xml_dir,xml_dir+dttm)
# os.mkdir(xml_dir)
import json  # NOTE(review): duplicate — json is already imported above
with open(anno, 'r') as load_f:
    f = json.load(load_f)  # the whole COCO dict; used as a global by the functions below
df_anno = pd.DataFrame(f['annotations'])  # one row per object annotation
imgs = f['images']                        # list of image records (file_name/height/width/id)
cata = {}                                 # category id -> name, filled by createCate()
# The 200 RPC-dataset category names ("<id>_<superclass>"); annotations whose
# category name is not in this list are skipped during conversion.
nameList = [ '1_puffed_food', '2_puffed_food', '3_puffed_food', '4_puffed_food', '5_puffed_food', '6_puffed_food', '7_puffed_food',
    '8_puffed_food', '9_puffed_food', '10_puffed_food', '11_puffed_food', '12_puffed_food', '13_dried_fruit', '14_dried_fruit', '15_dried_fruit',
    '16_dried_fruit', '17_dried_fruit', '18_dried_fruit', '19_dried_fruit', '20_dried_fruit', '21_dried_fruit', '22_dried_food', '23_dried_food',
    '24_dried_food', '25_dried_food', '26_dried_food', '27_dried_food', '28_dried_food', '29_dried_food', '30_dried_food', '31_instant_drink',
    '32_instant_drink', '33_instant_drink', '34_instant_drink', '35_instant_drink', '36_instant_drink', '37_instant_drink', '38_instant_drink',
    '39_instant_drink', '40_instant_drink', '41_instant_drink', '42_instant_noodles', '43_instant_noodles', '44_instant_noodles',
    '45_instant_noodles', '46_instant_noodles', '47_instant_noodles', '48_instant_noodles', '49_instant_noodles', '50_instant_noodles',
    '51_instant_noodles', '52_instant_noodles', '53_instant_noodles', '54_dessert', '55_dessert', '56_dessert', '57_dessert', '58_dessert',
    '59_dessert', '60_dessert', '61_dessert', '62_dessert', '63_dessert', '64_dessert', '65_dessert', '66_dessert', '67_dessert', '68_dessert',
    '69_dessert', '70_dessert', '71_drink', '72_drink', '73_drink', '74_drink', '75_drink', '76_drink', '77_drink', '78_drink', '79_alcohol',
    '80_alcohol', '81_drink', '82_drink', '83_drink', '84_drink', '85_drink', '86_drink', '87_drink', '88_alcohol', '89_alcohol', '90_alcohol',
    '91_alcohol', '92_alcohol', '93_alcohol', '94_alcohol', '95_alcohol', '96_alcohol', '97_milk', '98_milk', '99_milk', '100_milk', '101_milk',
    '102_milk', '103_milk', '104_milk', '105_milk', '106_milk', '107_milk', '108_canned_food', '109_canned_food', '110_canned_food',
    '111_canned_food', '112_canned_food', '113_canned_food', '114_canned_food', '115_canned_food', '116_canned_food', '117_canned_food',
    '118_canned_food', '119_canned_food', '120_canned_food', '121_canned_food', '122_chocolate', '123_chocolate', '124_chocolate', '125_chocolate',
    '126_chocolate', '127_chocolate', '128_chocolate', '129_chocolate', '130_chocolate', '131_chocolate', '132_chocolate', '133_chocolate', '134_gum',
    '135_gum', '136_gum', '137_gum', '138_gum', '139_gum', '140_gum', '141_gum', '142_candy', '143_candy', '144_candy', '145_candy', '146_candy',
    '147_candy', '148_candy', '149_candy', '150_candy', '151_candy', '152_seasoner', '153_seasoner', '154_seasoner', '155_seasoner', '156_seasoner',
    '157_seasoner', '158_seasoner', '159_seasoner', '160_seasoner', '161_seasoner', '162_seasoner', '163_seasoner', '164_personal_hygiene',
    '165_personal_hygiene', '166_personal_hygiene', '167_personal_hygiene', '168_personal_hygiene', '169_personal_hygiene', '170_personal_hygiene',
    '171_personal_hygiene', '172_personal_hygiene', '173_personal_hygiene', '174_tissue', '175_tissue', '176_tissue', '177_tissue', '178_tissue',
    '179_tissue', '180_tissue', '181_tissue', '182_tissue', '183_tissue', '184_tissue', '185_tissue', '186_tissue', '187_tissue', '188_tissue',
    '189_tissue', '190_tissue', '191_tissue', '192_tissue', '193_tissue', '194_stationery', '195_stationery', '196_stationery', '197_stationery',
    '198_stationery', '199_stationery', '200_stationery']
flag = 0  # set to 1 when the current image has at least one valid object to write
def createCate(categories=None):
    """Populate and return the global category-id -> name mapping ``cata``.

    Parameters
    ----------
    categories : list[dict] | None
        COCO-style category records, each with ``'id'`` and ``'name'`` keys.
        Defaults to the ``'categories'`` section of the COCO dict loaded into
        the module-level global ``f``, preserving the original no-argument
        call ``createCate()``.

    Returns
    -------
    dict
        The populated mapping (also stored in the global ``cata``).
    """
    global cata
    if categories is None:
        categories = f['categories']  # module-level COCO dict loaded at import
    if 'cata' not in globals():
        cata = {}  # defensive: initialize if the module-level dict is missing
    for item in categories:
        cata[item['id']] = item['name']
    return cata
def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` under *parent* and return the new element."""
    el = doc.createElement(tag)
    parent.appendChild(el)
    el.appendChild(doc.createTextNode(str(text)))
    return el

def json2xml(images=None, annotations=None, categories=None, out_dir=None,
             valid_names=None):
    """Convert COCO-style annotation records into Pascal-VOC XML files.

    One XML file is written per image that has at least one object whose
    category name is in *valid_names*; images with no valid objects produce
    no file (the original ``flag`` behavior, preserved here).

    Parameters
    ----------
    images : list[dict] | None
        COCO image records (``file_name``/``height``/``width``/``id``).
        Defaults to the module-level global ``imgs``.
    annotations : pandas.DataFrame | None
        Object annotations with ``image_id``/``category_id``/``bbox`` columns.
        Defaults to the global ``df_anno``.
    categories : dict | None
        category id -> name map. Defaults to the global ``cata``.
    out_dir : str | None
        Output directory. Defaults to the global ``xml_dir``.
    valid_names : collection[str] | None
        Category names to keep. Defaults to the global ``nameList``.

    All defaults fall back to the module globals, so the original
    no-argument call ``json2xml()`` keeps working unchanged.
    """
    global flag
    if images is None:
        images = imgs
    if annotations is None:
        annotations = df_anno
    if categories is None:
        categories = cata
    if out_dir is None:
        out_dir = xml_dir
    if valid_names is None:
        valid_names = nameList

    for im in images:
        flag = 0  # becomes 1 once this image has a valid object
        doc = Document()
        annotation = doc.createElement('annotation')
        doc.appendChild(annotation)
        _append_text_element(doc, annotation, 'filename', im['file_name'])
        size = doc.createElement('size')
        annotation.appendChild(size)
        _append_text_element(doc, size, 'width', im['width'])
        _append_text_element(doc, size, 'height', im['height'])

        # All annotation rows belonging to this image.
        annos = annotations[annotations["image_id"].isin([im['id']])]
        for _, row in annos.iterrows():
            bbox = row["bbox"]  # COCO format: [x, y, w, h]
            cate_name = categories[row["category_id"]]
            if cate_name not in valid_names:
                # BUG FIX: original message was garbled ("don`t in namelis").
                print(cate_name + " not in nameList, skipping")
                continue
            flag = 1
            # `obj` instead of `object`: avoid shadowing the builtin.
            obj = doc.createElement('object')
            annotation.appendChild(obj)
            _append_text_element(doc, obj, 'name', cate_name)
            _append_text_element(doc, obj, 'pose', 'Unspecified')
            _append_text_element(doc, obj, 'truncated', '0')
            _append_text_element(doc, obj, 'difficult', '0')
            bndbox = doc.createElement('bndbox')
            obj.appendChild(bndbox)
            # COCO [x, y, w, h] -> VOC [xmin, ymin, xmax, ymax].
            _append_text_element(doc, bndbox, 'xmin', int(bbox[0]))
            _append_text_element(doc, bndbox, 'ymin', int(bbox[1]))
            _append_text_element(doc, bndbox, 'xmax', int(bbox[0] + bbox[2]))
            _append_text_element(doc, bndbox, 'ymax', int(bbox[1] + bbox[3]))

        if flag == 1:
            # splitext instead of .replace('.jpg', ...): works for any extension.
            base = os.path.splitext(im['file_name'])[0] + '.xml'
            xml_path = os.path.join(out_dir, base)
            # BUG FIX: the original opened without `with` (leak on error),
            # without an explicit encoding, and shadowed the global `f`.
            with open(xml_path, "w", encoding="utf-8") as fh:
                fh.write(doc.toprettyxml(indent=" "))
# Build the id->name category map first, then convert every image's
# annotations to VOC XML (json2xml reads the `cata` global filled here).
createCate()
json2xml()
#print('imagenum:',imageSum)
#print(nameNum)