目录
统计标签类别名称和数量
统计XML文件内标签的种类和其数量_鲁破特之泪的博客-CSDN博客_统计xml
import os
from tqdm import tqdm
import xml.dom.minidom
def ReadXml(FilePath):
    """Read the labelled bounding boxes from one annotation XML file.

    Args:
        FilePath: Path of the XML file to parse.

    Returns:
        ``None`` when ``FilePath`` does not exist. Otherwise a list holding
        one ``[xmin, ymin, xmax, ymax, name]`` entry per ``<object>``
        element, in document order. The coordinates are ints taken from the
        object's first ``<bndbox>``; ``name`` is the text of its first
        ``<name>`` element.

    Raises:
        IndexError: An object lacks ``<name>``, ``<bndbox>`` or one of the
            coordinate tags.
        AttributeError: One of those elements is empty.
        ValueError: A coordinate is not numeric.
    """
    if not os.path.exists(FilePath):
        return None

    def _text(node, tag):
        # Text content of the first <tag> descendant of node.
        return node.getElementsByTagName(tag)[0].firstChild.data

    def _coord(node, tag):
        # Going through float() accepts coordinates written as "12.0";
        # a bare int() would raise ValueError on them. Fractions are truncated.
        return int(float(_text(node, tag)))

    dom = xml.dom.minidom.parse(FilePath)
    info = []
    for obj in dom.documentElement.getElementsByTagName('object'):
        name = _text(obj, "name")
        bndbox = obj.getElementsByTagName("bndbox")[0]
        box = [_coord(bndbox, tag) for tag in ("xmin", "ymin", "xmax", "ymax")]
        info.append(box + [name])
    return info
def CountLabelKind(Path):
    """Count how often each label name occurs in the XML files under a directory.

    Walks ``Path`` recursively, reads every ``*.xml`` file with ``ReadXml``
    and tallies the label name stored as the last item of each entry.

    Args:
        Path: Root directory to search for annotation files.

    Returns:
        A dict mapping label name to its number of occurrences, with keys
        inserted in sorted order.
    """
    LabelDict = {}
    print("Star to count label kinds....")
    for root, dirs, files in os.walk(Path):
        for file in tqdm(files):
            # Match on the real extension; testing only the last character
            # also picked up files such as *.html or *.yml.
            if not file.lower().endswith('.xml'):
                continue
            # os.path.join keeps this portable; a hard-coded backslash only
            # forms a valid path on Windows.
            Infos = ReadXml(os.path.join(root, file))
            if Infos is None:
                # ReadXml returns None when the file is not there
                # (e.g. removed while the walk was running).
                continue
            for Info in Infos:
                label = Info[-1]
                LabelDict[label] = LabelDict.get(label, 0) + 1
    return dict(sorted(LabelDict.items(), key=lambda x: x[0]))
if __name__ == '__main__':
    # Tally the labels found under the annotation directory, then print a
    # summary line, the sorted label names and one count line per label.
    SrcDir = "./Annotations"
    LabelDict = CountLabelKind(SrcDir)
    KeyDict = sorted(LabelDict)
    kind_total = len(KeyDict)
    label_total = sum(LabelDict.values())
    print("%d kind labels and %d labels in total:" % (kind_total, label_total))
    print(KeyDict)
    print("Label Name and it's number:")
    for key in KeyDict:
        count = LabelDict[key]
        print("%s\t: %d" % (key, count))
修改标签名称
python实现对LabelImg标注的xml文件修改其标签名字_一颗小萌新的博客-CSDN博客_labelimg修改标签名称
通过python批量修改已经标注的数据标签名字
import os.path
import glob
import xml.etree.ElementTree as ET
path = './Annotations'  # Directory holding the label files; change it to your own Annotations path

OLD_NAME = '原来的标签名字'  # label name to look for
NEW_NAME = '替换的标签名字'  # label name to write in its place


def rename_label(xml_file, old_name, new_name):
    """Rename every object labelled ``old_name`` in one annotation file.

    Args:
        xml_file: Path of the XML file to update in place.
        old_name: Text of the ``<name>`` elements to replace.
        new_name: Replacement text.

    Returns:
        True when at least one object was renamed and the file was rewritten,
        False when the file was left untouched.
    """
    # Parse the file and look at each <object> under the root node.
    tree = ET.parse(xml_file)
    changed = False
    for member in tree.getroot().findall('object'):
        name_node = member.find('name')
        # Objects without a <name> child are skipped instead of crashing.
        if name_node is None or name_node.text != old_name:
            continue
        print(name_node.text)
        name_node.text = str(new_name)
        changed = True
    if changed:
        # Write once per file, and as UTF-8: the default (US-ASCII) output
        # turns non-ASCII label names into numeric character references.
        tree.write(xml_file, encoding='utf-8')
    return changed


for xml_file in glob.glob(path + '/*.xml'):
    rename_label(xml_file, OLD_NAME, NEW_NAME)
检查JPG与标签XML一致
检查JPG与其标签的XML是否一一对应_鲁破特之泪的博客-CSDN博客
在手动标注自己的数据集,或请人帮忙标注数据集之后,应检查是否每一个JPG文件都有一个对应的XML文件(检查是否标全),以及每一个XML文件是否都对应一个JPG文件(图片标注之后,若因太模糊而被删除,会遗留多余的XML文件)。
修改JpgDir为JPG所在目录,XmlDir为XML文件所在目录,具体代码如下:
import os
XmlDir = "./Annotations"
JpgDir = "./images"

# File extensions (lower case) recognised on each side of the pairing.
IMAGE_EXTS = (".jpg", ".jpeg", ".png")
XML_EXTS = (".xml",)


def find_missing(src_dir, src_exts, dst_dir, dst_exts):
    """List the counterpart files that are missing for the files in ``src_dir``.

    ``src_dir`` is walked recursively. For every file whose extension
    (compared case-insensitively) is in ``src_exts``, a file with the same
    stem and one of ``dst_exts`` is looked up directly inside ``dst_dir``.

    Args:
        src_dir: Directory whose files need a counterpart.
        src_exts: Extensions of the files to check, e.g. ``(".jpg",)``.
        dst_dir: Directory expected to hold the counterparts.
        dst_exts: Accepted counterpart extensions; the first one is used
            when reporting a missing file.

    Returns:
        A list with the expected path of each missing counterpart, in walk order.
    """
    missing = []
    for _root, _dirs, names in os.walk(src_dir):
        for name in names:
            # splitext copes with extensions of any length; slicing off the
            # last three characters mangled names such as "a.jpeg".
            stem, ext = os.path.splitext(name)
            if ext.lower() not in src_exts:
                continue
            # os.path.join keeps the lookup portable; a hard-coded backslash
            # only forms a valid path on Windows.
            candidates = [os.path.join(dst_dir, stem + dst_ext) for dst_ext in dst_exts]
            if not any(os.path.exists(candidate) for candidate in candidates):
                missing.append(candidates[0])
    return missing


# Images without an annotation, reported as the XML path that should exist.
NoXml = find_missing(JpgDir, IMAGE_EXTS, XmlDir, XML_EXTS)
# Annotations without an image, reported as the image path that should exist.
NoJpg = find_missing(XmlDir, XML_EXTS, JpgDir, IMAGE_EXTS)

if not NoXml:
    print("All jpg are labeled")
else:
    print("%d unlabeled" % len(NoXml))
    print(NoXml)

if not NoJpg:
    print("All xml have a jpg")
else:
    print("%d xmls have no jpg" % len(NoJpg))
    print(NoJpg)