#计算txt标签格式每个类别的数量
import os
from tqdm import tqdm
def get_every_class_num(txt_path):
    """Count the labelled objects of each class in a folder of YOLO txt labels.

    Every ``.txt`` file directly inside ``txt_path`` is read line by line.
    The first whitespace-separated field of each non-blank line is taken as
    the integer class id, and the counter of that class is incremented.
    The per-class counts and the grand total are printed to stdout.

    Args:
        txt_path: Directory that contains the label files.

    Raises:
        ValueError: If a row's first field is not an integer, or the class
            id is outside ``0 .. len(class_categories) - 1``.
    """
    # Must be edited for your own dataset: position i holds the name of class id i.
    class_categories = ['Cahua', 'Crazing', 'Patches', 'Inclusion', 'Uneven', 'Blowhole', 'Break', 'Crack', 'Yueyawan', 'Zhehen', 'Siban', 'Shuiban', 'Hanfeng', 'Yiwu', 'Youban', 'Yahen', 'Chongkong', 'Yaozhe', 'Pengshang', 'Pitted_Surface', 'Rolled-in_Scale', 'Scratches', 'Tufen']
    class_num = len(class_categories)  # number of classes
    class_num_list = [0] * class_num

    # Only regular .txt files are label files; sub-directories and other
    # entries returned by os.listdir are ignored instead of being opened.
    labels_list = [
        entry for entry in os.listdir(txt_path)
        if entry.lower().endswith('.txt')
        and os.path.isfile(os.path.join(txt_path, entry))
    ]

    for label_name in tqdm(labels_list):
        file_path = os.path.join(txt_path, label_name)
        # The context manager closes the file even when a row fails to parse.
        with open(file_path, 'r') as label_file:
            for every_row in label_file:
                fields = every_row.split()
                if not fields:
                    continue  # blank line (e.g. trailing newline at end of file)
                class_ind = int(fields[0])
                # Explicit range check: a negative id must not wrap around
                # to the end of the list through Python's negative indexing.
                if not 0 <= class_ind < class_num:
                    raise ValueError(
                        'class id {} in {} is outside 0..{}'.format(
                            class_ind, file_path, class_num - 1))
                class_num_list[class_ind] += 1

    # Print the count of every class, then the total.
    result = dict(zip(class_categories, class_num_list))
    for name, num in result.items():
        print(name, ":", num)
    print("-----------------------------------")
    print('total:', sum(class_num_list))
if __name__ == '__main__':
    # Edit this: directory that holds the YOLO txt label files.
    txt_path = r'D:\desk\defect_dataset\labels\val'
    get_every_class_num(txt_path=txt_path)
需要修改部分讲解
第5行
根据自己的类别进行修改,注意对应自己的类别
# 需修改,根据自己的类别,注意一一对应
class_categories=['Cahua', 'Crazing', 'Patches', 'Inclusion', 'Uneven', 'Blowhole', 'Break', 'Crack', 'Yueyawan', 'Zhehen', 'Siban', 'Shuiban', 'Hanfeng', 'Yiwu', 'Youban', 'Yahen', 'Chongkong', 'Yaozhe', 'Pengshang', 'Pitted_Surface', 'Rolled-in_Scale', 'Scratches', 'Tufen']
第27行
修改为自己的标签所在路径即可
# 需修改,txt文件所在路径
txt_path = r'D:\desk\defect_dataset\labels\val'
实验准备
YOLO格式标签文件若干个
运行结果
附:利用VOC格式统计类别数
# -*- coding:utf-8 -*-
# 根据xml文件统计目标种类以及数量
import os
import xml.etree.ElementTree as ET
import numpy as np
import sys
np.set_printoptions(threshold=sys.maxsize)
from PIL import Image
def parse_obj(xml_path, filename):
    """Read one VOC xml annotation and return the class name of each object.

    Args:
        xml_path: Directory that contains the annotation file. A trailing
            path separator is optional.
        filename: File name of the annotation, including the extension.

    Returns:
        A list with one ``{'name': <text of the <name> element>}`` dict per
        ``<object>`` element directly under the document root, in document
        order.

    Raises:
        AttributeError: If an ``<object>`` element has no ``<name>`` child.
    """
    # os.path.join instead of string concatenation, so the result does not
    # depend on xml_path ending with a separator.
    tree = ET.parse(os.path.join(xml_path, filename))
    return [{'name': obj.find('name').text} for obj in tree.findall('object')]
def read_image(image_path, filename):
    """Return the width, height and pixel area of an image file.

    Args:
        image_path: Directory that contains the image. A trailing path
            separator is optional.
        filename: File name of the image, including the extension.

    Returns:
        ``[W, H, area]`` where ``W`` and ``H`` are ``im.size[0]`` and
        ``im.size[1]`` of the opened image and ``area`` is ``W * H``.
    """
    # Open inside a context manager so the underlying file is closed as soon
    # as the size has been read.
    with Image.open(os.path.join(image_path, filename)) as im:
        W, H = im.size[0], im.size[1]
    return [W, H, W * H]
if __name__ == '__main__':
    # Edit this: directory that holds your VOC xml annotation files.
    # Raw string, so backslashes in the Windows path are never interpreted
    # as escape sequences ('\d' and '\V' are invalid escapes in a normal string).
    xml_path = r'D:\desk\defect_dataset\VOC/'

    # Keep only the xml annotations; other files in the folder are ignored
    # instead of being passed to the xml parser.
    xml_files = [f for f in os.listdir(xml_path) if f.lower().endswith('.xml')]
    # File names without the extension, used as keys of `recs`.
    filenames = [os.path.splitext(f)[0] for f in xml_files]

    # recs maps each annotation name to the list of objects parsed from it.
    recs = {}
    for name, xml_file in zip(filenames, xml_files):
        recs[name] = parse_obj(xml_path, xml_file)

    # num_objs maps class name -> number of objects; dict insertion order
    # keeps the classes in order of first appearance.
    num_objs = {}
    for name in filenames:
        for obj in recs[name]:
            num_objs[obj['name']] = num_objs.get(obj['name'], 0) + 1
    classnames = list(num_objs)

    for name in classnames:
        print('{}:{}个'.format(name, num_objs[name]))
    print('信息统计算完毕。')
运行此代码仅需修改
第29行的 xml_path(改为自己的VOC标签所在路径)即可运行
本文代码借鉴了:Python 统计YOLO(txt)格式标签中各类别样本数_yolo中如何查看标签中每个类别数量多少-CSDN博客
目标检测中计算数据集每一类别框的数量_目标检测预测结果可以知道预测框的数量吗-CSDN博客
代码仅供自己学习使用