脚本功能:
提示:遍历文件夹中的json文件,统计标签数量
效果图(类别名称在截图中被截掉了):
Yolo官方训练技巧
- 111/ 是我的文件夹
- classes.txt 是类别文件
"""
功能:
统计json文件中的类别,绘制柱形图并标记高度(每一个类别的数量)
"""
import json
import os
import matplotlib.pyplot as plt
# Matplotlib setup: select the SimHei font so Chinese text renders, and keep
# the minus sign on the axes displayable.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

classes_file = 'classes.txt'  # path of the class list, one class name per line
base_path = r'111/'  # directory that holds the JSON annotation files

# Map every class name to a running count that starts at zero.  The file is
# read inside a ``with`` block so the handle is closed once the names are loaded.
classes_dict = {}
with open(classes_file) as read_classes:
    for line in read_classes:
        classes_dict[line.replace('\n', '')] = 0
print("类别数:", len(classes_dict))
def main():
    """Count labels in the JSON files under ``base_path`` and plot the totals.

    Every ``*.json`` file in ``base_path`` is loaded, and each entry of its
    ``shapes`` list adds one to ``classes_dict[entry['label']]``.  The totals
    are printed and then drawn as a bar chart with the count written above
    each bar.

    Problems are reported on stdout and skipped instead of being silently
    swallowed: a file that cannot be read or parsed, a file without a usable
    ``shapes`` list, and a label that is not a key of ``classes_dict``.
    """
    for name in sorted(os.listdir(base_path)):  # ascending file-name order
        if os.path.splitext(name)[1] != '.json':
            continue
        fullname = os.path.join(base_path, name)
        try:
            with open(fullname, encoding='UTF-8') as fp:
                labels = [shape['label'] for shape in json.load(fp)['shapes']]
        except (OSError, ValueError, KeyError, TypeError) as err:
            # ValueError covers both invalid JSON and undecodable bytes;
            # KeyError/TypeError cover a missing or malformed 'shapes' list.
            print("skipped {}: {!r}".format(fullname, err))
            continue
        for label in labels:
            try:
                classes_dict[label] += 1
            except (KeyError, TypeError):
                # One unknown label must not drop the remaining shapes of the file.
                print("unknown label {!r} in {}".format(label, fullname))
    print("classes_dict", classes_dict)

    fig, ax = plt.subplots(figsize=(10, 8))
    plt.title("数量")
    plt.xticks(rotation=90)  # rotate the x-axis text by 90 degrees
    bars = plt.bar(list(classes_dict.keys()), list(classes_dict.values()))
    print("bar:", bars)
    for b in bars:
        height = b.get_height()
        ax.annotate(
            "{}".format(height),
            # Point being annotated: the middle of the top edge of the bar.
            xy=(b.get_x() + b.get_width() / 2, height),
            # With textcoords set, xytext is an offset from xy rather than a
            # fixed position in the figure.
            xytext=(0, 3),
            textcoords='offset points',  # alternative unit: 'offset pixels'
            va='bottom', ha='center',  # vertical / horizontal alignment
        )
    plt.show()


if __name__ == '__main__':
    main()
希望对你有所帮助,下期更新 "txt" 版本的标签统计!
import matplotlib.pyplot as plt
import os
# Matplotlib setup: select the SimHei font so Chinese text renders, and keep
# the minus sign on the axes displayable.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Read the class names, one per line.  The position of a name in ``classes``
# is the class index used by the label files, so no line is dropped here.
# The newline is stripped with "\n" (the earlier "/n" never matched, which
# left a trailing newline on every class name).
with open("classes.txt") as cn_path:
    classes = [line.rstrip("\n") for line in cn_path]
print(classes)

# Running count per class name, starting at zero.
class_dict = {}
for i in classes:
    class_dict[i] = 0
print("类别数", len(class_dict))
def main():
    """Count class indices in the ``.txt`` label files and plot the totals.

    Every ``*.txt`` file in ``111/`` is read line by line.  The first
    whitespace-separated field of a line is taken as an integer index into
    ``classes``, and the matching entry of ``class_dict`` is incremented.
    The totals are printed and then drawn as a bar chart with the count
    written above each bar.

    Blank lines are ignored.  A line whose first field is not an integer, or
    whose index falls outside ``classes``, is reported on stdout and skipped
    instead of aborting the run or silently counting from the end of the list.
    """
    base_path = r"111/"
    for file in os.listdir(base_path):
        if not file.endswith(".txt"):
            continue
        if file == "classes.txt":
            # A class list with this name holds class names, not label rows.
            continue
        label_path = os.path.join(base_path, file)
        with open(label_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()  # tolerates tabs and repeated spaces
                if not fields:
                    continue
                try:
                    class_id = int(fields[0])
                except ValueError:
                    print("{}:{}: bad class index {!r}".format(
                        label_path, line_no, fields[0]))
                    continue
                if not 0 <= class_id < len(classes):
                    print("{}:{}: class index {} out of range".format(
                        label_path, line_no, class_id))
                    continue
                class_dict[classes[class_id]] += 1
    print(class_dict)

    fig, ax = plt.subplots(figsize=(10, 8))
    plt.title('数量')
    plt.xticks(rotation=90)  # rotate the x-axis text by 90 degrees
    bars = plt.bar(list(class_dict.keys()), list(class_dict.values()))
    for b in bars:
        height = b.get_height()
        ax.annotate(
            '{}'.format(height),
            # Point being annotated: the middle of the top edge of the bar.
            xy=(b.get_x() + b.get_width() / 2, height),
            # With textcoords set, xytext is an offset from xy rather than a
            # fixed position in the figure.
            xytext=(0, 3),
            textcoords="offset points",  # alternative unit: 'offset pixels'
            va='bottom', ha='center',  # vertical / horizontal alignment
        )
    plt.show()


if __name__ == '__main__':
    main()
在做训练之前,务必做好标签的统计,保证数据均衡