个人记录,写得不太清楚,主要是给自己看的。
时间:2023.10.26
情况说明:
1.从mongoDB读取信息
2.根据需求筛选字段,字段在yaml文件中,统计满足字段的数据集的分布情况
数据分布要求:
1.总数用柱状图表述。
2.颜色,饱和度低一些好看
3.显示柱状图的每个坐标
4.一张图中显示多个子饼图。
遇到的问题:
1.横坐标显示超出图像范围,解决:plt.tight_layout()
2.横坐标之间文字重叠,解决:旋转横坐标 plt.xticks(rotation=20)
3.在柱状图的右侧添加文字批注,文字显示不全,解决:plt.tight_layout()
4.颜色修改得好看些,低饱和度,解决:colors = ['#FF3838', '#FF9D97', '#FF701F', '#FFB21D', '#CFD231', '#48F90A', '#92CC17', '#3DDB86', '#1A9334', '#00D4BB', '#2C99A8', '#00C2FF', '#344593', '#6473FF', '#0018EC', '#8438FF', '#520085', '#CB38FF', '#FF95C8', '#FF37C7']
这是yolov5中采用的colors,还是不错的
5.多个子饼状图,每个都很小,解决:对每个子图设置图像大小,fig.set_size_inches(10, 7) # 设置子图大小
6.饼状图的某几个比例特别小,文字重叠在一起,解决:不在饼状图中显示,用图例显示;
axs[i, j].pie(y_data,colors=cur_colors, explode=explode,labeldistance=1.4, pctdistance=1.2, radius=1)
labels = ['{} {:1.3f}% ({})'.format(i, j, v) for i, j, v in zip(x_label, pre_data, y_data)] # 构造图例数据
axs[i, j].legend(labels= labels, loc='lower left')
代码中有未用到的部分:
1.引导线,没用原因:用了引导线,文字还是重叠,引导线不适合我,应该可以改起末坐标,但不想改了。
整体代码如下:
# yaml 文件(默认文件名 StatisticalData_basedColumnFields.yaml)
is_in_rows: ["False","True"]
occlusion: ["no","light","medium","heavy"]
occlusion_type: ["unknown","objects","non_objects"]
truncation: ["no","light","medium","heavy"]
orientation: ["0","1","2","3","4","5","6","7","8"]
# 注意:
# False 和 True 需要写成字符串格式 "False"、"True"(脚本用 str() 把字段值转成字符串后,再与这里的取值比较)
#python 文件
import tqdm
import os
import sys
from ingot import common_functions as helper
from pymongo import MongoClient
import csv
import matplotlib.pyplot as plt
import yaml
import argparse
import numpy as np
import pandas as pd
def stardust_match_category(old_c):
    """Map a raw annotation category onto one of the six coarse classes.

    Args:
        old_c: Raw category name taken from an object's ``property`` dict,
            e.g. ``"Vehicle_Car"``.

    Returns:
        The coarse class name (``"four_wheeler_small"``,
        ``"four_wheeler_large"``, ``"two_wheeler"``, ``"pedestrian"``,
        ``"barrier_cone"`` or ``"locker"``), or ``False`` when the raw
        category belongs to none of the groups.
    """
    # (coarse class, raw categories) pairs, checked in this order.
    groups = (
        ("four_wheeler_small",
         ["Vehicle_Car", "Vehicle_Suv", "Vehicle_Mpv", "Vehicle_Special", "Vehicle_SmallManpowerVehicle"]),
        ("four_wheeler_large",
         ["Vehicle_Bus", "Vehicle_Truck", "Vehicle_Unknown", "Vehicle_Other"]),
        ("two_wheeler",
         ["Vehicle_Bicycle", "Vehicle_Motorcycle", "Bicycle_Motorcycle", "Bicycle_Bicycle", "Bicycle_Other"]),
        ("pedestrian",
         ["Human_Pedestrian", "Human_Other", "Human_Trafficpolice"]),
        ("barrier_cone",
         ["Barrier_Cone"]),
        ("locker",
         ["Barrier_Locker_Locked", "Barrier_Locker_Unlocked", "Barrier_Locker"]),
    )
    for coarse, raw_names in groups:
        if old_c in raw_names:
            return coarse
    # Falsy sentinel: callers test the result with ``if not ...``.
    return False
def Statistic(json_data, writer, count, input_propertys_keys):
    """Accumulate per-combination object counts from one label document.

    Each object whose raw category maps to a coarse class contributes one
    row ``[coarse_class, value_1, ..., value_n]``, where ``value_k`` is the
    stringified value of the k-th requested property, or ``"-"`` when the
    object does not carry that property.

    Args:
        json_data: One label document; must contain an ``"objects"`` list
            whose items each have a ``"property"`` dict with a
            ``"category"`` entry.
        writer: List of distinct rows seen so far (updated in place).
        count: Occurrence counts, parallel to ``writer`` (updated in place).
        input_propertys_keys: Property names to extract, in column order.

    Returns:
        The same ``(writer, count)`` pair that was passed in.
    """
    for labelled in json_data['objects']:
        props = labelled["property"]  # every attribute lives under "property"
        coarse = stardust_match_category(props["category"])
        if not coarse:
            # Raw category outside the known groups: not counted.
            continue

        cur = [coarse]
        for key in input_propertys_keys:
            cur.append(str(props[key]) if key in props else "-")

        # A single scan finds the row; ValueError means it is new.
        try:
            count[writer.index(cur)] += 1
        except ValueError:
            writer.append(cur)
            count.append(1)
    return writer, count
def drawpie(categery, context, allnum, input_propertys, save_root):
    """Save ``<categery>.png`` with one pie chart per property.

    Args:
        categery: Category name; used as figure title and PNG file name.
        context: Rows shaped ``[category, <one value per property>, count]``
            belonging to this category. The count (last column) may be a
            string; it is converted with ``int``.
        allnum: Total object count of the category, the denominator of the
            shares used for slice offsets and the legend.
        input_propertys: Mapping ``property name -> list of allowed values``.
        save_root: Directory the PNG is written to (created if missing).

    Returns:
        None. Nothing is drawn when no property is left to plot.
    """
    palette = ['#FF3838', '#FF9D97', '#FF701F', '#FFB21D', '#CFD231', '#48F90A', '#92CC17', '#3DDB86', '#1A9334', '#00D4BB',
               '#2C99A8', '#00C2FF', '#344593', '#6473FF', '#0018EC', '#8438FF', '#520085', '#CB38FF', '#FF95C8', '#FF37C7']

    head = list(input_propertys.keys())

    # Seed one counter per allowed value and give every distinct value a
    # stable colour. The modulo keeps this working past 20 distinct values.
    colors_dic = {}
    count_head = {}
    for prop, allowed in input_propertys.items():
        count_head[prop] = {}
        for value in allowed:
            count_head[prop][value] = 0
            if value not in colors_dic:
                colors_dic[value] = palette[len(colors_dic) % len(palette)]
    colors_dic['-'] = palette[len(colors_dic) % len(palette)]

    # Sum the row counts per (property, value).
    for line in context:
        weight = int(line[-1])
        for prop, value in zip(head, line[1:-1]):
            bucket = count_head[prop]
            bucket[value] = bucket.get(value, 0) + weight

    # A property with any "-" (missing) value is dropped entirely; values
    # that never occurred are dropped from the remaining properties.
    for prop in list(count_head):
        bucket = count_head[prop]
        if "-" in bucket:
            del count_head[prop]
            head.remove(prop)
            continue
        for value in [v for v, n in bucket.items() if n == 0]:
            del bucket[value]

    n_plots = len(head)
    if n_plots == 0:
        # Previously this ended in a ZeroDivisionError while sizing the grid.
        print(f"{categery}: no property left to plot, skipping")
        return

    # Grid layout: 2 columns for an even count, 2 rows for an odd count.
    if n_plots == 1:
        row, col = 1, 1
    elif n_plots % 2 == 0:
        row, col = n_plots // 2, 2
    else:
        row, col = 2, (n_plots + 1) // 2

    # squeeze=False keeps ``axs`` two-dimensional for every grid shape.
    fig, axs = plt.subplots(row, col, figsize=(10, 7), squeeze=False)
    try:
        fig.suptitle(categery, fontsize=14, fontweight='bold')
        fig.subplots_adjust(wspace=0.5, hspace=0.5)

        for pos, ax in enumerate(axs.flat):
            if pos >= n_plots:
                fig.delaxes(ax)  # unused cell of the grid
                continue

            prop = head[pos]
            x_label = list(count_head[prop].keys())
            y_data = list(count_head[prop].values())
            pre_data = [value / allnum if allnum else 0.0 for value in y_data]

            # Pull small slices outwards; count the ones below 1 %.
            explode = []
            tiny_slices = 0
            for share in pre_data:
                if share < 0.001:
                    explode.append(0.3)
                    tiny_slices += 1
                elif share < 0.01:
                    explode.append(0.2)
                    tiny_slices += 1
                elif share < 0.05:
                    explode.append(0.1)
                else:
                    explode.append(0)

            cur_colors = [
                colors_dic.setdefault(label, palette[len(colors_dic) % len(palette)])
                for label in x_label
            ]

            ax.set_title(prop)
            if tiny_slices > 1:
                # Several tiny slices: on-wedge labels would overlap, so
                # name, share and count go into a legend instead.
                ax.pie(y_data, colors=cur_colors, explode=explode, radius=1)
                legend_labels = ['{} {:.3f}% ({})'.format(label, share * 100, value)
                                 for label, share, value in zip(x_label, pre_data, y_data)]
                ax.legend(labels=legend_labels, loc='lower left')
            else:
                ax.pie(y_data, labels=x_label, colors=cur_colors, explode=explode, autopct='%3.3f%%',
                       labeldistance=1.1, pctdistance=0.7, radius=1)

        fig.tight_layout()  # keep labels and legends inside the image
        os.makedirs(save_root, exist_ok=True)
        fig.savefig(os.path.join(save_root, categery + '.png'))
    finally:
        plt.close(fig)
def drawFigure(context, input_propertys, save_root):
    """Draw the overall bar chart and one pie-chart figure per category.

    Rows are kept only when every property column holds an allowed value
    or ``"-"``. The kept rows are grouped by category, their counts summed,
    ``TotalStatistics.png`` is written, and ``drawpie`` is called once per
    category.

    Args:
        context: Rows shaped ``[category, <one value per property in the
            order of input_propertys>, count]``.
        input_propertys: Mapping ``property name -> list of allowed values``.
        save_root: Output directory for all images (created if missing).
    """
    colors = ['#FF3838', '#FF9D97', '#FF701F', '#FFB21D', '#CFD231', '#48F90A', '#92CC17', '#3DDB86', '#1A9334', '#00D4BB',
              '#2C99A8', '#00C2FF', '#344593', '#6473FF', '#0018EC', '#8438FF', '#520085', '#CB38FF', '#FF95C8', '#FF37C7']
    x_data = ["four_wheeler_small", "four_wheeler_large", "two_wheeler", "pedestrian", "barrier_cone", "locker"]

    head = list(input_propertys.keys())
    count_col = len(head) + 1  # column index of the count
    # Allowed values per property column; "-" (missing) is always accepted.
    accepted = [values + ["-"] for values in input_propertys.values()]

    rows_by_category = {name: [] for name in x_data}
    totals = {name: 0 for name in x_data}
    for line in context:
        if len(line) <= count_col:
            continue  # blank or truncated row
        if not all(line[pos + 1] in ok_values for pos, ok_values in enumerate(accepted)):
            continue
        category = line[0]
        if category not in totals:
            print("有误")
            continue
        totals[category] += int(line[count_col])
        rows_by_category[category].append(line)

    print(",".join(f"{name}={totals[name]}" for name in x_data))
    y_data = [totals[name] for name in x_data]

    # Figure 1: overall totals as a bar chart.
    os.makedirs(save_root, exist_ok=True)
    fig = plt.figure(figsize=(8, 6), dpi=900)
    try:
        fig.suptitle('Total Statistics', fontsize=14, fontweight='bold')
        for name, total, color in zip(x_data, y_data, colors):
            plt.bar(name, total, color=color)
            # Exact count printed on top of each bar.
            plt.text(name, total, str(int(total)), ha="center", va="bottom", fontsize=10)
        plt.legend(frameon=False, loc='upper right', labels=x_data)
        plt.xticks(rotation=20)  # rotate x labels by 20 degrees so they do not overlap
        plt.tick_params(axis='both', labelsize=10)

        # Annotation right of the last bar listing the filter criteria.
        # It is anchored at two thirds of the tallest bar, so it stays
        # visible whichever category is largest.
        criteria = "Filter Criteria:\n" + "".join(f"{k}{v}\n" for k, v in input_propertys.items())
        plt.text(len(y_data), (max(y_data) // 3) * 2, criteria, fontsize=8, ha='left')
        plt.tight_layout()  # keep the annotation and tick labels inside the image
        plt.savefig(os.path.join(save_root, 'TotalStatistics.png'))
    finally:
        plt.close(fig)

    # Figures 2-7: per-category property distributions.
    for name in x_data:
        drawpie(name, rows_by_category[name], totals[name], input_propertys, save_root)
def read_yaml(file_path):
    """Parse the YAML file at ``file_path`` and return its content.

    The file is decoded as UTF-8 explicitly, so non-ASCII comments in it
    do not depend on the platform's default encoding.

    Args:
        file_path: Path of the YAML file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
def parse_opt(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``yaml_file``, ``img_save_root`` and
        ``cvs_save_file``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--yaml_file', type=str, default="StatisticalData_basedColumnFields.yaml", help='Statistics Field')
    parser.add_argument('--img_save_root', type=str, default="/media/zjm/065ba217-589f-490c-b6f7-31a14613bf8d/zjm/data/processdata_code/drawstatistic", help='Image Save Path')
    # "--cvs_save_file" is the historical (misspelled) flag and stays valid;
    # "--csv_save_file" is accepted as an alias and stored under the same name.
    parser.add_argument('--cvs_save_file', '--csv_save_file', dest='cvs_save_file', default="/media/zjm/065ba217-589f-490c-b6f7-31a14613bf8d/zjm/data/processdata_code/drawstatistic/statistic_20231024.csv", help='CSV Save Path or False')
    return parser.parse_args(argv)
def main(opt):
    """Read the statistics CSV and render all charts.

    Args:
        opt: Parsed options providing ``yaml_file`` (filter definition),
            ``img_save_root`` (output directory) and ``cvs_save_file``
            (CSV with one row per category/property combination).
    """
    # Parameters
    yaml_file = opt.yaml_file
    img_save_root = opt.img_save_root
    cvs_save_file = opt.cvs_save_file
    input_propertys = read_yaml(yaml_file)

    # SECURITY: a MongoDB connection string with user and password, plus
    # object-store access keys, used to be pasted here. They were removed;
    # treat them as leaked and rotate them. Credentials must come from the
    # secret helper, never from the source file.
    # NOTE(review): the removed sample dict had the keys db_name, db_url,
    # qingstor_s3_config_data, qingstor_s3_bucket_name and
    # qingstor_s3_zone_name - presumably the shape returned by
    # helper.get_secret_db_creds; confirm.
    #
    # The CSV consumed below was produced by the MongoDB export sketched
    # here (kept for reference, disabled):
    #
    #   init_dict = helper.get_secret_db_creds(use_test_db=False)
    #   db_url = init_dict["db_url"]
    #   mongo_collection = MongoClient(db_url)
    #   db_name = "ParkEXcalibur"
    #   collection = "manual_label"
    #   collection_handler = mongo_collection[db_name][collection]
    #   manual_label_jsons = collection_handler.find(no_cursor_timeout=True)
    #
    #   writer = []
    #   count = []
    #   for json_data in tqdm.tqdm(manual_label_jsons):
    #       writer, count = Statistic(json_data, writer, count, list(input_propertys.keys()))
    #
    #   # Write the detailed statistics CSV
    #   context = []
    #   if cvs_save_file:
    #       with open(cvs_save_file, 'w', newline='') as f:
    #           w = csv.writer(f)
    #           w.writerow(["category"] + list(input_propertys.keys()) + ["count"])
    #           for i in range(len(writer)):
    #               cur = writer[i] + [count[i]]
    #               context.append(cur)
    #               w.writerow(cur)
    #   else:
    #       for i in range(len(writer)):
    #           cur = writer[i] + [count[i]]
    #           context.append(cur)

    # Load every CSV row, including the header row the export writes.
    # newline='' lets the csv module handle line endings itself.
    with open(cvs_save_file, "r", newline='') as csvfile:
        context = list(csv.reader(csvfile))

    # Bar chart of the totals plus one pie-chart figure per category.
    drawFigure(context, input_propertys, img_save_root)
# Script entry point: parse the command line, then run the pipeline.
if __name__ == "__main__":
    opt = parse_opt()
    main(opt)