箱线图:【Python】箱图boxplot--统计数据、观察数据利器-CSDN博客
多个子图:python 可视化:fig, ax = plt.subplots()画多表图的3中常见样例 & 自定义图表格式-CSDN博客
常规的绘制:
import matplotlib.pyplot as plt
# Benchmark scores, nested as: prompting setup -> model name -> metric name.
performance = {
    "zero_shot": {
        "gpt-3.5-turbo-0613": {
            "precision": 0.7919133278407181,
            "recall": 0.806282722513089,
            "f1": 0.7807530967691199,
        },
        "gpt-4-0613": {
            "precision": 0.9314722577069027,
            "recall": 0.9267015706806283,
            "f1": 0.9271956481845013,
        },
    },
    "few_shot": {
        "gpt-3.5-turbo-0613": {
            "precision": 0.8435247936255214,
            "recall": 0.8586387434554974,
            "f1": 0.8447984162323493,
        },
        "gpt-4-0613": {
            "precision": 0.9407759040163695,
            "recall": 0.9267015706806283,
            "f1": 0.9302632275594479,
        },
    },
}
# Flatten the nested {context: {model: scores}} mapping into a single-level
# dictionary keyed by "<model>_<context>", so that every model/context
# combination becomes one group of bars on the chart.
by_model_and_context = {}
for context_type, models_data in performance.items():
    # The per-model dict is named `scores` here so it does not shadow the
    # module-level `metrics` list built below.
    for model, scores in models_data.items():
        key = f"{model}_{context_type}"
        by_model_and_context[key] = scores

# Group labels (one per model/context pair), in insertion order.
models = list(by_model_and_context)
# Metric names are read from the first entry; the other entries are expected
# to expose the same keys. Fall back to an empty list when there is no data
# instead of failing on models[0].
metrics = list(by_model_and_context[models[0]]) if models else []
# Plot a grouped bar chart: one group per model/context pair, one bar per
# metric, with the numeric score printed on top of each bar.
fig, ax = plt.subplots(figsize=(10, 4))  # create the figure and its single axes first
width = 0.2  # width of one bar, also the horizontal step between bars in a group
x = range(len(models))  # base x position of each group
for i, metric in enumerate(metrics):
    metric_values = [by_model_and_context[model][metric] for model in models]
    # Shift the i-th metric's bars to the right by i bar widths within each group.
    bar_positions = [pos + width * i for pos in x]
    # Draw one metric for all models per call so that each metric gets
    # exactly one legend entry.
    ax.bar(bar_positions, metric_values, width, label=metric)
    # Displaying the metric scores on top of each bar
    for bar_pos, val in zip(bar_positions, metric_values):
        ax.text(bar_pos, val, f'{val:.3f}', ha='center', va='bottom', fontsize=9)

# Put each tick under the middle of its group. The bars of a group are centred
# at pos, pos + width, ..., pos + width * (len(metrics) - 1), so the middle is
# half of the last offset (equal to `width` for three metrics).
group_center = width * (len(metrics) - 1) / 2
ax.set_xticks([pos + group_center for pos in x])
ax.set_xticklabels(models, rotation=0, ha='center', fontsize=8)
ax.set_ylabel('Performance')
ax.set_title('GPT Benchmarks')
# Anchor the legend's upper-left corner at the axes' upper-right corner so the
# legend sits outside the plotting area and does not cover any bars.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()
效果图如下: