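"""Analyse bounding-box sizes in an MS COCO-format JSON annotation file.

The script reads the JSON file, collects the width and height of every
bounding box into two lists, and then draws histograms to visualise the
size distribution. Further functionality can be added as needed.
"""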
import json
import matplotlib.pyplot as plt
def bbox_distribution(json_file, small_h, small_w, small_file='small.txt'):
    """Collect bounding-box widths and heights from a COCO-style JSON file.

    Each entry of the top-level ``annotations`` list must have a ``bbox``
    sequence; its third item is taken as the width and its fourth item as
    the height. Annotations whose width is below ``small_w`` or whose height
    is below ``small_h`` are also appended to ``small_file``, one
    ``str(annotation)`` per line.

    Args:
        json_file: Path of the JSON annotation file to read.
        small_h: Height threshold; a box with a smaller height is recorded.
        small_w: Width threshold; a box with a smaller width is recorded.
        small_file: Path of the text file receiving the small annotations.
            It is opened in append mode, so existing content is kept, and
            it is not created at all when no annotation is below threshold.

    Returns:
        A ``(bbox_widths, bbox_heights)`` tuple of lists, in annotation order.

    Raises:
        KeyError: If ``annotations`` or an annotation's ``bbox`` is missing.
    """
    # JSON is UTF-8 by specification; relying on the platform default
    # encoding fails on systems whose locale encoding is something else.
    with open(json_file, 'r', encoding='utf-8') as source:
        data = json.load(source)

    bbox_widths = []
    bbox_heights = []
    small_lines = []
    for annotation in data['annotations']:
        bbox = annotation['bbox']
        width, height = bbox[2], bbox[3]
        bbox_widths.append(width)
        bbox_heights.append(height)
        # Remember annotations whose height or width is below its threshold.
        if width < small_w or height < small_h:
            small_lines.append(str(annotation) + '\n')

    # Write all matches in one go instead of re-opening the file per match.
    if small_lines:
        with open(small_file, 'a', encoding='utf-8') as sink:
            sink.writelines(small_lines)

    return bbox_widths, bbox_heights
def plot_distribution(widths, heights):
    """Show side-by-side histograms of all bounding-box widths and heights.

    Args:
        widths: Bounding-box widths, drawn in the left panel.
        heights: Bounding-box heights, drawn in the right panel.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(12, 6))

    # One (title, x-axis label, data) triple per panel, left to right.
    panels = (
        ('Distribution of BBox Widths', 'Width', widths),
        ('Distribution of BBox Heights', 'Height', heights),
    )
    for position, (title, axis_label, values) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.hist(values, bins=40, color='blue', edgecolor='black')
        plt.title(title)
        plt.xlabel(axis_label)
        plt.ylabel('Count')

    plt.tight_layout()
    plt.show()
def plot_distribution_small(widths, heights):
    """Show histograms of widths and heights restricted to the 0-100 range.

    Both panels use 20 bins over ``(0, 100)``, i.e. bins that are 5 wide,
    and place an x-axis tick on every bin edge.

    Args:
        widths: Bounding-box widths, drawn in the left panel.
        heights: Bounding-box heights, drawn in the right panel.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    upper = 100
    step = 5

    plt.figure(figsize=(12, 6))

    # One (title, x-axis label, data) triple per panel, left to right.
    panels = (
        ('Distribution of BBox Widths', 'Width', widths),
        ('Distribution of BBox Heights', 'Height', heights),
    )
    for position, (title, axis_label, values) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.hist(values, bins=upper // step, color='blue', edgecolor='black',
                 range=(0, upper))
        plt.title(title)
        plt.xlabel(axis_label)
        # The stop is upper + 1 so the right edge of the last bin (100) gets
        # a tick as well; range(0, 100, 5) would end at 95.
        plt.xticks(range(0, upper + 1, step))
        plt.ylabel('Count')

    plt.tight_layout()
    plt.show()
def main():
    """Collect bbox sizes from ``RSOD/train.json`` and show both figures.

    Boxes with a width or height below 5 are recorded by
    ``bbox_distribution`` while the sizes are gathered.
    """
    annotation_path = 'RSOD/train.json'
    min_height = min_width = 5

    widths, heights = bbox_distribution(annotation_path, min_height, min_width)

    # Full-range histograms first, then the view limited to 0-100.
    plot_distribution(widths, heights)
    plot_distribution_small(widths, heights)


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()