Python AI方向:使用 seaborn(sns)分析数据分布并画图

'''麦穗数据集分析'''
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the annotation table (presumably one row per bounding box, since rows
# are later counted per image_id to get the number of boxes per image).
train_data_label = pd.read_csv('./data/wheatData/train_data.csv')

# Preview the first rows. A bare `.head()` expression is silently discarded
# when this file runs as a script, so print it explicitly.
print(train_data_label.head())
# Report the number of rows in the annotation table.
print(f'数据集大小:{len(train_data_label)}')


# Distribution of the number of annotation rows (boxes) per image.
counts = train_data_label['image_id'].value_counts()
# Histogram with a kernel-density estimate overlaid, drawn in green.
sns.displot(data=counts, kde=True, color="g")
plt.gca().set_title('数据集框数量')
plt.show()

# Joint (2D) histogram of the boxes' top-left corner coordinates.
corner_ax = sns.histplot(train_data_label, x='x', y='y', bins=50, pmax=0.9)
corner_ax.set_xlabel('x')
corner_ax.set_ylabel('y')
plt.show()

# Box centres: top-left corner plus half the box size. The box dimensions are
# read from the 'w'/'h' columns, the same columns the rest of this script uses
# for box width and height.
train_data_label['cx'] = train_data_label['x'] + train_data_label['w'] / 2
train_data_label['cy'] = train_data_label['y'] + train_data_label['h'] / 2
# Joint (2D) histogram of the box centre coordinates.
sns.histplot(data=train_data_label, x='cx', y='cy', bins=50, pmax=0.9)
plt.xlabel('cx')
plt.ylabel('cy')
plt.show()
#
# Joint (2D) histogram of box width against box height, which shows how the
# boxes' aspect ratios are spread.
size_ax = sns.histplot(train_data_label, x='w', y='h', bins=50, pmax=0.9)
size_ax.set_xlabel('w')
size_ax.set_ylabel('h')
plt.show()

# Distribution of box areas (w * h); intended as a guide for choosing anchor
# scales.
areas = train_data_label['w'] * train_data_label['h']
# Use the vectorised Series reductions: the builtin min()/max() iterate the
# Series element by element in Python and give unreliable results if a NaN
# is present.
print("目标框最小面积为{}最大面积为{}".format(areas.min(), areas.max()))
sns.histplot(areas, bins=50, kde=False)
plt.show()


def show_images(imgs, num_rows=1, num_cols=2):
    """Draw images on a grid of subplots in a new figure.

    Args:
        imgs: Iterable of images accepted by ``Axes.imshow``.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.

    Returns:
        List of the created axes, one per displayed image, in row-major
        order. At most ``num_rows * num_cols`` images are shown; if fewer
        images are supplied, only that many axes are created instead of
        failing with an IndexError.
    """
    fig = plt.figure()
    axes = []
    # zip() stops at the shorter of the grid capacity and the image list.
    for cell, img in zip(range(num_rows * num_cols), imgs):
        ax = fig.add_subplot(num_rows, num_cols, cell + 1)
        # Draw on the subplot explicitly rather than on pyplot's implicit
        # "current axes".
        ax.imshow(img)
        ax.axis('off')
        axes.append(ax)
    return axes


def show_bboxes(ax, bboxes, labels=None, colors=None):
    """Draw bounding boxes (and optional class labels) on an axes.

    Args:
        ax: Matplotlib axes to draw on.
        bboxes: Iterable of boxes in corner format ``(x1, y1, x2, y2)``.
            When ``labels`` is given, each box must carry a fifth element:
            the class index into ``labels``.
        labels: Optional sequence of label strings indexed by class index.
        colors: Optional sequence of edge colours; defaults to ``['r']``.
            Without ``labels`` every box uses ``colors[0]``; with ``labels``
            the colour is selected by class index (wrapping around).
    """
    if colors is None or len(colors) == 0:
        colors = ['r']
    for bbox in bboxes:
        # Take only the first four entries so that boxes carrying a trailing
        # class index can still be unpacked.
        x1, y1, x2, y2 = bbox[:4]
        cls = int(bbox[4]) if labels is not None else 0
        edge = colors[cls % len(colors)]
        rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor=edge, linewidth=1)
        ax.add_patch(rect)
        if labels is not None:
            # Write the label on the axes that was passed in, not on
            # pyplot's current axes, so multi-subplot figures are annotated
            # correctly.
            ax.text(x1 + 5, y1 + 15, labels[cls], color='w', fontsize=16)


# Preview a few images with their annotated boxes drawn on top.
# Grid layout of the preview figure.
num_rows, num_cols = 1, 2
# Pick the image ids to show: a window of num_rows * num_cols unique ids
# starting at position 100.
ids = train_data_label['image_id'].unique()[100:100 + num_rows * num_cols]
train_data_dir = './data/wheatData/train'
# Load the corresponding image files.
imgs = [plt.imread(f'{train_data_dir}/{n}.jpg') for n in ids]
# Show the images and keep the axes so boxes can be drawn on each one.
axes = show_images(imgs, num_rows, num_cols)
# Overlay the boxes. The loop variable is named image_id to avoid shadowing
# the builtin id().
for ax, image_id in zip(axes, ids):
    rows = train_data_label[train_data_label['image_id'] == image_id]
    # Convert (x, y, w, h) to corner format column-wise instead of building
    # a Series per row with iterrows().
    right = rows['x'] + rows['w']
    bottom = rows['y'] + rows['h']
    bboxes = list(zip(rows['x'], rows['y'], right, bottom))
    show_bboxes(ax, bboxes, labels=None, colors=['r'])
plt.show()

画图结果:

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值