python可视化-直方图

diluosixu

已于 2024-08-27 19:37:05 修改

阅读量182

点赞数 7

分类专栏：python　文章标签：信息可视化、机器学习、人工智能

于 2024-08-27 19:34:44 首次发布

本文链接：https://blog.csdn.net/u012763126/article/details/141610118

版权

python 专栏收录该内容

10 篇文章 0 订阅

订阅专栏

1、加载数据

import pandas as pd
from sklearn.datasets import load_iris
import warnings
 
# Silence every warning so the displayed output stays uncluttered.
warnings.filterwarnings(action='ignore')

# Load the iris dataset; the bare expression displays it in a notebook cell.
iris = load_iris()
iris

iris.keys()

# Build a DataFrame from the feature matrix and append the class labels
# as a 'target' column.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    target=iris.target
)
df.head()

2、基于seaborn的直方图

# `sns` is the seaborn alias used by every seaborn plotting call in this
# script; it must be imported before the first use.
import seaborn as sns

# Histogram of sepal length with a rug of the individual observations.
sns.displot(df['sepal length (cm)'], kind='hist', rug=True)

# Same variable assigned to the y-axis instead of the x-axis.
sns.displot(y=df['sepal length (cm)'])

3、基于matplotlib的直方图

import matplotlib.pyplot as plt

# Create a small figure, then a single axes on it.
fig = plt.figure(figsize=(4, 3))
ax = fig.subplots()

# Plain matplotlib histogram; black edges separate neighbouring bars.
sepal_length = df['sepal length (cm)']
ax.hist(sepal_length, edgecolor='black')
plt.show()

4、绘制子图对比

# 3x2 grid of axes for comparing histogram variants of one variable.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(12, 16), constrained_layout=True)
column = 'sepal length (cm)'

# Row 1: default histogram, values on the x-axis and then on the y-axis.
sns.histplot(data=df, x=column, ax=ax[0, 0])
sns.histplot(data=df, y=column, ax=ax[0, 1])

# Row 2, left: custom number of bins.
sns.histplot(data=df, x=column, bins=20, ax=ax[1, 0])
ax[1, 0].set_title('自定义分箱')

# Row 2, right: histogram with a KDE density curve.
sns.histplot(data=df, x=column, kde=True, ax=ax[1, 1])
ax[1, 1].set_title('添加密度曲线')

# Row 3, left: KDE curve plus a rug.
# rugplot marks each observation as a short tick along the axis, showing
# where the raw values actually fall.
sns.histplot(data=df, x=column, kde=True, ax=ax[2, 0])
sns.rugplot(data=df, x=column, ax=ax[2, 0])
ax[2, 0].set_title('添加kde+rug')

# Row 3, right: density-scaled histogram with separately styled KDE and rug
# layers drawn on the same axes.
sns.histplot(data=df, x=column, stat="density", ax=ax[2, 1])
sns.kdeplot(data=df, x=column, color="g", linewidth=5, alpha=0.3, ax=ax[2, 1])
sns.rugplot(
    data=df,
    x=column,
    color="r",
    linewidth=2,
    alpha=0.3,
    height=0.1,
    ax=ax[2, 1],
)
ax[2, 1].set_title('自定义kde+rug')

plt.show()

# 3x2 grid of axes for multi-series histograms.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(12, 12), constrained_layout=True)

# Row 1: no x/y is given, so df is passed to seaborn as wide-form data.
sns.histplot(data=df, ax=ax[0, 0])
sns.histplot(data=df, multiple='stack', shrink=.8, ax=ax[0, 1])

# Remaining rows: sepal length split by the class label.
by_target = {'x': 'sepal length (cm)', 'hue': 'target'}

# Row 2, left: default layering, where the per-class histograms overlap.
sns.histplot(data=df, ax=ax[1, 0], **by_target)
ax[1, 0].set_title('重叠（覆盖）图')

# Row 2, right: per-class histograms stacked on top of each other.
sns.histplot(data=df, multiple='stack', ax=ax[1, 1], **by_target)
ax[1, 1].set_title('堆叠图')

# Row 3: step outlines instead of bars, as counts and then as densities.
sns.histplot(df, element='step', ax=ax[2, 0], **by_target)
sns.histplot(df, element='step', stat="density", ax=ax[2, 1], **by_target)

plt.show()

# 2x1 grid of axes for bivariate (2-D) histograms.
fig, ax = plt.subplots(2, 1, constrained_layout=True, figsize=(4, 6))

# Top: joint distribution of sepal length and petal length.
sns.histplot(df, x='sepal length (cm)', y='petal length (cm)', ax=ax[0])
# Bottom: the same joint distribution, coloured by class label.
sns.histplot(df, x='sepal length (cm)', y='petal length (cm)', hue='target', ax=ax[1])

# Render this figure now, as the other figure blocks do. Without it, later
# plotting calls that pass no `ax` would draw onto this figure's current
# axes when the code runs as a plain script.
plt.show()

5、一图绘制多个变量

# Overlay the histograms (with KDE curves) of two variables on the same axes;
# each call is labelled with its column name so the legend can tell them apart.
for column in ('sepal length (cm)', 'sepal width (cm)'):
    sns.histplot(df, x=column, label=column, kde=True)

plt.legend()
plt.show()

# Histogram + scatter plot: the scatter plot shows the relationship between
# the two variables, while the histograms show each variable's distribution.

sns.jointplot(data=df, x="sepal length (cm)", y="sepal width (cm)", kind='scatter')

diluosixu

关注

7
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录