分类数据可视化 - 统计图
barplot() / countplot() / pointplot()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Global seaborn look: white background with grid lines, sized for "paper".
sns.set_style("whitegrid")
sns.set_context("paper")
import warnings
# Silence every warning (including library deprecation notices) for the
# rest of the session so the notebook output stays uncluttered.
warnings.filterwarnings('ignore')
1. 柱状图 - 置信区间估计
置信区间:样本均值 ± 抽样误差
# Titanic passenger records from seaborn's example datasets.
titanic = sns.load_dataset('titanic')

# Bar height is the mean of `survived` for each sex, with one bar per
# passenger class.
# NOTE: ci='sd' makes the error bars show the standard deviation of the
# observations instead of a bootstrapped confidence interval.
sns.barplot(
    data=titanic,
    x='sex',
    y='survived',
    hue='class',
    order=['male', 'female'],  # fixed left-to-right category order
    palette='hls',
    saturation=0.8,
    ci='sd',
    capsize=0.05,  # width of the caps on the error bars
    errcolor='gray',
    errwidth=2,
)
# Restaurant tipping records from seaborn's example datasets.
tips = sns.load_dataset('tips')

# Mean total bill for each day, split by sex.
sns.barplot(x='day', y='total_bill', hue='sex', data=tips, palette='Blues', edgecolor='w')

# Tabulate the group means that the bars above represent.
# numeric_only=True keeps the aggregation to numeric columns; without it,
# pandas >= 2.0 raises TypeError on the categorical columns, while older
# pandas silently dropped them (so the result is unchanged there).
tips.groupby(['day', 'sex']).mean(numeric_only=True)
多柱状图 - 置信区间估计
# Per-state car crash statistics, ordered from most to fewest total crashes
# so the longest bars are drawn at the top.
crashes = sns.load_dataset('car_crashes')
crashes = crashes.sort_values('total', ascending=False)

# Tall, narrow canvas: one horizontal bar per state abbreviation.
f, ax = plt.subplots(figsize=(6, 15))

# Draw two overlapping layers on the same axes. set_color_codes() changes
# what the shorthand colour 'b' resolves to, so each layer gets a different
# shade of blue even though both pass color='b'.
for color_codes, column, legend_label in (
    ('pastel', 'total', 'Total'),
    ('muted', 'alcohol', 'Alcohol-involved'),
):
    sns.set_color_codes(color_codes)
    sns.barplot(
        x=column,
        y='abbrev',
        data=crashes,
        label=legend_label,
        color='b',
        edgecolor='w',
    )

# Two-column legend in the lower-right corner; drop the left/bottom spines.
ax.legend(ncol=2, loc='lower right')
sns.despine(left=True, bottom=True)
计数柱状图:countplot()
# Count of passengers in each class, with separate bars per `who` category.
sns.countplot(
    data=titanic,
    x='class',
    hue='who',
    palette='magma',
)
折线图 - 置信区间估计:pointplot()
# Point estimate (mean) of the total bill at each meal time, with one line
# per smoker category.
sns.pointplot(
    x='time',
    y='total_bill',
    hue='smoker',
    data=tips,
    palette='hls',
    dodge=True,   # offset the hue levels so their error bars do not overlap
    join=True,    # connect the point estimates of each hue level with a line
    # One marker / line style per hue level. The keyword is `markers`; the
    # previous spelling `makers` is not a pointplot parameter.
    markers=['o', 'x'],
    linestyles=['-', '--'],
)

# Group means of total_bill that the points above represent. The column is
# selected before aggregating so the non-numeric columns are never averaged
# (averaging them raises TypeError on pandas >= 2.0).
tips.groupby(['time', 'smoker'])['total_bill'].mean()