python cut函数_Python pandas 模块,cut() 实例源码 - 编程字典

def _is_binnable_numeric(series):
    """Return True when *series* has an integer or floating-point dtype of any width."""
    # Comparing ``series.dtype`` against the builtins ``float``/``int`` only matches
    # float64 and the platform default integer; the pandas predicates also accept
    # float32, int32, unsigned ints, etc., while still rejecting bool/datetime/object.
    return pd.api.types.is_integer_dtype(series) or pd.api.types.is_float_dtype(series)


def bubble_plot(df, x, y, z_boolean=None, ordered_x_values=None, ordered_y_values=None, bins_x=10,
                bins_y=10, fontsize=16, figsize=(10, 5), maximal_bubble_size=4000,
                normalization_by_all=False, log=False):
    """
    Draw a bubble plot of the joint frequency of two dataframe fields.

    A new pyplot figure is created, the bubbles are drawn by ``plot_with_z`` or
    ``plot_without_z`` (depending on whether ``z_boolean`` is given), and the ticks,
    axis labels and title are then set on that figure.

    :param df: dataframe
    :param x: name of first numerical/categorical field (string) (for x-axis)
    :param y: name of second numerical/categorical field (string) (for y-axis)
    :param z_boolean: name of categorical field with two categories / boolean field (for coloring)
    :param ordered_x_values: the values we would like to map from x categorical variable
                             according to the order we would like to present them;
                             when given, x is never binned, even if it is numeric
    :param ordered_y_values: the values we would like to map from the y categorical variable
                             according to the order we would like to present them;
                             when given, y is never binned, even if it is numeric
    :param bins_x: the bins for x values if x is numeric
    :param bins_y: the bins for y values if y is numeric
    :param fontsize: font size of the tick and axis labels; the title uses ``fontsize + 4``
    :param figsize: size of the figure, passed to ``plt.figure``
    :param maximal_bubble_size: if the bubbles are too big or too small this is the parameter you should change!
    :param normalization_by_all: True - shows joint distribution p(x,y), False - shows conditional distribution p(y|x)
    :param log: whether to apply log on the count (influence the size of the bubbles);
                only forwarded when ``z_boolean`` is None
    :return: None; the plot is drawn on a newly created pyplot figure.
             Bubble size is proportional to the frequency of the bucket :)
    """
    plt.figure(figsize=figsize)

    # A field is binned only when it is numeric AND no explicit ordering was supplied.
    x_is_numeric = _is_binnable_numeric(df[x]) and ordered_x_values is None
    y_is_numeric = _is_binnable_numeric(df[y]) and ordered_y_values is None

    # Replace numeric fields by their bin intervals, keep categorical fields as they are.
    x_values = pd.cut(df[x], bins=bins_x) if x_is_numeric else df[x]
    y_values = pd.cut(df[y], bins=bins_y) if y_is_numeric else df[y]
    count_table = pd.concat([x_values, y_values], axis=1)

    # Rows are x buckets, columns are y buckets, cells are co-occurrence counts.
    count_table = count_table.groupby(x)[y].value_counts().unstack().fillna(0)

    # Fall back to the order found in the count table when the caller gave none.
    if ordered_x_values is None:
        ordered_x_values = count_table.index.values
    if ordered_y_values is None:
        ordered_y_values = count_table.columns

    # Both helpers return the long-format count table first; it is not needed here.
    if z_boolean is not None:
        _, xticks, yticks, xticklabels, yticklabels = plot_with_z(
            df, x, y, z_boolean, bins_x, bins_y, x_is_numeric, y_is_numeric,
            ordered_x_values, ordered_y_values, maximal_bubble_size,
            normalization_by_all=normalization_by_all)
    else:
        _, xticks, yticks, xticklabels, yticklabels = plot_without_z(
            df, x, y, z_boolean, count_table, bins_x, bins_y, x_is_numeric, y_is_numeric,
            ordered_x_values, ordered_y_values,
            normalization_by_all=normalization_by_all, log=log,
            maximal_bubble_size=maximal_bubble_size)

    plt.xticks(xticks, xticklabels, fontsize=fontsize)
    plt.yticks(yticks, yticklabels, fontsize=fontsize)
    plt.xlabel(x, fontsize=fontsize)
    plt.ylabel(y, fontsize=fontsize)

    if z_boolean is None:
        plt.title("{} vs {} ".format(y, x), fontsize=fontsize + 4)
    else:
        plt.title("{} vs {} and {} (in colors)".format(y, x, z_boolean), fontsize=fontsize + 4)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值