# pandas in Python: binning (partitioning) a DataFrame with pandas

import numpy as np

import pandas

# Demo frame with 100 rows: two columns of uniform random floats in [0, 1)
# and an integer "id" column counting 0..99.
df = pandas.DataFrame({
    "a": np.random.random(100),
    "b": np.random.random(100),
    "id": np.arange(100),
})

# Bin the data frame by "a" using 10 evenly spaced edges from min(a) to max(a).
# np.digitize labels each value with the half-open interval
# [edges[i-1], edges[i]) it falls in, so labels 1-9 are the nine equal-width
# bins, and the maximum of "a" (equal to the last edge) lands alone in label 10.
bins = np.linspace(df.a.min(), df.a.max(), 10)
groups = df.groupby(np.digitize(df.a, bins))

# Get the mean of each bin.
# print() is called as a function so the script also runs on Python 3.
print(groups.mean())  # Also could do "groups.aggregate(np.mean)"

# Similarly, the median:
print(groups.median())

# Apply some arbitrary function to aggregate binned data: here, the mean of
# only those entries greater than 0.5 within each bin and column.
print(groups.aggregate(lambda x: np.mean(x[x > 0.5])))

# Per-bin mean of column "b" only. As a bare expression the result is shown
# only in an interactive session; in a script it is computed and discarded.
groups.mean().b

import numpy as np

import pandas

# Demo frame with 100 rows: "a" is uniform random in [0, 1); "b" is uniform
# random shifted by +10, i.e. in [10, 11).
df = pandas.DataFrame({
    "a": np.random.random(100),
    "b": np.random.random(100) + 10,
})

# Bin the data frame by "a" using 10 evenly spaced edges from min(a) to max(a).
# pandas.cut turns the 10 edges into 9 right-closed intervals (left, right].
# NOTE: because the first interval is open on the left, the row holding the
# minimum of "a" falls in no interval and is left out of every group.
bins = np.linspace(df.a.min(), df.a.max(), 10)

# observed=False keeps every interval in the result even if it is empty,
# regardless of the pandas version's default for categorical groupers.
groups = df.groupby(pandas.cut(df.a, bins), observed=False)

# Get the mean of b, binned by the values in a.
# print() is called as a function so the script also runs on Python 3.
print(groups.mean().b)

a

(0.00186, 0.111] 10.421839

(0.111, 0.22] 10.427540

(0.22, 0.33] 10.538932

(0.33, 0.439] 10.445085

(0.439, 0.548] 10.313612

(0.548, 0.658] 10.319387

(0.658, 0.767] 10.367444

(0.767, 0.876] 10.469655

(0.876, 0.986] 10.571008

Name: b

• 0
点赞
• 0
收藏
觉得还不错? 一键收藏
• 0
评论
04-20 444
04-17 1981
06-24 1644
11-17 2516
01-16 2万+
08-14 4696
04-07 2576
10-30 1万+
04-10 1299

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。