生成一组随机数据
# Draw 500 random integers from [1, 1000); randint's upper bound is exclusive.
data = np.random.randint(1, 1000, 500)
# Split the observed value range into 25 equal-width bins.
incomeranges = pd.cut(data, 25)
# Bin edges can also be given explicitly; this rebinds incomeranges to 5 custom bins.
incomeranges = pd.cut(data, [0, 100, 300, 500, 700, 1000])
# Count the values in each bin, most populated bin first.
# The top-level pd.value_counts helper is deprecated, so go through a Series.
pd.Series(incomeranges).value_counts()
Out[10]:
(700, 1000] 140
(300, 500] 122
(100, 300] 111
(500, 700] 83
(0, 100] 44
dtype: int64
统计每个区间内数据的分布情况(以下输出对应 pd.cut(data,25) 得到的25个等宽区间):
# Count the values in each bin, most populated bin first.
# The top-level pd.value_counts helper is deprecated, so go through a Series.
pd.Series(incomeranges).value_counts()
(279.88, 319.72] 32
(319.72, 359.56] 29
(240.04, 279.88] 26
(439.24, 479.08] 26
(399.4, 439.24] 25
(797.8, 837.64] 22
(200.2, 240.04] 22
(359.56, 399.4] 21
(40.84, 80.68] 21
(837.64, 877.48] 21
(757.96, 797.8] 21
(678.28, 718.12] 21
(120.52, 160.36] 20
(479.08, 518.92] 19
(598.6, 638.44] 19
(957.16, 997.0] 18
(917.32, 957.16] 17
(160.36, 200.2] 17
(80.68, 120.52] 17
(638.44, 678.28] 16
(518.92, 558.76] 15
(718.12, 757.96] 15
(558.76, 598.6] 14
(877.48, 917.32] 13
(0.004, 40.84] 13
dtype: int64