pandas 自带的分箱函数 pd.cut()(bins 为整数时做等宽分箱,此处传入自定义的区间边界)
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Draw 20 random integer scores from the half-open range [30, 100).
score_list = np.random.randint(low=30, high=100, size=20)
print(score_list)

# Explicit bin edges; pd.cut assigns each score to the interval between
# two consecutive edges.
bins = [0, 59, 70, 80, 100]
score_cat = pd.cut(x=score_list, bins=bins)
print(score_cat)
自定义实现
def create_bins(lower_bound, width, quantity):
    """Return consecutive ``(low, low + width)`` intervals as a list of tuples.

    The first interval starts at ``lower_bound`` and each following interval
    starts where the previous one ends.

    Note: the generating range includes ``lower_bound + quantity * width``
    as a start value, so for a positive ``width`` and non-negative
    ``quantity`` the result holds ``quantity + 1`` intervals.

    Args:
        lower_bound: Integer start of the first interval.
        width: Integer width of every interval (used as the range step, so
            ``0`` raises ``ValueError``).
        quantity: Integer controlling how far the intervals extend.

    Returns:
        A list of ``(low, high)`` tuples in generation order.
    """
    # "+ 1" makes the final start value lower_bound + quantity * width inclusive.
    stop = lower_bound + quantity * width + 1
    return [(low, low + width) for low in range(lower_bound, stop, width)]
# Build the (low, high) intervals for the hand-written binning demo and
# show them.
bins = create_bins(
    lower_bound=50,
    width=4,
    quantity=10,
)
print(bins)
def find_bin(value, bins):
    """Find the bin that contains ``value``.

    Each bin is treated as the half-open interval ``[bin[0], bin[1])``:
    the lower edge is inclusive, the upper edge exclusive.

    Args:
        value: The number to place.
        bins: Sequence of ``(low, high)`` pairs.

    Returns:
        The index of the first bin containing ``value``, or ``-1`` when no
        bin matches.
    """
    for index, interval in enumerate(bins):
        if interval[0] <= value < interval[1]:
            return index
    # Sentinel for "not in any bin"; callers must not use it as a list index.
    return -1
from collections import Counter
# Sample weights to distribute over the bins (unit not stated in this file).
weight_of_persons = [
    49, 73.4, 69.3, 64.9, 75.6, 74.9, 80.3,
    78.6, 84.1, 88.9, 90.3, 83.4, 69.3,
    52.4, 58.3, 67.4, 74.0, 89.3, 63.4,
]

binned_weight = []
for value in weight_of_persons:
    bin_index = find_bin(value, bins)
    # find_bin returns -1 when no bin matches; bins[-1] would silently show
    # the LAST interval for such values, so report None instead.
    matched_bin = bins[bin_index] if bin_index != -1 else None
    print(value, bin_index, matched_bin)
    binned_weight.append(bin_index)

# Count how many weights landed in each bin index (-1 = outside all bins).
frequencies = Counter(binned_weight)
print(frequencies)
[(50, 54), (54, 58), (58, 62), (62, 66), (66, 70), (70, 74), (74, 78), (78, 82), (82, 86), (86, 90), (90, 94)]
49 -1 (90, 94)
73.4 5 (70, 74)
69.3 4 (66, 70)
64.9 3 (62, 66)
75.6 6 (74, 78)
74.9 6 (74, 78)
80.3 7 (78, 82)
78.6 7 (78, 82)
84.1 8 (82, 86)