需求:我想做等频分箱。例如某个 score 变量有不同的分数,先按分数升序排列,再按人数的分位数进行分箱,最终实现下图的效果。
数据下载链接:https://bbs.pinggu.org/forum.php?mod=viewthread&tid=9485342&extra=
解答:
#------------------------------------------------------------
#导入数据
#--------------------------------------------------------------
import pandas as pd
import numpy as np
# Load the sample from the workbook; this section reads its "score1" column.
data1=pd.read_excel("D:\\forver.xlsx")
# Quintile cut points (20/40/60/80 %) of score1: the inner edges of five
# equal-frequency bins.
qq=data1["score1"].quantile([0.2,0.4,0.6,0.8])
min1=data1["score1"].min()
max1=data1["score1"].max()
# The outer edges must enclose every observation. The lower edge stays at 0
# when all scores are positive and drops to the observed minimum otherwise,
# so that scores at or below 0 are not left outside the bins.
# NOTE(review): pd.cut raises on duplicate edges, which can happen when many
# rows share the same score -- confirm against the real data.
intervel=[min(0,min1)]+list(qq)+[max1]
# Sort the rows by score, ascending.
data1.sort_values(by="score1",ascending=True,inplace=True)
# Bin the score twice: once keeping the interval itself as the category,
# once with the ordinal labels 1..5.
# include_lowest=True closes the first bin on the left; without it a score
# equal to the lowest edge would be assigned NaN.
data1["score1分箱l"]=pd.cut(data1.score1,intervel,include_lowest=True)
data1["score1分箱"]=pd.cut(data1.score1,intervel,labels=[1,2,3,4,5],include_lowest=True)
#---------------------------------------------------------------
# Prepare the data used by the chart
#-----------------------------------------------------------------
# Count the samples in each bin and derive each bin's bad rate.
# target == 1 marks a "bad" sample, target == 0 a "good" one.
bin_labels=data1["score1分箱l"]
# Total head count per bin, ordered by bin.
score1各箱人数=bin_labels.value_counts().sort_index()
# Head count per bin restricted to bad / good samples.
score1badcount=bin_labels[data1["target"]==1].value_counts()
score1goodcount=bin_labels[data1["target"]==0].value_counts()
# Align the three counts on the bin index, side by side; `keys` supplies
# the column names of the resulting frame.
picdata=pd.concat(
    [score1各箱人数,score1badcount,score1goodcount],
    axis=1,
    keys=["score1各箱人数","score1badcount","score1goodcount"],
)
# Bad rate per bin = bad count / total count.
picdata["badper"]=picdata["score1badcount"].div(picdata["score1各箱人数"])
#
#---------------------------------------------------------------
# Plot
#-------------------------------------------------------------------
# Build one "low-high" tick label per bin from each pair of consecutive bin
# edges; the edges are truncated to integers for display.
xticklabel=[]
for i,(low,high) in enumerate(zip(intervel,intervel[1:])):
    print(i)
    tt=str(int(low))+"-"+str(int(high))
    print(tt)
    xticklabel.append(tt)
import matplotlib.pyplot as plt
# Use a font with CJK glyphs so the Chinese labels do not render as boxes.
plt.rcParams['font.sans-serif'] = ['Simhei']
fig,ax1=plt.subplots()
# Overlay trick: the red bar is drawn at the bin's total height and the blue
# "good" bar is painted over it, so the red part left visible on top is the
# bad count.
ax1.bar(xticklabel,picdata["score1各箱人数"],label="坏人数",color='r',width=0.6)
ax1.bar(xticklabel,picdata["score1goodcount"],label="好人数",color='b',width=0.6)
# Keep the original 0-300 range, but grow it (with 10 % headroom) when a bin
# holds more samples so the bars are never clipped.
ax1.set_ylim([0,max(300,picdata["score1各箱人数"].max()*1.1)])  # y-axis limits
ax1.legend(loc=2)
# Secondary y-axis on the same x-axis for the bad-rate line.
ax2=ax1.twinx()
# One point per bar; the positions follow the number of bins instead of a
# hard-coded 5.
ax2.plot(np.arange(len(xticklabel)),picdata["badper"],'r',label="bad百分比")
ax2.set_ylim([0,1])  # y-axis limits
ax2.legend(loc=1)
# The categorical bar axis already shows the bin labels; only restyle them
# rather than overwriting the tick labels without fixing tick positions.
ax1.tick_params(axis="x",labelsize=20,labelrotation=45)
fig.suptitle(u'score1的情况',fontsize=15)