# Chi-square binning (ChiMerge) code
def _adjacent_chi2(pair):
    """Return the chi-square statistic for two adjacent bins.

    ``pair`` is a two-row slice holding the ``bad`` and ``good`` counts of
    neighbouring bins. When either bin is empty, or when both bins contain
    no bad (or no good) samples, the expected-frequency table contains a
    zero and ``scipy.stats.chi2_contingency`` raises ``ValueError``. Such
    pairs cannot be told apart by the test, so they score 0.0 and become
    the first candidates for merging.
    """
    table = pair.to_numpy(dtype=float)
    if (table.sum(axis=0) == 0).any() or (table.sum(axis=1) == 0).any():
        return 0.0
    # scipy's default settings are kept, matching the plain call this
    # helper replaces.
    return stats.chi2_contingency(table)[0]


def ChiMerge(train_new, feature, chuShiQieXiang, xiangShu):
    """Bin a numeric feature by bottom-up chi-square merging (ChiMerge).

    The feature is first cut into quantile bins with ``pd.qcut``. Then the
    pair of adjacent bins with the smallest chi-square statistic (the first
    such pair on ties) is merged repeatedly until no more than ``xiangShu``
    bins remain.

    Args:
        train_new: DataFrame whose FIRST column is the label and which
            contains the column ``feature``. The label is summed to get the
            "bad" count per bin, so it is presumably 0/1 with 1 = bad --
            confirm against the caller.
        feature: Name of the column to bin.
        chuShiQieXiang: Initial number of quantile bins (passed to
            ``pd.qcut`` as ``q``).
        xiangShu: Target number of bins. Values below 1 are treated as 1,
            because a single bin cannot be merged any further.

    Returns:
        DataFrame with one row per final bin, ordered by bin position, and
        the columns ``'bad'``, ``'good'`` and ``feature``; the ``feature``
        column holds the ``pd.Interval`` of each bin.

    Raises:
        ValueError: Propagated from ``pd.qcut`` when the quantile edges are
            not unique (heavily tied feature values).
    """
    # A private column name is used for the bins so that a feature that
    # happens to be called 'X' or 'Y' cannot clobber the label column.
    d1 = pd.DataFrame({
        'Y': train_new.iloc[:, 0],
        '_bin': pd.qcut(train_new[feature], chuShiQieXiang),
    })
    # observed=False keeps quantile bins that received no rows, so the
    # final intervals still cover the full range without gaps.
    counts = d1.groupby('_bin', observed=False)['Y'].agg(['sum', 'count'])
    d3 = pd.DataFrame({
        'bad': counts['sum'],
        'good': counts['count'] - counts['sum'],
    })
    d3['index'] = d3.index
    d3.index = range(len(d3))

    min_bins = max(xiangShu, 1)
    while len(d3) > min_bins:
        X2 = [
            _adjacent_chi2(d3.iloc[i:i + 2, 0:2])
            for i in range(len(d3) - 1)
        ]
        ind = X2.index(min(X2))

        merged = pd.DataFrame(
            {
                'bad': [d3['bad'].iloc[ind] + d3['bad'].iloc[ind + 1]],
                'good': [d3['good'].iloc[ind] + d3['good'].iloc[ind + 1]],
                'index': [pd.Interval(d3['index'].iloc[ind].left,
                                      d3['index'].iloc[ind + 1].right)],
            },
            # Label the merged row with the position of the pair's first
            # bin so that sorting by index restores the bin order.
            index=[ind],
        )
        # Object dtype keeps the concat result independent of whether the
        # remaining rows are categorical, object, or empty.
        merged['index'] = merged['index'].astype(object)

        remaining = d3.drop(d3.index[ind:ind + 2])
        d3 = pd.concat([remaining, merged]).sort_index()
        d3.index = range(len(d3))

    d3.columns = ['bad', 'good', feature]
    return d3
# Chi-square binning
# (Source-page footer: latest recommended article published 2024-05-23 18:00:05)