def Cal_Psi(score, pre_score, length=10):
    """Compute the Population Stability Index (PSI) between two score samples.

    ``score`` is split into ``length`` equal-frequency bins with ``pd.qcut``;
    ``pre_score`` is then bucketed with ``pd.cut`` using those same bin edges.
    For every bin the share of each sample is computed and the result is::

        sum((share_pre - share_score) * ln(share_pre / share_score))

    Args:
        score: Reference scores used to derive the bin edges.
        pre_score: Scores to compare against the reference distribution.
            Values falling outside the edges derived from ``score`` are not
            assigned to any bin, but still count towards the sample size.
        length: Number of quantile bins.

    Returns:
        The PSI value as a float.

    Raises:
        ValueError: If ``score`` or ``pre_score`` is empty. ``pd.qcut`` may
            also raise ``ValueError`` when the quantile edges of ``score``
            are not unique.
    """
    import math

    if len(score) == 0 or len(pre_score) == 0:
        raise ValueError("score and pre_score must both be non-empty")

    labels = ['c' + str(i) for i in range(length)]
    true_bins, edges = pd.qcut(score, q=length, retbins=True, labels=labels)
    # pd.cut intervals are open on the left and closed on the right, so the
    # minimum of `score` would fall outside the first bin. Lower the first
    # edge by 0.001 so the minimum lands in the left-most interval.
    edges[0] = edges[0] - 0.001
    pred_bins = pd.cut(pre_score, bins=edges, labels=labels)

    def bin_shares(binned, total):
        # Go through a plain dict so the lookup does not depend on how a
        # given pandas version names or orders the value_counts() result.
        counts = pd.Series(binned).value_counts().to_dict()
        return [counts.get(label, 0) / total for label in labels]

    # Each sample is normalised by its own size.
    score_shares = bin_shares(true_bins, len(score))
    pre_shares = bin_shares(pred_bins, len(pre_score))

    floor = 0.000001  # stand-in for empty bins so the log ratio stays finite
    psi = 0.0
    for share_score, share_pre in zip(score_shares, pre_shares):
        if share_score == 0:
            share_score = floor
        if share_pre == 0:
            share_pre = floor
        psi += (share_pre - share_score) * math.log(share_pre / share_score)
    return float(psi)