信用评分卡的建立No.2
1.信用评分卡的建立
对于 ScorecardsData 数据集,假设违约与正常的比率(odds)$v$ 为 1/70 时,预期分值 $Z$ 为 700,PDD(比率翻倍所对应的分数)设置为 30。
则根据
$$B = \frac{PDD}{\ln 2}$$
$$A = Z + B \cdot \ln(v)$$
有 $B = 30 / \ln 2 \approx 43.28$,$A = 700 + B \cdot \ln(1/70) \approx 516.12$。
令c为logistic模型的截距项
coef为回归参数的列表
基础分数为 $A - B \cdot c$
每种特征各个区间的分数为 $B \cdot \mathrm{coef} \cdot \mathrm{woe}$(woe 为该数据区间的 WOE 值,coef 为该特征的回归参数)
2.代码实现
# Scorecard computation.

# Per-feature settings used by scorecard_cal:
#   (column name in df, index into model.coef_[0], label).
# The label is used both for the exported file name and the printed heading.
# NOTE(review): the coefficient indices run 6..0, i.e. they assume the model
# was fitted on these columns in the reverse of the order listed here --
# confirm against the training code.
_SCORECARD_FEATURES = (
    ('age', 6, '年龄'),
    ('NumberOfTime30-59DaysPastDueNotWorse', 5, '逾期30-59天笔数'),
    ('DebtRatio', 4, '负债率'),
    ('NumberOfOpenCreditLinesAndLoans', 3, '信贷数量'),
    ('NumberRealEstateLoansOrLines', 2, '固定资产贷款量'),
    ('NumberOfDependents', 1, '家属数量'),
    ('MonthlyIncome_rf', 0, '月收入'),
)


def scorecard_cal(model, df, *, target_score=700, pdo=30, odds=1 / 70):
    """Build the per-feature score tables and the base score of a scorecard.

    The scaling constants are derived as::

        B = pdo / log(2)
        A = target_score + B * log(odds)

    and the base score is ``A - B * c`` where ``c`` is ``model.intercept_``.

    For every feature in ``_SCORECARD_FEATURES`` the function calls
    ``woe_iv`` on the feature column against ``df['SeriousDlqin2yrs']``,
    passes the result to ``get_score`` together with the feature's regression
    coefficient and ``B``, writes the resulting table to
    ``dataset/<label>.xls`` and prints it. The base score is printed last.

    Args:
        model: fitted model exposing ``intercept_`` and a 2-D ``coef_``
            (presumably a scikit-learn logistic regression -- verify against
            the caller).
        df: table containing the seven feature columns and the
            ``SeriousDlqin2yrs`` target column.
        target_score: expected score Z at the reference odds (default 700).
        pdo: number of points that corresponds to doubling the odds
            (default 30).
        odds: reference default-to-normal ratio v (default 1/70).

    Raises:
        ValueError: if ``odds`` is not strictly positive (its logarithm
            would be undefined).
    """
    if odds <= 0:
        raise ValueError("odds must be strictly positive, got %r" % (odds,))

    # Scaling constants A and B.
    B = pdo / np.log(2)
    A = target_score + B * np.log(odds)

    c = model.intercept_  # intercept of the logistic model
    coef = model.coef_  # regression coefficients
    BaseScore = A - B * c

    # Compute every WOE table before anything is written, so a failure here
    # leaves no partially exported set of files.
    woe_tables = [
        woe_iv(x=df[column], y=df['SeriousDlqin2yrs'])
        for column, _, _ in _SCORECARD_FEATURES
    ]

    # NOTE(review): pandas removed its .xls writer (xlwt engine) in 2.0, so
    # these exports are expected to fail on newer pandas -- confirm the
    # installed version or consider switching the outputs to .xlsx.
    score_tables = []
    for (_, coef_index, label), woe_table in zip(_SCORECARD_FEATURES,
                                                 woe_tables):
        score_table = get_score(woe_table, coef[0][coef_index], B)
        score_table.to_excel('dataset/' + label + '.xls', index=False)
        score_tables.append((label, score_table))

    for label, score_table in score_tables:
        print(label + "\n", score_table)
    print('基础值为:', BaseScore)