#########WOE 评分模型:logistic regression算法在评分卡上的实践

以德国信用数据为例,用logistic regression算法做信用评分卡的原理性实现,因此并未考虑feature selection。


第一步:导入必要的库

1
2
3
import pandas as pd
import numpy as np
# train_test_split is provided by sklearn.model_selection; the former
# sklearn.cross_validation module no longer exists in current scikit-learn.
from sklearn.model_selection import train_test_split


第二步:导入数据

1
2
3
4
5
# Load the credit data: space-separated values, no header row in the file.
german = pd.read_csv('D:/CreditDatasets/german.data', sep=' ', header=None)

# One name per column, in file order (21 names; the last one is the target).
# Every name needs its own comma: adjacent string literals would otherwise be
# concatenated into a single name.
german.columns = [
    'Status_of_existing_checking_account',
    'Duration_in_month',
    'Credit_history',
    'Purpose',
    'Credit_amount',
    'Savings_account',
    'Present_employment_since',
    'Installment_rate',
    'Personal_status_and_sex',
    'Other_debtors',
    'Present_residence_since',
    'Property',
    'Age',
    'Other_installment_plans',
    'Housing',
    'Number_of_existing_credits',
    'Job',
    'Number_of_people',
    'Telephone',
    'foreign_worker',
    'default',
]

# Row counts per target value; this script treats default == 1 as "good"
# and default == 2 as "bad".
Grp = german.groupby('default')
total_good = Grp.size()[1]
total_bad = Grp.size()[2]


第三步:分别计算名义变量和数值变量的woe值,对取值较少的数值变量也用名义变量woe计算方法实现,其余数值变量均5等分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def CalcWOE(VarName):
    """Compute weight of evidence (WOE) and IV for each distinct value of a column.

    Reads the module-level ``german`` frame and the class totals
    ``total_good`` / ``total_bad``. For every distinct value ``v`` of
    ``german[VarName]``::

        good_ratio = (# rows with value v and default == 1) / total_good
        bad_ratio  = (# rows with value v and default == 2) / total_bad
        woe        = ln(bad_ratio / good_ratio)
        iv         = (bad_ratio - good_ratio) * woe

    Args:
        VarName: name of the column in ``german`` to analyse.

    Returns:
        DataFrame with columns ``variable``, ``class``, ``woe``, ``iv`` and
        one row per distinct value, in ``np.unique`` (sorted) order.

    Raises:
        KeyError: if some value has no row with ``default == 1`` or no row
            with ``default == 2`` (its WOE would be undefined).
    """
    rows = []
    for v in np.unique(german[VarName]):
        counts = german[german[VarName] == v].groupby('default').size()
        Good = counts.loc[1]
        Bad = counts.loc[2]
        good_ratio = float(Good) / total_good
        bad_ratio = float(Bad) / total_bad
        WOE = np.log(bad_ratio / good_ratio)
        IV = (bad_ratio - good_ratio) * WOE
        rows.append([VarName, v, WOE, IV])
    # Build the table in one go; DataFrame.append is gone in pandas >= 2.0.
    return pd.DataFrame(rows, columns=['variable', 'class', 'woe', 'iv'])
 
# WOE tables for the categorical columns: one table row per category.
status_checking_account_woe = CalcWOE('Status_of_existing_checking_account')
Credit_history_woe = CalcWOE('Credit_history')
Purpose_woe = CalcWOE('Purpose')
Savings_account_woe = CalcWOE('Savings_account')
Present_employment_since_woe = CalcWOE('Present_employment_since')
Personal_status_and_sex_woe = CalcWOE('Personal_status_and_sex')
Other_debtors_woe = CalcWOE('Other_debtors')
Property_woe = CalcWOE('Property')
Other_installment_plans_woe = CalcWOE('Other_installment_plans')
Housing_woe = CalcWOE('Housing')
Job_woe = CalcWOE('Job')
Telephone_woe = CalcWOE('Telephone')
foreign_worker_woe = CalcWOE('foreign_worker')

# Numeric columns that are not binned: each distinct value is handled
# exactly like a category.
Installment_rate_woe = CalcWOE('Installment_rate')
Present_residence_since_woe = CalcWOE('Present_residence_since')
Number_of_existing_credits_woe = CalcWOE('Number_of_existing_credits')
Number_of_people_woe = CalcWOE('Number_of_people')
 
 
def CalcWOE_bin(VarName, N):
    """Compute WOE and IV for a numeric column split into N equal-width bins.

    Reads the module-level ``german`` frame and the class totals
    ``total_good`` / ``total_bad``. The range ``[min, max]`` of
    ``german[VarName]`` is cut into ``N`` intervals of equal width. The first
    interval is closed on both sides so that the minimum is included; every
    later interval is ``(lower, upper]``, so each row falls in exactly one bin.

    Args:
        VarName: name of the numeric column in ``german``.
        N: number of equal-width bins.

    Returns:
        DataFrame with columns ``variable``, ``class+woe``, ``woe``, ``iv``
        and one row per bin. ``class+woe`` holds the list
        ``[lower, upper, woe]`` for the bin.

    Raises:
        KeyError: if a bin has no row with ``default == 1`` or no row with
            ``default == 2`` (its WOE would be undefined).
    """
    values = german[VarName]
    # linspace returns the end points exactly, so the last upper edge is the
    # column maximum and the largest value cannot fall outside the last bin.
    edges = np.linspace(values.min(), values.max(), N + 1)
    rows = []
    for i in range(N):
        bin_L, bin_U = edges[i], edges[i + 1]
        if i == 0:
            # Only the first bin includes its lower edge (the minimum).
            in_bin = (values >= bin_L) & (values <= bin_U)
        else:
            in_bin = (values > bin_L) & (values <= bin_U)
        counts = german[in_bin].groupby('default').size()
        Good = counts.loc[1]
        Bad = counts.loc[2]
        good_ratio = float(Good) / total_good
        bad_ratio = float(Bad) / total_bad
        WOE = np.log(bad_ratio / good_ratio)
        IV = (bad_ratio - good_ratio) * WOE
        rows.append([VarName, [bin_L, bin_U, WOE], WOE, IV])
    # Build the table in one go; DataFrame.append is gone in pandas >= 2.0.
    return pd.DataFrame(rows, columns=['variable', 'class+woe', 'woe', 'iv'])
 
# WOE tables for the continuous columns, each cut into 5 equal-width bins.
Duration_in_month_woe = CalcWOE_bin('Duration_in_month', 5)
Credit_amount_woe = CalcWOE_bin('Credit_amount', 5)
Age_woe = CalcWOE_bin('Age', 5)




第四步:用woe值替代原来的值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def ReplaceWOE(VarName, SourceDF, VarWOE):
    """Replace each value of a column by its WOE.

    Args:
        VarName: column of ``SourceDF`` to transform.
        SourceDF: frame holding the column; it is modified in place.
        VarWOE: WOE table with a ``class`` column (the original values) and
            a ``woe`` column (the replacement values), paired row by row.

    Returns:
        ``SourceDF`` itself, with ``SourceDF[VarName]`` overwritten. Values
        that do not occur in ``VarWOE['class']`` become NaN.
    """
    # Pair the two columns positionally so the lookup does not depend on the
    # index labels of the WOE table.
    woe_by_class = dict(zip(VarWOE['class'], VarWOE['woe']))
    SourceDF[VarName] = SourceDF[VarName].map(woe_by_class)
    return SourceDF
 
# Work on a copy: ReplaceWOE overwrites columns of the frame it receives, so
# a plain `german_woe = german` would also destroy the raw values in `german`.
german_woe = german.copy()

# Columns whose values are mapped one-to-one onto WOE values, each paired
# with the WOE table computed for it.
for _column, _woe_table in [
    ('Status_of_existing_checking_account', status_checking_account_woe),
    ('Credit_history', Credit_history_woe),
    ('Purpose', Purpose_woe),
    ('Savings_account', Savings_account_woe),
    ('Present_employment_since', Present_employment_since_woe),
    ('Personal_status_and_sex', Personal_status_and_sex_woe),
    ('Other_debtors', Other_debtors_woe),
    ('Property', Property_woe),
    ('Other_installment_plans', Other_installment_plans_woe),
    ('Housing', Housing_woe),
    ('Job', Job_woe),
    ('Telephone', Telephone_woe),
    ('foreign_worker', foreign_worker_woe),
    ('Installment_rate', Installment_rate_woe),
    ('Present_residence_since', Present_residence_since_woe),
    ('Number_of_existing_credits', Number_of_existing_credits_woe),
    ('Number_of_people', Number_of_people_woe),
]:
    german_woe = ReplaceWOE(_column, german_woe, _woe_table)

# The following steps refer to the transformed frame as `temp` / `temp1`.
temp = temp1 = german_woe
 
def ReplaceWOE_bin(VarName, SourceDF, VarWOE):
    """Replace each value of a binned numeric column by the WOE of its bin.

    Args:
        VarName: column of ``SourceDF`` to transform.
        SourceDF: frame holding the column; it is modified in place.
        VarWOE: WOE table whose ``class+woe`` column holds one
            ``[lower, upper, woe]`` triple per bin.

    Returns:
        ``SourceDF`` itself, with ``SourceDF[VarName]`` overwritten. The
        column minimum receives the WOE of the first bin; any other value
        receives the WOE of the bin with ``lower < value <= upper``. Values
        matching no bin become NaN.
    """
    column = SourceDF[VarName]
    m = column.min()
    bins = list(VarWOE['class+woe'])
    dict2 = {}
    for it in np.unique(column):
        if it == m:
            # The minimum sits on the closed lower edge of the first bin,
            # which the half-open test below would miss.
            dict2[it] = bins[0][2]
            continue
        for lower, upper, woe in bins:
            if lower < it <= upper:
                dict2[it] = woe
                break
    SourceDF[VarName] = column.map(dict2)
    return SourceDF
 
# Map the three binned numeric columns onto the WOE of their bins.
# The result alternates between `temp1` and `temp`; it ends up in `temp1`.
temp1 = ReplaceWOE_bin('Duration_in_month', temp, Duration_in_month_woe)
temp = ReplaceWOE_bin('Credit_amount', temp1, Credit_amount_woe)
temp1 = ReplaceWOE_bin('Age', temp, Age_woe)



第五步:将数据集拆分为训练集和测试集

1
2
3
# Predictors: every column except the last one ('default').
X = temp1[list(temp1.columns)[:-1]]
# Target shifted down by one, so the class codes 1 / 2 become 0 / 1.
y = temp1['default'] - 1
# Hold out 10% of the rows for testing; fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)


第六步:在训练集上应用logistic regression算法

1
2
3
4
# Import from the public package; the sklearn.linear_model.logistic
# submodule path is not available in current scikit-learn releases.
from sklearn.linear_model import LogisticRegression

# Fit on the training rows, then predict the held-out rows.
classifier = LogisticRegression()
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)


第七步:评估模型分类精度

1
2
3
4
5
6
from sklearn.metrics import accuracy_score
# print('Accuracy:', accuracy_score(y_test, predictions))

# cross_val_score is provided by sklearn.model_selection; the former
# sklearn.cross_validation module no longer exists in current scikit-learn.
from sklearn.model_selection import cross_val_score
# 5-fold cross-validation scores of the classifier on the training rows.
scores = cross_val_score(classifier, X_train, y_train, cv=5)
# print(np.mean(scores), scores)


第八步:创建评分卡

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Scorecard scaling.
#   score = A - B*log(theta), with log(theta) = beta0 + sum_i coef_i * woe_i
#   P0       = A - B*log(theta0)
#   P0 - PDO = A - B*log(2*theta0)   (B > 0: doubling theta costs PDO points)
P0 = 600
PDO = 20
theta0 = 1.0 / 60
B = PDO / np.log(2)
A = P0 + B * np.log(theta0)
coef = classifier.coef_
beta0 = classifier.intercept_

# WOE tables in the same order as the predictor columns, so that position k
# in this list corresponds to coef[0][k].
_score_tables = [
    status_checking_account_woe,
    Duration_in_month_woe,
    Credit_history_woe,
    Purpose_woe,
    Credit_amount_woe,
    Savings_account_woe,
    Present_employment_since_woe,
    Installment_rate_woe,
    Personal_status_and_sex_woe,
    Other_debtors_woe,
    Present_residence_since_woe,
    Property_woe,
    Age_woe,
    Other_installment_plans_woe,
    Housing_woe,
    Number_of_existing_credits_woe,
    Job_woe,
    Number_of_people_woe,
    Telephone_woe,
    foreign_worker_woe,
]

# The constant part (A - B*beta0) is spread evenly over the 20 variables, so
# that summing one 'score' per variable gives the full score. The intercept
# is taken as a scalar to keep the arithmetic element-wise on each table.
_base_points = (A - B * beta0[0]) / len(_score_tables)
for _k, _table in enumerate(_score_tables):
    _table['score'] = _base_points - B * coef[0][_k] * _table['woe']


  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值