Scorecard R

#install.packages("scorecard")
# Traditional Credit Scoring Using Logistic Regression
library(scorecard)


data(germancredit)
# Example I
dt = germancredit[, c("creditability", "credit.amount", "purpose")]
# binning for dt
bins = woebin(dt, y = "creditability")
# converting to woe
dt_woe = woebin_ply(dt, bins=bins)
str(dt_woe)
# converting to bin
dt_bin = woebin_ply(dt, bins=bins, to = 'bin')
str(dt_bin)

# data preparing ------
# load germancredit data
data("germancredit")
# filter variable via missing rate, iv, identical value rate
dt_f = var_filter(germancredit, y="creditability")
# breaking dt into train and test
dt_list = split_df(dt_f, y="creditability", ratio = c(0.6, 0.4), seed = 30)
label_list = lapply(dt_list, function(x) x$creditability)

# woe binning ------
bins = woebin(dt_f, y="creditability")
bins_adj = bins
# woebin_plot(bins)

# converting train and test into woe values
dt_woe_list = lapply(dt_list, function(x) woebin_ply(x, bins_adj))

dt_bin_list = lapply(dt_list, function(x) woebin_ply(x, bins_adj,to = 'bin'))

  
  # glm / selecting variables ------
  m1 = glm( creditability ~ ., family = binomial(), data = dt_woe_list$train)
  # vif(m1, merge_coef = TRUE) # summary(m1)
  # Select a formula-based model by AIC (or by LASSO for large dataset)
  m_step = step(m1, direction="both", trace = FALSE)
  m2 = eval(m_step$call)
  # vif(m2, merge_coef = TRUE) # summary(m2)
  
  # performance ks & roc ------
  ## predicted proability
  pred_list = lapply(dt_woe_list, function(x) predict(m2, x, type='response'))
  ## Adjusting for oversampling (support.sas.com/kb/22/601.html)
  # card_prob_adj = scorecard2(bins_adj, dt=dt_list$train, y='creditability', 
  #                x=sub('_woe$','',names(coef(m2))[-1]), badprob_pop=0.03, return_prob=TRUE)
  
  ## performance
  perf = perf_eva(pred = pred_list, label = label_list)
  # perf_adj = perf_eva(pred = card_prob_adj$prob, label = label_list$train)
  
  # score ------
  ## scorecard
  card = scorecard(bins_adj, m2)
  ## credit score
  score_list = lapply(dt_list, function(x) scorecard_ply(x, card))
  
  score_list2 = lapply(dt_list, function(x) scorecard_ply(x, card, only_total_score=FALSE))
  
    ctrain = cbind(dt_list$train,data.frame(dt_bin_list[1]),data.frame(dt_woe_list[1]),data.frame(score_list2[1]))
    
  write.csv(ctrain,"/home/star/balack_namelist/scorecard/CreditScoreModel-master/train.csv",row.names = F)

write.csv(score_list2[1],"/home/star/balack_namelist/scorecard/CreditScoreModel-master/train.csv",row.names = F)

## psi
perf_psi(score = score_list, label = label_list)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

AI周红伟

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值