有一段时间没来写博了,一直忙我司申请评分卡、催收评分卡的上线工作,那么我们的评分卡上线后,如何对评分卡的效果进行有效监测,监测哪些指标,监测的指标阈值达到多少我们需要对现有评分卡进行调整更新?这是我们在评分卡上线后需要持续性监测、关注的问题,今天就来跟大家分享一下互金行业评分卡监测的常用手段。
1. 模型稳定性
包括评分卡得分分布的PSI(Population Stability Index), 评分卡所有涉及变量的PSI. 模型分数分布稳定性:监测模型的打分结果的分布是否有变化,主要将评分卡上线后的样本RealData与建模时的样本Train_Data比较。使用的统计指标为PSI(Population Stability Index).使用的指标是PSI.
变量稳定性:监测模型的输入变量的分布是否有变化,主要将评分卡上线后的样本RealData与建模时的样本Train_Data比较。使用的指标也是PSI.
PSI 计算步骤: 假设我们要比较样本A与样本B中某一变量Y的分布,首先按照同一标准将Y分为几个区间(通常分为10段),计算样本A和样本B中每个区间的占比。在每个区间段上,将两个样本的各自占比相除再取对数,然后乘以各自占比之差,最后将各个区间段的计算值相加,得到最终PSI.
以“联名贷”产品申请评分卡监测过程为例,代码实现:
realdata<-read.csv("C:/Users/5609/Desktop/每日定时报表/20171023/CacheData_LMD.csv",header = TRUE)
modeldata<-read.csv("D:/sissi/联名贷/联名贷分数_建模样本.csv",header=TRUE)
realdata$申请日期<-as.Date(realdata$time)
modeldata$申请日期<-as.Date(modeldata$申请日期)
vars <- read.table("variable list.txt", sep = "\t")
vars <- as.character(vars[,1])
for (i in vars){
if(is.character(modeldata[,i]) | is.logical(modeldata[,i])){
modeldata[,i] <- as.factor(modeldata[,i])
}
}
modeldata1<-modeldata[,c("申请编号","申请日期",vars,"pred","groups","groups_n")]
realdata1<-realdata[,c("申请编号","申请日期",vars,"final_score","group")]
# 联名贷评分卡分组
breaks_g <- c( 0, 3.67,
4.49,
5.21,
5.99,
6.83,
8.02,
9.59,
12.44,
19.90,
100.00
)
realdata1$groups <- cut(realdata1$final_score, breaks = breaks_g, include.lowest = FALSE, right = TRUE)
realdata1$groups_n<-as.numeric(realdata1$groups)
####建模数据
tab <- summary(modeldata$groups)
write.table(tab, "clipboard", sep = "\t")
t1 <- summary(modeldata$groups)/dim(modeldata)[1]
write.table(t1, "clipboard", sep = "\t", row.names = FALSE, col.names = FALSE)
# 每组样本量 更新至excel
tab <- summary(realdata1$groups)
write.table(tab, "clipboard", sep = "\t")
# 每组占比 更新至excel
t2 <- summary(realdata1$groups)/dim(realdata1)[1]
write.table(t2, "clipboard", sep = "\t", row.names = FALSE, col.names = FALSE)
PSI <- sum((t2-t1)*log(t2/t1))
PSI
# 变量 PSI ----------------------------------------------------------------
vars <- read.table("variable list.txt", sep = "\t")
vars <- as.character(vars[,1])
# 调整变量值 (评分卡对输入变量的值有调整,将调整后的值与建模时的数据做比较)
#loan_query_12MA_level
realdata1$loan_query_12MA_level <- cut(realdata1$loan_query_12MA, breaks = c(0, 1.5,Inf),
include.lowest = TRUE)
realdata1$loan_query_12MA_level <- as.character(realdata1$loan_query_12MA_level)
index <- is.na(realdata1$loan_query_12MA_level)
realdata1[index, "loan_query_12MA_level"] <- "NA"
realdata1$loan_query_12MA_level <- as.factor(realdata1$loan_query_12MA_level)
levels(realdata1$loan_query_12MA_level) <- c( "2_(1.5,Inf]", "1_[0,1.5] & NA","1_[0,1.5] & NA" )
realdata1$loan_query_12MA_level <- as.character(realdata1$loan_query_12MA_level)
# 未结清贷款笔数
realdata1$未结清贷款笔数_level <- cut(realdata1$未结清贷款笔数_level,
breaks = c(0, 5, Inf),
include.lowest = TRUE, right = FALSE)
realdata1$未结清贷款笔数_level <- as.factor(as.character(realdata1$未结清贷款笔数_level))
index <- is.na(realdata1$未结清贷款笔数_level)
realdata1[index, "未结清贷款笔数_level"] <- "[0,5)"
#贷款类别
realdata1$贷款类别 <- as.factor(as.character(realdata1$贷款类别))
levels(realdata1$贷款类别) <- c( "新贷款", "再贷","续贷" )
modeldata1[, "贷款类别"] <- ordered(
modeldata1[, "贷款类别"],
levels=c("新贷款", "再贷", "续贷"),
labels=c('新贷款', '再贷', '续贷')
);
table(modeldata1[, "贷款类别"])
#modeldata1[order(modeldata1[, "贷款类别"]),]
#名下物业数量_所有联名人
index <- is.na(realdata1$名下物业数量_所有联名人)
realdata1[index, "名下物业数量_所有联名人"] <- 0
index <- realdata1$名下物业数量_所有联名人 > 3
realdata1[index, "名下物业数量_所有联名人"] <- 3
#要求贷款期限_level
realdata1$要求贷款期限_level <- cut(realdata1$要求贷款期限, breaks = c(0,18,36),
include.lowest = FALSE, right =