信贷风控策略/模型监控_基于sql优化

**

1 sql编写规范性

**

编写原则:有序不杂乱、直观可读性强、不降低执行效率。

  1. 换行和缩进;(union、case when、占比等串行语句;整体三段式:查询段-表关联段-条件判断段)
  2. 排列有序可循;(关联表:数据来源/业务流程分块;字段排序同理)
  3. 勤写注释;(表名comment; 字段码值 ;注意事项说明;判断逻辑–复杂判断逻辑注释,便于理解,尤其特殊非通用性条件,如样本选择和时间段注释,方便溯源)
  4. 表命名避免使用a b c或 t1 t2 t3之类,避免混淆;
  5. 字段带表名,便于核对数据源;
  6. 逗号写在前,增加可读性,便于查错;
  7. 尽量避免使用in,尤其客户id。

【例1】信贷业务多表关联编写举例:

create table detail as 
select 
DATE_FORMAT(sqzhb.APPL_TIME,'%Y-%m')  as 申请月份
,dkjg.PRODUCT_CODE as 产品小类
,case when  pjmx.SCORE is null then '未评分'  
 when pjmx.SCORE<=20 then '  (0,20]'  when pjmx.SCORE<=40 then ' (20,40]'  when pjmx.SCORE<=60 then ' (40,60]' 
 when pjmx.SCORE<=80 then ' (60,80]'  when pjmx.SCORE<=90 then ' (80,90]' else '(90,100]' end as 评分区间 

-- 1、申请审批情况
from ( select * from (select a.* ,@num2 := IF(@str2 =  APPL_CERTIFICATE_NUMBER ,@num2 + 1, 1) as rn
 from LM_LOAN_APPLICATION_INFO a ,(SELECT @str2 := '', @num2 := 0) b
 order by APPL_CERTIFICATE_NUMBER ,APPL_TIME desc )t where rn=1
) as  sqzhb -- 【1-申请主表:LM_LOAN_APPLICATION_INFO】申请客户去重,ROW_NUMBER() OVER()在此mysql版本不支持
left join LM_LOAN_APPLICATION as sqzb on sqzhb.req_id = sqzb.original_appl_id -- 【2-申请子表:LM_LOAN_APPLICATION】
left join lm_application_result as dkjg on sqzb.req_id = dkjg.req_id -- 【3-贷款结果表:lm_application_result】
left join lm_application_audit_result as  dkjjsp on sqzb.business_id = dkjjsp.business_id and sqzb.PRODUCT_CODE=dkjjsp.PRODUCT_CODE -- 【4-最终进件表:lm_application_audit_result】

-- 2、签约情况
left join lm_loan_apply_report_list as cont on dkjg.business_id = cont.business_id and dkjg.PRODUCT_CODE = cont.PRODUCT_CODE -- 【5-合同信息表:lm_loan_apply_report_list】

-- 3、支用申请情况
left join (select  zyjg.business_id, zysq.APPROVAL_BUSINESS_ID ,dkjjsp.PRODUCT_CODE ,sum(zysq.DISBURSE_AMOUNT) as  DISBURSE_AMOUNT
  from (select * from  lm_disburse_result where DISBURSE_RESULT='Y') zyjg -- 【6-支用结果表:lm_disburse_result】
  left join (select * from  lm_disburse_application where STATUS='2') as zysq  on zysq.DISBURSE_ID = zyjg.DISBURSE_ID --【7-支用申请表:lm_disburse_application 】
  left join lm_application_audit_result as  dkjjsp on zysq.business_id = dkjjsp.business_id -- 最终进件表
  group by zyjg.business_id, zysq.APPROVAL_BUSINESS_ID, dkjjsp.PRODUCT_CODE
) as zyxx on dkjg.business_id = zyxx.business_id  and dkjg.PRODUCT_CODE = zyxx.PRODUCT_CODE  

-- 4、放款情况
left join( 
select
loan.cont_no	-- as 合同编号
,dkjjsp.business_id 
,dkjjsp.PRODUCT_CODE
,max(loan.ovdue_days) as 最长逾期天数
,sum(repayplan.should_pay_amt) as 应还金额
from  bdp_cm_inte_co_lon_info as loan -- 【8-借据信息表:bdp_cm_inte_co_lon_info】
left join ( select dubil_no ,sum(repay_amt) as	repay_amt
 from bdp_cm_inte_co_lon_repay_dtl    -- 【9-还款明细表:bdp_cm_inte_co_lon_repay_dtl】
 group by dubil_no
 ) as repay  on loan.dubil_no = repay.dubil_no 
left join( select dubil_no ,sum(prin_total) as	prin_total ,sum(int_total) as int_total ,sum(should_pay_amt) as should_pay_amt
 from bdp_cm_inte_co_lon_repay_plan   -- 【10-还款计划表:bdp_cm_inte_co_lon_repay_plan】
 group by dubil_no
 ) as repayplan on loan.dubil_no = repayplan.dubil_no 
left join (select distinct business_id , APPROVAL_BUSINESS_ID from  lm_disburse_application where STATUS='2'
 ) as zysq on loan.cont_no = zysq.APPROVAL_BUSINESS_ID -- 支用申请表
left join lm_application_audit_result as  dkjjsp on zysq.business_id = dkjjsp.business_id -- 最终进件审批表
 group by cont_no ,dkjjsp.business_id ,dkjjsp.PRODUCT_CODE
) as dubil on dkjg.business_id = dubil.business_id and dkjg.PRODUCT_CODE = dubil.PRODUCT_CODE

-- 5、模型审批结果
left join (select *  from lm_model_result 
  where IS_COMPOSITIVE='1' and STATUS='1' and MODEL_TYPE='GRADE' 
) as pjmx on dkjg.req_id = pjmx.req_id  -- 评级模型
;

Comment On table detail Is '产品流程明细表'

**

2 sql编写简洁性

**
去繁求简:快速查错减少debug、便于理解和复用。

  1. 多条件判断,尤其嵌套判断,可结合使用decode()和case when;
  2. 高频出现语句或字段先定义,避免重复创建;(语句用with as;字段打标签)
  3. 统计交叉矩阵可使用行转列函数pivot();

【例2】评分/评级&额度逾期率交叉分布(with… as…pivot())

with detail as --到期客户明细
(
select 
a.*
,b.overdueday
,case when a.suggestline/10000<10 then '[0,10)'
     when a.suggestline/10000<20  then '[10,20)'
     when a.suggestline/10000<30  then '[20,30)'
     when a.suggestline/10000<40  then '[30,40)'
     when a.suggestline/10000<50  then '[40,50)'
     when a.suggestline/10000<60  then '[50,60)'
     when a.suggestline/10000<70  then '[60,70)'
     when a.suggestline/10000<80  then '[70,80)'
     when a.suggestline/10000<90  then '[80,90)'
     when a.suggestline/10000<100 then '[90,100)'
     when a.suggestline/10000<150 then '[100,150)'
     when a.suggestline/10000<200 then '[150,200)'
     when a.suggestline/10000<250 then '[200,250)'
     when a.suggestline/10000<=300 then '[250,300]'
     else '>300'
 end as edqj
from vm_a_first_table a
left join vm_a_second_table b on a.businessid  = b.businessid  
where a.suggestline >= 0 and b.closedate <= trunc(sysdate,'mm')-1 
) 

方法1-评分&额度逾期率交叉分布:

select
pfqj as "模型评分/额度"--到期逾期率=逾期数/到期数
,to_char(count(case yq.edqj when '[0,10)'    Then 1 end)/count(case dq.edqj when '[0,10)'    Then 1 end)*100,'990.99')||'%'  as "[0,10)"
,to_char(count(case yq.edqj when '[10,20)'   Then 1 end)/count(case dq.edqj when '[10,20)'   Then 1 end)*100,'990.99')||'%'  as "[10,20)"
,to_char(count(case yq.edqj when '[20,30)'   Then 1 end)/count(case dq.edqj when '[20,30)'   Then 1 end)*100,'990.99')||'%'  as "[20,30)"
,to_char(count(case yq.edqj when '[30,40)'   Then 1 end)/count(case dq.edqj when '[30,40)'   Then 1 end)*100,'990.99')||'%'  as "[30,40)"
,to_char(count(case yq.edqj when '[40,50)'   Then 1 end)/count(case dq.edqj when '[40,50)'   Then 1 end)*100,'990.99')||'%'  as "[40,50)"
,to_char(count(case yq.edqj when '[50,60)'   Then 1 end)/count(case dq.edqj when '[50,60)'   Then 1 end)*100,'990.99')||'%'  as "[50,60)"
,to_char(count(case yq.edqj when '[60,70)'   Then 1 end)/count(case dq.edqj when '[60,70)'   Then 1 end)*100,'990.99')||'%'  as "[60,70)"
,to_char(count(case yq.edqj when '[70,80)'   Then 1 end)/count(case dq.edqj when '[70,80)'   Then 1 end)*100,'990.99')||'%'  as "[70,80)"
,to_char(count(case yq.edqj when '[80,90)'   Then 1 end)/count(case dq.edqj when '[80,90)'   Then 1 end)*100,'990.99')||'%'  as "[80,90)"
,to_char(count(case yq.edqj when '[90,100)'  Then 1 end)/count(case dq.edqj when '[90,100)'  Then 1 end)*100,'990.99')||'%'  as "[90,100)"
,to_char(count(case yq.edqj when '[100,150)' Then 1 end)/count(case dq.edqj when '[100,150)' Then 1 end)*100,'990.99')||'%'  as "[100,150)"
,to_char(count(case yq.edqj when '[150,200)' Then 1 end)/count(case dq.edqj when '[150,200)' Then 1 end)*100,'990.99')||'%'  as "[150,200)"
,to_char(count(case yq.edqj when '[200,250)' Then 1 end)/count(case dq.edqj when '[200,250)' Then 1 end)*100,'990.99')||'%'  as "[200,250)"
,to_char(count(case yq.edqj when '[250,300]' Then 1 end)/count(case dq.edqj when '[250,300]' Then 1 end)*100,'990.99')||'%'  as "[250,300]"
,to_char(count(case yq.edqj when '>300' Then 1 end)/count(case dq.edqj when '>300' Then 1 end)*100,'990.99')||'%'  as ">300"

from detail dq --到期客户
group by pfqj
order by pfqj

–> 计算结果如下图:
在这里插入图片描述

方法2-额度&评级逾期率交叉分布:

SELECT *  FROM (
 select
 customergrade as 模型评级
 ,edqj as 额度
 ,to_char(decode(count(1),0,0,count(case when overdueday >=30 then 1 end)/count(1))*100,'990.99')||'%' as 到期逾期率 --到期逾期率=逾期数/到期数
 from detail  --到期客户
 group by customergrade
 order by customergrade
) P
PIVOT --行转列PIVOT,列传行则用UNPIVOT
(
   max(到期逾期率) FOR 模型评级 IN ( 'R1','R2','R3','R4','R5') 
) T

–> 计算结果如下图:
在这里插入图片描述

  1. 简化合计统计提高执行效率,group by rollup()替换union all;
  2. 巧用开窗函数over(),用法如下:

1)排序:row_number() / dense_rank() / rank() / percent_rank() / cume_dist() / ntile() over();
2)聚合:sum() / count() / min() / max() / avg() / var() over();
3)行偏移:first() / last() / lag() / lead() over();
4)占比:ratio_to_report() over()

【例3】取最新预警数据(dense_rank() /row_number() over())

select b.* from (
select a.*
  ,row_number()  over(partition by BUSINESS_ID order by UPDATE_DATE desc) rn2 
  from( 
     select wn.* 
     ,jg.createtime as jgsj ,ct.startdate ,ct.enddate --合同到期时间
     ,ct.amount --合同金额
     ,dense_rank() over(partition by wn.CONTRACT_NUM order by jg.createtime desc) rn1
     from  warn_contact_info wn 
     left join second_table_details ct on wn.business_id = ct.business_id 
     left join tb_resultinformation jg  on jg.BUSINESSID = wn.BUSINESS_ID
     )a 
  where a.rn1=1 --合同号取最新业务申请
  ) b 
  where b.rn2=1 --取最新预警数据 

【例4】指标分箱分布(ratio_to_report() over() … group by rollup())

select 
decode(grouping(quotas_value_qj),1,'合计',quotas_value_qj) as 指标分箱  --grouping()=1为生成的合计项
,count(1) as 申请客户数
,to_char(ratio_to_report(count(1)) over(partition by grouping(quotas_value_qj))*100,'990.90') ||'%' as 申请客户数占比 --求纵向占比,合计项为100%
,count(case when comprehensiveadmission=0 then 1 end )as 授信客户数
,to_char(ratio_to_report(count(case when comprehensiveadmission=0 then 1 end )) over(partition by grouping(quotas_value_qj))*100,'990.90') ||'%'  as 授信客户数占比
,case when count(1) >0 then 
to_char(count(case when comprehensiveadmission=0 then 1 end )/count(1) *100,'990.90') ||'%' else '0.00%' end as 授信率

from vm_a_first_table t 
left join ( select a.*
,CASE WHEN to_number(quotas_value)>=300000 THEN '[30,∞)'
       WHEN to_number(quotas_value)>=250000 THEN '[25,30)'
       WHEN to_number(quotas_value)>=200000 THEN '[20,25)'
       WHEN to_number(quotas_value)>=150000 THEN '[15,20)'
       WHEN to_number(quotas_value)>=100000 THEN '[10,15)'
       WHEN to_number(quotas_value)>=80000 THEN '[8,10)'
       WHEN to_number(quotas_value)>=60000 THEN '[6,8)'
       WHEN to_number(quotas_value)>=40000 THEN '[4,6)'
       WHEN to_number(quotas_value)>=20000 THEN '[2,4)'
       WHEN to_number(quotas_value)>0 THEN '(0,2)'
       else '[0]' end as  quotas_value_qj
from ide_quotas_data_value a
where quotas_code='CPED_QX_HM_ICR02_001'
)  t1  on t.req_id=t1.req_id

where  t1.quotas_value_qj is not null and t.state=0 
group by rollup(quotas_value_qj)  --按指标分箱后汇总统计
order by decode(quotas_value_qj,'[0]',1,'(0,2)',2,'[2,4)',3,'[4,6)',4,'[6,8)',5,'[8,10)',6,'[10,15)',7,'[15,20)',8, '[20,25)',9, '[25,30)',10, '[30,∞)',11) --分档强制排序,或分档前加空格等优先级高符号预处理

–> 计算结果如下图:
在这里插入图片描述

【例5】准入指标单一触碰率(count()/sum()/lag() over())


select zbcp.*  --当月、上月、本季、累计触碰率对比
,lag(单一触碰率,1,0) over(partition by 指标名称 order by 月份) as 上月单一触碰率 --偏移可用作计算同比、环比等
,to_char( sum(单一触碰次数) over(partition by 指标名称 order by 月份 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) 
 / sum(判定数) over(partition by 指标名称 order by 月份 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) *100,'990.99')||'%' as 当季单一触碰率 
,to_char( sum(单一触碰次数) over(partition by 指标名称) 
 / sum(判定数) over(partition by 指标名称) *100,'990.99')||'%' as 累计单一触碰率 

from(  --先统计各月指标触碰率

select 
substr(createtime,1,7) as 月份
,RULE_NAME as 指标名称
,count(1)as 判定次数
,count(case when RESULT_VALUE='N' then 1 end) as 触碰次数
,count(case when RESULT_VALUE='N' and sum1=1 then 1 end) as 单一触碰次数
,to_char(count(case when RESULT_VALUE='N' then 1 end)/count(1)*100,'990.99')||'%' as 触碰率
,to_char(count(case when RESULT_VALUE='N' and sum1=1 then 1 end)/count(1)*100,'990.99')||'%' as 单一触碰率 
from (
 select a.* ,count(case when result_value ='N' then 1 end) over(partition by req_id) as  sum1 --客户准入指标触碰数
 from IDE_RULE_RESULT a 
 where RESULT_VALUE is not null and MODEL_CODE = 'ZRMX' -- 准入模型
 )t
group by substr(createtime,1,7) ,RULE_NAME
order by substr(createtime,1,7) ,RULE_NAME

)zbcp

where trunc(createtime,'MM') = add_months(trunc(sysdate,'MM'),-1)  -- 取当前时间上月

–> 计算结果如下图:
在这里插入图片描述

【例6】评级模型效果评估(ks/auc/ar)

select 
s2.PRODUCT_NAME  as 产品名称
,ROUND(MAX(ks_i),4) AS KS
-- KS = MAX(KS_i) = max(tpr-fpr)
,ROUND(sum(auc_i),4) as AUC
-- AUC = SUM(auc_i)= sum((TPR+PTPR)/2*(FPR-PFPR))
,ROUND(sum(auc_i)*2-1,4) as AR
-- AR = SUM(auc_i)*2-1
from (
  select s1.* 
  ,tpr -fpr as ks_i
  -- KS(i)= 累计坏客户比-累计好客户比= (累计坏客户/所有坏客户)-(累计好客户/所有好客户)
  ,(tpr+lag(tpr,1,0)over(partition by PRODUCT_NAME order by customerscore ))/2*(fpr-lag(fpr,1,0)over(partition by PRODUCT_NAME order by customerscore )) as auc_i
  -- auc_i = 累计坏客户比*新增好客户比 = TPR*(FPR-PFPR) ~ (TPR+PTPR)/2*(FPR-PFPR) 
  from 
  (
    select  
    a.PRODUCT_NAME ,a.business_id ,a.customerscore as customerscore ,a.cjsj
    ,b.overdueday -- 历史最大本息逾期天数
    ,b.typelabel -- 客户标签
    ,count(case when b.typelabel ='bad' then 1 end)over(partition by a.PRODUCT_NAME order by a.customerscore) 
      /count(case when b.typelabel ='bad' then 1 end)over(partition by a.PRODUCT_NAME ) as tpr -- 坏样本累计占比:TPR真正例率/召回率
    ,count(case when b.typelabel ='good' then 1 end)over(partition by a.PRODUCT_NAME order by a.customerscore)
      /count(case when b.typelabel ='good' then 1 end)over(partition by a.PRODUCT_NAME ) as fpr -- 好样本累计占比:FPR假正例率=1-特异性
    from vm_a_first_table a 
    left join vm_a_fourth_table b on a.business_id = b.business_id 
    where b.loanamount>0 and b.typelabel in ('bad' ,'good')  -- 样本选取:放款好坏样本标签
    order by a.PRODUCT_NAME ,a.customerscore -- 客户评分升序排序
   )s1 
)s2
group by s2.PRODUCT_NAME 
order by s2.PRODUCT_NAME

–> 计算结果如下图:
在这里插入图片描述

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值