**
1 sql编写规范性
**
编写原则:有序不杂乱、直观可读性强、不降低执行效率。
- 换行和缩进;(union、case when、占比等串行语句;整体三段式:查询段-表关联段-条件判断段)
- 排列有序可循;(关联表:数据来源/业务流程分块;字段排序同理)
- 勤写注释;(表名comment; 字段码值 ;注意事项说明;判断逻辑–复杂判断逻辑注释,便于理解,尤其特殊非通用性条件,如样本选择和时间段注释,方便溯源)
- 表命名避免使用a b c或 t1 t2 t3之类,避免混淆;
- 字段带表名,便于核对数据源;
- 逗号写在前,增加可读性,便于查错;
- 尽量避免使用in,尤其客户id。
【例1】信贷业务多表关联编写举例:
create table detail as
select
DATE_FORMAT(sqzhb.APPL_TIME,'%Y-%m') as 申请月份
,dkjg.PRODUCT_CODE as 产品小类
,case when pjmx.SCORE is null then '未评分'
when pjmx.SCORE<=20 then ' (0,20]' when pjmx.SCORE<=40 then ' (20,40]' when pjmx.SCORE<=60 then ' (40,60]'
when pjmx.SCORE<=80 then ' (60,80]' when pjmx.SCORE<=90 then ' (80,90]' else '(90,100]' end as 评分区间
-- 1、申请审批情况
from ( select * from (select a.* ,@num2 := IF(@str2 = APPL_CERTIFICATE_NUMBER ,@num2 + 1, 1) as rn
from LM_LOAN_APPLICATION_INFO a ,(SELECT @str2 := '', @num2 := 0) b
order by APPL_CERTIFICATE_NUMBER ,APPL_TIME desc )t where rn=1
) as sqzhb -- 【1-申请主表:LM_LOAN_APPLICATION_INFO】申请客户去重,ROW_NUMBER() OVER()在此mysql版本不支持
left join LM_LOAN_APPLICATION as sqzb on sqzhb.req_id = sqzb.original_appl_id -- 【2-申请子表:LM_LOAN_APPLICATION】
left join lm_application_result as dkjg on sqzb.req_id = dkjg.req_id -- 【3-贷款结果表:lm_application_result】
left join lm_application_audit_result as dkjjsp on sqzb.business_id = dkjjsp.business_id and sqzb.PRODUCT_CODE=dkjjsp.PRODUCT_CODE -- 【4-最终进件表:lm_application_audit_result】
-- 2、签约情况
left join lm_loan_apply_report_list as cont on dkjg.business_id = cont.business_id and dkjg.PRODUCT_CODE = cont.PRODUCT_CODE -- 【5-合同信息表:lm_loan_apply_report_list】
-- 3、支用申请情况
left join (select zyjg.business_id, zysq.APPROVAL_BUSINESS_ID ,dkjjsp.PRODUCT_CODE ,sum(zysq.DISBURSE_AMOUNT) as DISBURSE_AMOUNT
from (select * from lm_disburse_result where DISBURSE_RESULT='Y') zyjg -- 【6-支用结果表:lm_disburse_result】
left join (select * from lm_disburse_application where STATUS='2') as zysq on zysq.DISBURSE_ID = zyjg.DISBURSE_ID --【7-支用申请表:lm_disburse_application 】
left join lm_application_audit_result as dkjjsp on zysq.business_id = dkjjsp.business_id -- 最终进件表
group by zyjg.business_id, zysq.APPROVAL_BUSINESS_ID, dkjjsp.PRODUCT_CODE
) as zyxx on dkjg.business_id = zyxx.business_id and dkjg.PRODUCT_CODE = zyxx.PRODUCT_CODE
-- 4、放款情况
left join(
select
loan.cont_no -- as 合同编号
,dkjjsp.business_id
,dkjjsp.PRODUCT_CODE
,max(loan.ovdue_days) as 最长逾期天数
,sum(repayplan.should_pay_amt) as 应还金额
from bdp_cm_inte_co_lon_info as loan -- 【8-借据信息表:bdp_cm_inte_co_lon_info】
left join ( select dubil_no ,sum(repay_amt) as repay_amt
from bdp_cm_inte_co_lon_repay_dtl -- 【9-还款明细表:bdp_cm_inte_co_lon_repay_dtl】
group by dubil_no
) as repay on loan.dubil_no = repay.dubil_no
left join( select dubil_no ,sum(prin_total) as prin_total ,sum(int_total) as int_total ,sum(should_pay_amt) as should_pay_amt
from bdp_cm_inte_co_lon_repay_plan -- 【10-还款计划表:bdp_cm_inte_co_lon_repay_plan】
group by dubil_no
) as repayplan on loan.dubil_no = repayplan.dubil_no
left join (select distinct business_id , APPROVAL_BUSINESS_ID from lm_disburse_application where STATUS='2'
) as zysq on loan.cont_no = zysq.APPROVAL_BUSINESS_ID -- 支用申请表
left join lm_application_audit_result as dkjjsp on zysq.business_id = dkjjsp.business_id -- 最终进件审批表
group by cont_no ,dkjjsp.business_id ,dkjjsp.PRODUCT_CODE
) as dubil on dkjg.business_id = dubil.business_id and dkjg.PRODUCT_CODE = dubil.PRODUCT_CODE
-- 5、模型审批结果
left join (select * from lm_model_result
where IS_COMPOSITIVE='1' and STATUS='1' and MODEL_TYPE='GRADE'
) as pjmx on dkjg.req_id = pjmx.req_id -- 评级模型
;
Comment On table detail Is '产品流程明细表'
**
2 sql编写简洁性
**
去繁求简:快速查错减少debug、便于理解和复用。
- 多条件判断,尤其嵌套判断,可结合使用decode()和case when;
- 高频出现语句或字段先定义,避免重复创建;(语句用with as;字段打标签)
- 统计交叉矩阵可使用行转列函数pivot();
【例2】评分/评级&额度逾期率交叉分布(with… as…pivot())
with detail as --到期客户明细
(
select
a.*
,b.overdueday
,case when a.suggestline/10000<10 then '[0,10)'
when a.suggestline/10000<20 then '[10,20)'
when a.suggestline/10000<30 then '[20,30)'
when a.suggestline/10000<40 then '[30,40)'
when a.suggestline/10000<50 then '[40,50)'
when a.suggestline/10000<60 then '[50,60)'
when a.suggestline/10000<70 then '[60,70)'
when a.suggestline/10000<80 then '[70,80)'
when a.suggestline/10000<90 then '[80,90)'
when a.suggestline/10000<100 then '[90,100)'
when a.suggestline/10000<150 then '[100,150)'
when a.suggestline/10000<200 then '[150,200)'
when a.suggestline/10000<250 then '[200,250)'
when a.suggestline/10000<=300 then '[250,300]'
else '>300'
end as edqj
from vm_a_first_table a
left join vm_a_second_table b on a.businessid = b.businessid
where a.suggestline >= 0 and b.closedate <= trunc(sysdate,'mm')-1
)
方法1-评分&额度逾期率交叉分布:
select
pfqj as "模型评分/额度"--到期逾期率=逾期数/到期数
,to_char(count(case yq.edqj when '[0,10)' Then 1 end)/count(case dq.edqj when '[0,10)' Then 1 end)*100,'990.99')||'%' as "[0,10)"
,to_char(count(case yq.edqj when '[10,20)' Then 1 end)/count(case dq.edqj when '[10,20)' Then 1 end)*100,'990.99')||'%' as "[10,20)"
,to_char(count(case yq.edqj when '[20,30)' Then 1 end)/count(case dq.edqj when '[20,30)' Then 1 end)*100,'990.99')||'%' as "[20,30)"
,to_char(count(case yq.edqj when '[30,40)' Then 1 end)/count(case dq.edqj when '[30,40)' Then 1 end)*100,'990.99')||'%' as "[30,40)"
,to_char(count(case yq.edqj when '[40,50)' Then 1 end)/count(case dq.edqj when '[40,50)' Then 1 end)*100,'990.99')||'%' as "[40,50)"
,to_char(count(case yq.edqj when '[50,60)' Then 1 end)/count(case dq.edqj when '[50,60)' Then 1 end)*100,'990.99')||'%' as "[50,60)"
,to_char(count(case yq.edqj when '[60,70)' Then 1 end)/count(case dq.edqj when '[60,70)' Then 1 end)*100,'990.99')||'%' as "[60,70)"
,to_char(count(case yq.edqj when '[70,80)' Then 1 end)/count(case dq.edqj when '[70,80)' Then 1 end)*100,'990.99')||'%' as "[70,80)"
,to_char(count(case yq.edqj when '[80,90)' Then 1 end)/count(case dq.edqj when '[80,90)' Then 1 end)*100,'990.99')||'%' as "[80,90)"
,to_char(count(case yq.edqj when '[90,100)' Then 1 end)/count(case dq.edqj when '[90,100)' Then 1 end)*100,'990.99')||'%' as "[90,100)"
,to_char(count(case yq.edqj when '[100,150)' Then 1 end)/count(case dq.edqj when '[100,150)' Then 1 end)*100,'990.99')||'%' as "[100,150)"
,to_char(count(case yq.edqj when '[150,200)' Then 1 end)/count(case dq.edqj when '[150,200)' Then 1 end)*100,'990.99')||'%' as "[150,200)"
,to_char(count(case yq.edqj when '[200,250)' Then 1 end)/count(case dq.edqj when '[200,250)' Then 1 end)*100,'990.99')||'%' as "[200,250)"
,to_char(count(case yq.edqj when '[250,300]' Then 1 end)/count(case dq.edqj when '[250,300]' Then 1 end)*100,'990.99')||'%' as "[250,300]"
,to_char(count(case yq.edqj when '>300' Then 1 end)/count(case dq.edqj when '>300' Then 1 end)*100,'990.99')||'%' as ">300"
from detail dq --到期客户
group by pfqj
order by pfqj
–> 计算结果如下图:
方法2-额度&评级逾期率交叉分布:
SELECT * FROM (
select
customergrade as 模型评级
,edqj as 额度
,to_char(decode(count(1),0,0,count(case when overdueday >=30 then 1 end)/count(1))*100,'990.99')||'%' as 到期逾期率 --到期逾期率=逾期数/到期数
from detail --到期客户
group by customergrade
order by customergrade
) P
PIVOT --行转列PIVOT,列传行则用UNPIVOT
(
max(到期逾期率) FOR 模型评级 IN ( 'R1','R2','R3','R4','R5')
) T
–> 计算结果如下图:
- 简化合计统计提高执行效率,group by rollup()替换union all;
- 巧用开窗函数over(),用法如下:
1)排序:row_number() / dense_rank() / rank() / percent_rank() / cume_dist() / ntile() over();
2)聚合:sum() / count() / min() / max() / avg() / var() over();
3)行偏移:first() / last() / lag() / lead() over();
4)占比:ratio_to_report() over()
【例3】取最新预警数据(dense_rank() /row_number() over())
select b.* from (
select a.*
,row_number() over(partition by BUSINESS_ID order by UPDATE_DATE desc) rn2
from(
select wn.*
,jg.createtime as jgsj ,ct.startdate ,ct.enddate --合同到期时间
,ct.amount --合同金额
,dense_rank() over(partition by wn.CONTRACT_NUM order by jg.createtime desc) rn1
from warn_contact_info wn
left join second_table_details ct on wn.business_id = ct.business_id
left join tb_resultinformation jg on jg.BUSINESSID = wn.BUSINESS_ID
)a
where a.rn1=1 --合同号取最新业务申请
) b
where b.rn2=1 --取最新预警数据
【例4】指标分箱分布(ratio_to_report() over() … group by rollup())
select
decode(grouping(quotas_value_qj),1,'合计',quotas_value_qj) as 指标分箱 --grouping()=1为生成的合计项
,count(1) as 申请客户数
,to_char(ratio_to_report(count(1)) over(partition by grouping(quotas_value_qj))*100,'990.90') ||'%' as 申请客户数占比 --求纵向占比,合计项为100%
,count(case when comprehensiveadmission=0 then 1 end )as 授信客户数
,to_char(ratio_to_report(count(case when comprehensiveadmission=0 then 1 end )) over(partition by grouping(quotas_value_qj))*100,'990.90') ||'%' as 授信客户数占比
,case when count(1) >0 then
to_char(count(case when comprehensiveadmission=0 then 1 end )/count(1) *100,'990.90') ||'%' else '0.00%' end as 授信率
from vm_a_first_table t
left join ( select a.*
,CASE WHEN to_number(quotas_value)>=300000 THEN '[30,∞)'
WHEN to_number(quotas_value)>=250000 THEN '[25,30)'
WHEN to_number(quotas_value)>=200000 THEN '[20,25)'
WHEN to_number(quotas_value)>=150000 THEN '[15,20)'
WHEN to_number(quotas_value)>=100000 THEN '[10,15)'
WHEN to_number(quotas_value)>=80000 THEN '[8,10)'
WHEN to_number(quotas_value)>=60000 THEN '[6,8)'
WHEN to_number(quotas_value)>=40000 THEN '[4,6)'
WHEN to_number(quotas_value)>=20000 THEN '[2,4)'
WHEN to_number(quotas_value)>0 THEN '(0,2)'
else '[0]' end as quotas_value_qj
from ide_quotas_data_value a
where quotas_code='CPED_QX_HM_ICR02_001'
) t1 on t.req_id=t1.req_id
where t1.quotas_value_qj is not null and t.state=0
group by rollup(quotas_value_qj) --按指标分箱后汇总统计
order by decode(quotas_value_qj,'[0]',1,'(0,2)',2,'[2,4)',3,'[4,6)',4,'[6,8)',5,'[8,10)',6,'[10,15)',7,'[15,20)',8, '[20,25)',9, '[25,30)',10, '[30,∞)',11) --分档强制排序,或分档前加空格等优先级高符号预处理
–> 计算结果如下图:
【例5】准入指标单一触碰率(count()/sum()/lag() over())
select zbcp.* --当月、上月、本季、累计触碰率对比
,lag(单一触碰率,1,0) over(partition by 指标名称 order by 月份) as 上月单一触碰率 --偏移可用作计算同比、环比等
,to_char( sum(单一触碰次数) over(partition by 指标名称 order by 月份 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
/ sum(判定数) over(partition by 指标名称 order by 月份 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) *100,'990.99')||'%' as 当季单一触碰率
,to_char( sum(单一触碰次数) over(partition by 指标名称)
/ sum(判定数) over(partition by 指标名称) *100,'990.99')||'%' as 累计单一触碰率
from( --先统计各月指标触碰率
select
substr(createtime,1,7) as 月份
,RULE_NAME as 指标名称
,count(1)as 判定次数
,count(case when RESULT_VALUE='N' then 1 end) as 触碰次数
,count(case when RESULT_VALUE='N' and sum1=1 then 1 end) as 单一触碰次数
,to_char(count(case when RESULT_VALUE='N' then 1 end)/count(1)*100,'990.99')||'%' as 触碰率
,to_char(count(case when RESULT_VALUE='N' and sum1=1 then 1 end)/count(1)*100,'990.99')||'%' as 单一触碰率
from (
select a.* ,count(case when result_value ='N' then 1 end) over(partition by req_id) as sum1 --客户准入指标触碰数
from IDE_RULE_RESULT a
where RESULT_VALUE is not null and MODEL_CODE = 'ZRMX' -- 准入模型
)t
group by substr(createtime,1,7) ,RULE_NAME
order by substr(createtime,1,7) ,RULE_NAME
)zbcp
where trunc(createtime,'MM') = add_months(trunc(sysdate,'MM'),-1) -- 取当前时间上月
–> 计算结果如下图:
【例6】评级模型效果评估(ks/auc/ar)
select
s2.PRODUCT_NAME as 产品名称
,ROUND(MAX(ks_i),4) AS KS
-- KS = MAX(KS_i) = max(tpr-fpr)
,ROUND(sum(auc_i),4) as AUC
-- AUC = SUM(auc_i)= sum((TPR+PTPR)/2*(FPR-PFPR))
,ROUND(sum(auc_i)*2-1,4) as AR
-- AR = SUM(auc_i)*2-1
from (
select s1.*
,tpr -fpr as ks_i
-- KS(i)= 累计坏客户比-累计好客户比= (累计坏客户/所有坏客户)-(累计好客户/所有好客户)
,(tpr+lag(tpr,1,0)over(partition by PRODUCT_NAME order by customerscore ))/2*(fpr-lag(fpr,1,0)over(partition by PRODUCT_NAME order by customerscore )) as auc_i
-- auc_i = 累计坏客户比*新增好客户比 = TPR*(FPR-PFPR) ~ (TPR+PTPR)/2*(FPR-PFPR)
from
(
select
a.PRODUCT_NAME ,a.business_id ,a.customerscore as customerscore ,a.cjsj
,b.overdueday -- 历史最大本息逾期天数
,b.typelabel -- 客户标签
,count(case when b.typelabel ='bad' then 1 end)over(partition by a.PRODUCT_NAME order by a.customerscore)
/count(case when b.typelabel ='bad' then 1 end)over(partition by a.PRODUCT_NAME ) as tpr -- 坏样本累计占比:TPR真正例率/召回率
,count(case when b.typelabel ='good' then 1 end)over(partition by a.PRODUCT_NAME order by a.customerscore)
/count(case when b.typelabel ='good' then 1 end)over(partition by a.PRODUCT_NAME ) as fpr -- 好样本累计占比:FPR假正例率=1-特异性
from vm_a_first_table a
left join vm_a_fourth_table b on a.business_id = b.business_id
where b.loanamount>0 and b.typelabel in ('bad' ,'good') -- 样本选取:放款好坏样本标签
order by a.PRODUCT_NAME ,a.customerscore -- 客户评分升序排序
)s1
)s2
group by s2.PRODUCT_NAME
order by s2.PRODUCT_NAME
–> 计算结果如下图: