项目场景:
项目场景:数仓模型表
问题描述:
HiveQL中CASE WHEN带SPLIT切片判断和LEFT OUTER JOIN连用报错。
SELECT
*
FROM
(
SELECT
customer_id
,target_date
,target_qty
,COALESCE(CASE WHEN SPLIT(target_date, '-')[1] IN ('01', '02', '03') THEN 'Q1'
WHEN SPLIT(target_date, '-')[1] IN ('04', '05', '06') THEN 'Q2'
WHEN SPLIT(target_date, '-')[1] IN ('07', '08', '09') THEN 'Q3'
WHEN SPLIT(target_date, '-')[1] IN ('10', '11', '12') THEN 'Q4'
END, '空') AS target_quarter
,SPLIT(target_date, '-')[0] AS target_year
FROM ods.ods_xxx
WHERE pt_d = '${pt_d}'
AND customer_id IS NOT NULL
) t1
LEFT OUTER JOIN
(
SELECT
id
,mdm_code
,name
FROM ods.ods_yyy
WHERE pt_d = '${pt_d}'
) t2
ON t1.customer_id = t2.id
;
原因分析:
只要加上SPLIT切片再用CASE WHEN判断分类就会报错,个人感觉是否为多种情况左连接出错,具体原因未知,求大佬解答。
解决方案:
先LEFT OUTER JOIN处理完数据再做CASE WHEN就不会报错。
SELECT
*
FROM
(
SELECT
customer_code
,provider_name
,target_date
,provider_sale_target
,COALESCE(CASE WHEN SPLIT(target_date, '-')[1] IN ('01', '02', '03') THEN 'Q1'
WHEN SPLIT(target_date, '-')[1] IN ('04', '05', '06') THEN 'Q2'
WHEN SPLIT(target_date, '-')[1] IN ('07', '08', '09') THEN 'Q3'
WHEN SPLIT(target_date, '-')[1] IN ('10', '11', '12') THEN 'Q4'
END, '空') AS target_quarter
,SPLIT(target_date, '-')[0] AS target_year
FROM
(
SELECT
UUID() AS etl_id
,t2.mdm_code AS customer_code
,t2.name AS provider_name
,COALESCE(t1.target_date, '空') AS target_date
,COALESCE(t1.target_qty, '空') AS provider_sale_target
,DATE_FORMAT(CURRENT_TIMESTAMP,'yyyy-MM-dd HH:mm:ss') AS etl_time
FROM
(
SELECT
customer_id
,target_date
,target_qty
FROM ods.ods_xxx
WHERE pt_d = '${pt_d}'
AND customer_id IS NOT NULL
) t1
LEFT OUTER JOIN
(
SELECT
id
,mdm_code
,name
FROM ods.ods_yyy
WHERE pt_d = '${pt_d}'
) t2
ON t1.customer_id = t2.id
)aa
) t3
;