一、初始化
SELECT
ROW_NUMBER() OVER(ORDER BY xxxx_code) AS xxxx_key -- 代理键值
,t.*
,CAST(CURRENT_TIMESTAMP() AS STRING) AS au_last_update_time -- 更新时间
,'1' AS au_active_flag -- 是否有效
,'1970-01-01' AS au_start_date -- 有效开始时间
,'9999-12-31' AS au_end_date -- 有效结束时间
FROM 源表 t;
二、增量
1.源表没有lastmodifyrecordtime情况,需要每个字段逐一比对
中间表:
将昨日全量数据插入到中间表
INSERT OVERWIRTE TABLE temp.temp_dim_channel_01
增量变更:
WITH modify_data_temp AS( --查询原表有修改或新增的数据
SELECT t1.*
FROM temp.temp_dim_channel_01 t1
LEFT JOIN dim.dim_channel_zi t2
ON
MD5(CONCAT_WS(','
,t1.channel_code
,t1.channel_name
,t1.channel_level_code
,t1.channel_level_name
,t1.channel_code_L1
,t1.channel_name_L1
,t1.channel_code_L2
,t1.channel_name_L2
,t1.channel_code_L3
,t1.channel_name_L3
,t1.channel_code_L4
,t1.channel_name_L4
,t1.channel_code_L5
,t1.channel_name_L5)) =
MD5(CONCAT_WS(','
,t2.channel_code
,t2.channel_name
,t2.channel_level_code
,t2.channel_level_name
,t2.channel_code_L1
,t2.channel_name_L1
,t2.channel_code_L2
,t2.channel_name_L2
,t2.channel_code_L3
,t2.channel_name_L3
,t2.channel_code_L4
,t2.channel_name_L4
,t2.channel_code_L5
,t2.channel_name_L5))
AND t2.au_active_flag='1'
WHERE t2.channel_code IS NULL)
INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECT
t1.channel_key -- 代理键值
,t1.channel_code -- 渠道编码
,t1.channel_name -- 渠道名称
,t1.channel_level_code -- 渠道层级代码
,t1.channel_level_name -- 渠道层级名称
,t1.channel_code_L1 -- 一级渠道代码
,t1.channel_name_L1 -- 一级渠道名称
,t1.channel_code_L2 -- 二级渠道代码
,t1.channel_name_L2 -- 二级渠道名称
,t1.channel_code_L3 -- 三级渠道代码
,t1.channel_name_L3 -- 三级渠道名称
,t1.channel_code_L4 -- 四级渠道代码
,t1.channel_name_L4 -- 四级渠道名称
,t1.channel_code_L5 -- 五级渠道代码
,t1.channel_name_L5 -- 五级渠道名称
,t1.bu_flag -- BU标记
,t1.bu_name -- BU名称
,t1.remark_channel -- 渠道备注
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效
,t1.au_start_date -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间
FROM dim.dim_channel_zi t1
LEFT JOIN modify_data_temp t2
ON t1.channel_code=t2.channel_code
AND t1.channel_name=t2.channel_name --必须要能唯一匹配到一条数据
UNION ALL
SELECT
ROW_NUMBER() OVER(ORDER BY t1.channel_code)+t2.max_channel_key AS channel_key
,t1.*
,CAST(CURRENT_TIMESTAMP() AS STRING) AS au_last_update_time -- 更新时间
,'1' AS au_active_flag -- 是否有效
,CAST(CURRENT_DATE() AS STRING) AS au_start_date -- 有效开始时间
,'9999-12-31' AS au_end_date -- 有效结束时间
FROM modify_data_temp t1
CROSS JOIN (SELECT COALESCE(MAX(channel_key),0) AS max_channel_key FROM dim.dim_channel_zi) t2;
2.源表有lastmodifyrecordtime情况
增量变更:
在这里插入代码片WITH modify_data_temp AS( --源表必须每个字段都要引用到,否则还是得每个字段逐一比对
SELECT *
FROM 源表
WHERE lastmodifyrecordtime >= DATE_SUB(CURRENT_DATE(),1))
INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECT
t1.channel_key -- 代理键值
,t1.channel_code -- 渠道编码
,t1.channel_name -- 渠道名称
,t1.channel_level_code -- 渠道层级代码
,t1.channel_level_name -- 渠道层级名称
,t1.channel_code_L1 -- 一级渠道代码
,t1.channel_name_L1 -- 一级渠道名称
,t1.channel_code_L2 -- 二级渠道代码
,t1.channel_name_L2 -- 二级渠道名称
,t1.channel_code_L3 -- 三级渠道代码
,t1.channel_name_L3 -- 三级渠道名称
,t1.channel_code_L4 -- 四级渠道代码
,t1.channel_name_L4 -- 四级渠道名称
,t1.channel_code_L5 -- 五级渠道代码
,t1.channel_name_L5 -- 五级渠道名称
,t1.bu_flag -- BU标记
,t1.bu_name -- BU名称
,t1.remark_channel -- 渠道备注
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效
,t1.au_start_date -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间
FROM dim.dim_channel_zi t1
LEFT JOIN modify_data_temp t2
ON t1.channel_code=t2.channel_code --必须要能唯一匹配到一条数据,即业务主键要唯一,否则会出现数据发散
UNION ALL
SELECT
ROW_NUMBER() OVER(ORDER BY t1.channel_code)+t2.max_channel_key AS channel_key
,t1.*
,CAST(CURRENT_TIMESTAMP() AS STRING) AS au_last_update_time -- 更新时间
,'1' AS au_active_flag -- 是否有效
,CAST(CURRENT_DATE() AS STRING) AS au_start_date -- 有效开始时间
,'9999-12-31' AS au_end_date -- 有效结束时间
FROM modify_data_temp t1
CROSS JOIN (SELECT COALESCE(MAX(channel_key),0) AS max_channel_key FROM dim.dim_channel_zi) t2;
三、增量删除
如果源表会出现删除数据的情况,就需要考虑增量删除
INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECT
t1.channel_key -- 代理键值
,t1.channel_code -- 渠道编码
,t1.channel_name -- 渠道名称
,t1.channel_level_code -- 渠道层级代码
,t1.channel_level_name -- 渠道层级名称
,t1.channel_code_L1 -- 一级渠道代码
,t1.channel_name_L1 -- 一级渠道名称
,t1.channel_code_L2 -- 二级渠道代码
,t1.channel_name_L2 -- 二级渠道名称
,t1.channel_code_L3 -- 三级渠道代码
,t1.channel_name_L3 -- 三级渠道名称
,t1.channel_code_L4 -- 四级渠道代码
,t1.channel_name_L4 -- 四级渠道名称
,t1.channel_code_L5 -- 五级渠道代码
,t1.channel_name_L5 -- 五级渠道名称
,t1.bu_flag -- BU标记
,t1.bu_name -- BU名称
,t1.remark_channel -- 渠道备注
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效
,t1.au_start_date -- 有效开始时间
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间
FROM dim.dim_channel_zi t1
LEFT JOIN temp.temp_dim_channel_01 t2
ON t1.channel_code=t2.channel_code;
小计:
拉链表中对时间的更新逻辑,以下两种写法都可:
1.
--增量变更
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效
,t1.au_start_date -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间
--增量删除
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time -- 更新时间
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag -- 是否有效
,t1.au_start_date -- 有效开始时间
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date -- 有效结束时间
--增量变更
,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_last_update_time,CAST(CURRENT_TIMESTAMP() AS STRING)) AS au_last_update_time -- 更新时间
,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_active_flag,'0') AS au_active_flag -- 是否有效
,t1.au_start_date -- 有效开始时间
,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_end_date,DATE_SUB(CURRENT_DATE(),1)) AS au_end_date -- 有效结束时间
--增量删除
,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_last_update_time,CAST(CURRENT_TIMESTAMP() AS STRING)) AS au_last_update_time -- 更新时间
,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_active_flag,'0') AS au_active_flag -- 是否有效
,t1.au_start_date -- 有效开始时间
,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_end_date,DATE_SUB(CURRENT_DATE(),1)) AS au_end_date -- 有效结束时间