创建hive拉链表

一、初始化

SELECT
 ROW_NUMBER() OVER(ORDER BY xxxx_code)    AS xxxx_key              -- 代理键值
,t.*
,CAST(CURRENT_TIMESTAMP() AS STRING)      AS au_last_update_time   -- 更新时间
,'1'                                      AS au_active_flag        -- 是否有效
,'1970-01-01'                             AS au_start_date         -- 有效开始时间
,'9999-12-31'                             AS au_end_date           -- 有效结束时间
FROM 源表 t;

二、增量

1.源表没有lastmodifyrecordtime情况,需要每个字段逐一比对

中间表:
将昨日全量数据插入到中间表

INSERT OVERWIRTE TABLE temp.temp_dim_channel_01

增量变更:

WITH modify_data_temp AS(             --查询原表有修改或新增的数据
SELECT t1.*
FROM temp.temp_dim_channel_01 t1
LEFT JOIN dim.dim_channel_zi t2
ON  
MD5(CONCAT_WS(','
,t1.channel_code      
,t1.channel_name      
,t1.channel_level_code
,t1.channel_level_name
,t1.channel_code_L1   
,t1.channel_name_L1   
,t1.channel_code_L2   
,t1.channel_name_L2   
,t1.channel_code_L3   
,t1.channel_name_L3   
,t1.channel_code_L4   
,t1.channel_name_L4   
,t1.channel_code_L5   
,t1.channel_name_L5)) =
MD5(CONCAT_WS(','
,t2.channel_code      
,t2.channel_name      
,t2.channel_level_code
,t2.channel_level_name
,t2.channel_code_L1   
,t2.channel_name_L1   
,t2.channel_code_L2   
,t2.channel_name_L2   
,t2.channel_code_L3   
,t2.channel_name_L3   
,t2.channel_code_L4   
,t2.channel_name_L4   
,t2.channel_code_L5   
,t2.channel_name_L5))
AND t2.au_active_flag='1' 
WHERE t2.channel_code IS NULL)
INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECT 
 t1.channel_key           -- 代理键值
,t1.channel_code          -- 渠道编码
,t1.channel_name          -- 渠道名称
,t1.channel_level_code    -- 渠道层级代码
,t1.channel_level_name    -- 渠道层级名称
,t1.channel_code_L1       -- 一级渠道代码
,t1.channel_name_L1       -- 一级渠道名称
,t1.channel_code_L2       -- 二级渠道代码
,t1.channel_name_L2       -- 二级渠道名称
,t1.channel_code_L3       -- 三级渠道代码
,t1.channel_name_L3       -- 三级渠道名称
,t1.channel_code_L4       -- 四级渠道代码
,t1.channel_name_L4       -- 四级渠道名称
,t1.channel_code_L5       -- 五级渠道代码
,t1.channel_name_L5       -- 五级渠道名称
,t1.bu_flag               -- BU标记
,t1.bu_name               -- BU名称
,t1.remark_channel        -- 渠道备注
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间
FROM dim.dim_channel_zi t1 
LEFT JOIN modify_data_temp t2
ON  t1.channel_code=t2.channel_code
AND t1.channel_name=t2.channel_name       --必须要能唯一匹配到一条数据
UNION ALL
SELECT
 ROW_NUMBER() OVER(ORDER BY t1.channel_code)+t2.max_channel_key AS channel_key
,t1.*
,CAST(CURRENT_TIMESTAMP() AS STRING)      AS au_last_update_time   -- 更新时间
,'1'                                      AS au_active_flag        -- 是否有效
,CAST(CURRENT_DATE() AS STRING)           AS au_start_date         -- 有效开始时间
,'9999-12-31'                             AS au_end_date           -- 有效结束时间
FROM modify_data_temp t1
CROSS JOIN (SELECT COALESCE(MAX(channel_key),0) AS max_channel_key FROM dim.dim_channel_zi) t2;
2.源表有lastmodifyrecordtime情况

增量变更:

在这里插入代码片WITH modify_data_temp AS(   --源表必须每个字段都要引用到,否则还是得每个字段逐一比对
SELECT *
FROM 源表
WHERE lastmodifyrecordtime >= DATE_SUB(CURRENT_DATE(),1))
INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECT 
 t1.channel_key           -- 代理键值
,t1.channel_code          -- 渠道编码
,t1.channel_name          -- 渠道名称
,t1.channel_level_code    -- 渠道层级代码
,t1.channel_level_name    -- 渠道层级名称
,t1.channel_code_L1       -- 一级渠道代码
,t1.channel_name_L1       -- 一级渠道名称
,t1.channel_code_L2       -- 二级渠道代码
,t1.channel_name_L2       -- 二级渠道名称
,t1.channel_code_L3       -- 三级渠道代码
,t1.channel_name_L3       -- 三级渠道名称
,t1.channel_code_L4       -- 四级渠道代码
,t1.channel_name_L4       -- 四级渠道名称
,t1.channel_code_L5       -- 五级渠道代码
,t1.channel_name_L5       -- 五级渠道名称
,t1.bu_flag               -- BU标记
,t1.bu_name               -- BU名称
,t1.remark_channel        -- 渠道备注
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间
FROM dim.dim_channel_zi t1 
LEFT JOIN modify_data_temp t2
ON  t1.channel_code=t2.channel_code    --必须要能唯一匹配到一条数据,即业务主键要唯一,否则会出现数据发散
UNION ALL
SELECT
 ROW_NUMBER() OVER(ORDER BY t1.channel_code)+t2.max_channel_key AS channel_key
,t1.*
,CAST(CURRENT_TIMESTAMP() AS STRING)      AS au_last_update_time   -- 更新时间
,'1'                                      AS au_active_flag        -- 是否有效
,CAST(CURRENT_DATE() AS STRING)           AS au_start_date         -- 有效开始时间
,'9999-12-31'                             AS au_end_date           -- 有效结束时间
FROM modify_data_temp t1
CROSS JOIN (SELECT COALESCE(MAX(channel_key),0) AS max_channel_key FROM dim.dim_channel_zi) t2;

三、增量删除

如果源表会出现删除数据的情况,就需要考虑增量删除

INSERT OVERWRITE TABLE dim.dim_channel_zi
SELECT
     t1.channel_key           -- 代理键值
    ,t1.channel_code          -- 渠道编码
    ,t1.channel_name          -- 渠道名称
    ,t1.channel_level_code    -- 渠道层级代码
    ,t1.channel_level_name    -- 渠道层级名称
    ,t1.channel_code_L1       -- 一级渠道代码
    ,t1.channel_name_L1       -- 一级渠道名称
    ,t1.channel_code_L2       -- 二级渠道代码
    ,t1.channel_name_L2       -- 二级渠道名称
    ,t1.channel_code_L3       -- 三级渠道代码
    ,t1.channel_name_L3       -- 三级渠道名称
    ,t1.channel_code_L4       -- 四级渠道代码
    ,t1.channel_name_L4       -- 四级渠道名称
    ,t1.channel_code_L5       -- 五级渠道代码
    ,t1.channel_name_L5       -- 五级渠道名称
    ,t1.bu_flag               -- BU标记
    ,t1.bu_name               -- BU名称
    ,t1.remark_channel        -- 渠道备注
    ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
    ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
    ,t1.au_start_date         -- 有效开始时间
    ,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间
FROM dim.dim_channel_zi t1 
LEFT JOIN temp.temp_dim_channel_01 t2   
ON t1.channel_code=t2.channel_code;
小计:

拉链表中对时间的更新逻辑,以下两种写法都可:
1.

--增量变更
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NOT NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间

 --增量删除
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',CAST(CURRENT_TIMESTAMP() AS STRING),t1.au_last_update_time) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31','0',t1.au_active_flag) AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NULL AND t1.au_end_date='9999-12-31',DATE_SUB(CURRENT_DATE(),1),t1.au_end_date) AS au_end_date  -- 有效结束时间
--增量变更
,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_last_update_time,CAST(CURRENT_TIMESTAMP() AS STRING)) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_active_flag,'0') AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NULL OR t1.au_end_date<'9999-12-31',t1.au_end_date,DATE_SUB(CURRENT_DATE(),1)) AS au_end_date  -- 有效结束时间

--增量删除
,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_last_update_time,CAST(CURRENT_TIMESTAMP() AS STRING)) AS au_last_update_time  -- 更新时间
,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_active_flag,'0') AS au_active_flag       -- 是否有效 
,t1.au_start_date         -- 有效开始时间
,IF(t2.channel_code IS NOT NULL OR t1.au_end_date<'9999-12-31',t1.au_end_date,DATE_SUB(CURRENT_DATE(),1)) AS au_end_date  -- 有效结束时间
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值