拉链表使用的场景:
- 数据量大,且表中部分字段会更新,比如用户地址、产品描述信息、订单状态等等
- 需要查看某一个时间段的历史快照信息
- 变化比例和频率不是很大
--拉链表实现
--原始数据
CREATE TABLE wedw_tmp.tmp_orders (
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING
) stored AS textfile;--拉链表
CREATE TABLE wedw_tmp.tmp_orders_dz(
orderid int,
createtime STRING,
modifiedtime STRING,
status STRING,
link_start_date string,
link_end_date string
) stored AS textfile;--更新表
CREATE TABLE wedw_tmp.tmp_orders_update(
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING
) stored AS textfile;
--插入原始数据
insert overwrite table wedw_tmp.tmp_orders
select 1,"2015-08-18","2015-08-18","创建"
union all
select 2,"2015-08-18","2015-08-18","创建"
union all
select 3,"2015-08-19","2015-08-21","支付"
union all
select 4,"2015-08-19","2015-08-21","完成"
union all
select 5,"2015-08-19","2015-08-20","支付"
union all
select 6,"2015-08-20","2015-08-20","创建"
union all
select 7,"2015-08-20","2015-08-21","支付"--拉链表初始化
insert into wedw_tmp.tmp_orders_dz
select *,createtime,'9999-12-31' from wedw_tmp.tmp_orders--增量数据
insert into wedw_tmp.tmp_orders_update
select 3,"2015-08-19","2015-08-21","支付"
union all
select 4,"2015-08-19","2015-08-21","完成"
union all
select 7,"2015-08-20","2015-08-21","支付"
union all
select 8,"2015-08-21","2015-08-21","创建"--更新拉链表
insert overwrite table wedw_tmp.tmp_orders_dz
select
t1.orderid,
t1.createtime,
t1.modifiedtime,
t1.status,
t1.link_start_date,
case when t1.link_end_date='9999-12-31' and t2.orderid is not null then '2015-08-20'
else t1.link_end_date
end as link_end_date
from wedw_tmp.tmp_orders_dz t1
left join wedw_tmp.tmp_orders_update t2
on t1.orderid = t2.orderid
union all
select
orderid,
createtime,
modifiedtime,
status,
'2015-08-21' as link_start_date,
'9999-12-31' as link_end_date
from wedw_tmp.tmp_orders_update--拉链表回滚,比如在插入2015-08-22的数据后,回滚2015-08-21的数据,使拉链表与2015-08-20的一致,具体操作过程如下
select
orderid,
createtime,
modifiedtime,
status,
link_start_date,
link_end_date
from wedw_tmp.tmp_orders_dz
where link_end_date<'2015-08-20'union all
select
orderid,
createtime,
modifiedtime,
status,
link_start_date,
'9999-12-31'
from wedw_tmp.tmp_orders_dz
where link_end_date='2015-08-20'union all
select
orderid,
createtime,
modifiedtime,
status,
link_start_date,
'9999-12-31'
from wedw_tmp.tmp_orders_dz
where link_start_date<'2020-08-21' and link_end_date>='2015-08-21'