拉链表实现案例

拉链表使用的场景:

  1. 数据量大,且表中部分字段会更新,比如用户地址、产品描述信息、订单状态等等
  2. 需要查看某一个时间段的历史快照信息
  3. 变化比例和频率不是很大

--拉链表实现
--原始数据
CREATE TABLE wedw_tmp.tmp_orders (
    orderid INT,
    createtime STRING,
    modifiedtime STRING,
    status STRING
) stored AS textfile;

--拉链表
CREATE TABLE wedw_tmp.tmp_orders_dz(
    orderid int,
    createtime STRING,
    modifiedtime STRING,
    status STRING,
    link_start_date string,
    link_end_date string
) stored AS textfile;

--更新表
CREATE TABLE wedw_tmp.tmp_orders_update(
    orderid INT,
    createtime STRING,
    modifiedtime STRING,
    status STRING
) stored AS textfile;


--插入原始数据
insert overwrite table  wedw_tmp.tmp_orders
select 1,"2015-08-18","2015-08-18","创建"
union all 
select 2,"2015-08-18","2015-08-18","创建"
union all
select 3,"2015-08-19","2015-08-21","支付"
union all
select 4,"2015-08-19","2015-08-21","完成"
union all 
select 5,"2015-08-19","2015-08-20","支付"
union all 
select 6,"2015-08-20","2015-08-20","创建"
union all 
select 7,"2015-08-20","2015-08-21","支付"

--拉链表初始化
insert into  wedw_tmp.tmp_orders_dz
select *,createtime,'9999-12-31' from  wedw_tmp.tmp_orders 

--增量数据
insert into  wedw_tmp.tmp_orders_update
select 3,"2015-08-19","2015-08-21","支付"
union all 
select 4,"2015-08-19","2015-08-21","完成"
union all 
select 7,"2015-08-20","2015-08-21","支付"
union all 
select 8,"2015-08-21","2015-08-21","创建" 

--更新拉链表
insert overwrite table  wedw_tmp.tmp_orders_dz
select 
    t1.orderid,
    t1.createtime,
    t1.modifiedtime,
    t1.status,
    t1.link_start_date,
    case when t1.link_end_date='9999-12-31' and t2.orderid is not null then '2015-08-20'
     else t1.link_end_date
    end as link_end_date
from  wedw_tmp.tmp_orders_dz t1
left join wedw_tmp.tmp_orders_update t2
on t1.orderid = t2.orderid
union all 
select 
  orderid,
  createtime,
  modifiedtime,
  status,
  '2015-08-21' as link_start_date,
  '9999-12-31' as link_end_date
from wedw_tmp.tmp_orders_update

--拉链表回滚,比如在插入2015-08-22的数据后,回滚2015-08-21的数据,使拉链表与2015-08-20的一致,具体操作过程如下

select 
  orderid,
  createtime,
  modifiedtime,
  status,
  link_start_date,
  link_end_date
from wedw_tmp.tmp_orders_dz
where link_end_date<'2015-08-20'

union all 
select 
  orderid,
  createtime,
  modifiedtime,
  status,
  link_start_date,
  '9999-12-31'
from wedw_tmp.tmp_orders_dz
where link_end_date='2015-08-20'

union all 
select 
  orderid,
  createtime,
  modifiedtime,
  status,
  link_start_date,
  '9999-12-31'
from wedw_tmp.tmp_orders_dz
where link_start_date<'2020-08-21' and  link_end_date>='2015-08-21'
 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值