hive拉链表实战


–第一步:
数据库源表orders

create table orders(
    order_id int,
    createtime string,
    modifiedtime string,
    status string
)

20190821开始做拉链表 数据如下
±--------------------------------±----------------------------------±------------------------------------±------------------------------±-+
| orders.order_id | orders.createtime | orders.modifiedtime | orders.status |
±--------------------------------±----------------------------------±------------------------------------±------------------------------±-+
| 1 | 2019-08-18 | 2019-08-18 | 创建 |
| 2 | 2019-08-18 | 2019-08-18 | 创建 |
| 3 | 2019-08-19 | 2019-08-21 | 支付 |
| 4 | 2019-08-19 | 2019-08-21 | 完成 |
| 5 | 2019-08-19 | 2019-08-20 | 支付 |
| 6 | 2019-08-20 | 2019-08-20 | 创建 |
| 7 | 2019-08-20 | 2019-08-21 | 支付 |
| 8 | 2019-08-21 | 2019-08-21 | 创建 |
±--------------------------------±----------------------------------±------------------------------------±------------------------------±-+

– 创建 ods 层增量数据表,按天分区,存放每天的增量数据

create table ods_order_inc(
    order_id int,
    createtime string,
    modifiedtime string,
    status string
)
partitioned by (day string)
row format delimited fields terminated by ',' stored as TEXTFILE;

–进行全量的初始化(这个sql只执行一次)

INSERT overwrite TABLE ods_order_inc PARTITION (day ="20190821")
SELECT order_id,createtime,modifiedtime,status
FROM orders   --注意:order表在生产环境中应该是数据库中的表,使用工具将数据抽取到ods_order_inc,这里为了简便直接使用了查询
WHERE createtime <= '20190821';

– dw 拉链表建表语句

create table dw_order_his(
    order_id int,
    createtime string,
    modifiedtime string,
    status string,
    dw_start_date string,
    dw_end_date string
)
row format delimited fields terminated by ',' stored as TEXTFILE;
--将数据加入到拉链表
INSERT overwrite TABLE dw_order_his
SELECT order_id,createtime,modifiedtime,status,
createtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM ods_order_inc
WHERE day = '20190821';

–第二步:
–将2019-08-22好的数据增量同步到ods_order_inc分区表

INSERT overwrite TABLE ods_order_inc PARTITION (day = '20190822')
SELECT order_id,createtime,modifiedtime,status
FROM orders  
WHERE createtime = '2019-08-22' OR modifiedtime = '2019-08-22';

–例如:数据如下
±----------±------------±--------------±--------±-+
| order_id | createtime | modifiedtime | status |
±----------±------------±--------------±--------±-+
| 1 | 2019-08-18 | 2019-08-22 | 支付 |
| 2 | 2019-08-18 | 2019-08-22 | 完成 |
| 6 | 2019-08-20 | 2019-08-22 | 支付 |
| 8 | 2019-08-21 | 2019-08-22 | 支付 |
| 9 | 2019-08-22 | 2019-08-22 | 创建 |
| 10 | 2019-08-22 | 2019-08-22 | 支付 |
±----------±------------±--------------±--------±-+

----第三步:
–创建dw_orders_his_tmp临时表
–更新sql分为两部分,
–1 第一部分 ,使用拉链表(里面是20190821以前的数据)left join ods_order_inc 20190822 号的数据 关联的上的说明,20190822 更新了20190821号的数据,将这些数据的结束时间改为20190821,数据追加到dw_orders_his_tmp
–2 第二部分,直接将ods_order_inc 中 20190822 中的数据追加到dw_orders_his_tmp,

DROP TABLE IF EXISTS dw_orders_his_tmp;
CREATE TABLE dw_order_his_tmp 
AS
select * from (
SELECT
order_id order_id,
createtime,
modifiedtime,
status,
dw_start_date,
dw_end_date
FROM 
(SELECT
a.order_id,
a.createtime,
a.modifiedtime,
a.status,
a.dw_start_date,
CASE WHEN b.order_id IS NOT NULL AND a.dw_end_date > '2019-08-22' THEN '2019-08-21' ELSE a.dw_end_date END AS dw_end_date
FROM dw_order_his a
left outer join (SELECT * FROM ods_order_inc WHERE day = '20190822') b
ON (a.order_id = b.order_id))
union all
SELECT 
order_id order_id,
createtime,
modifiedtime,
status,
modifiedtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM ods_order_inc
WHERE day = '20190822')t ORDER BY order_id,dw_start_date;

----第四步:

–更新拉链表

INSERT overwrite TABLE dw_order_his
SELECT * FROM dw_order_his_tmp;

结果如下
±-----------------------±-------------------------±---------------------------±---------------------±----------------------------±--------------------------±-+
| dw_order_his.order_id | dw_order_his.createtime | dw_order_his.modifiedtime | dw_order_his.status | dw_order_his.dw_start_date | dw_order_his.dw_end_date |
±-----------------------±-------------------------±---------------------------±---------------------±----------------------------±--------------------------±-+
| 1 | 2019-08-18 | 2019-08-18 | 创建 | 2019-08-18 | 2019-08-21 |
| 1 | 2019-08-18 | 2019-08-22 | 支付 | 2019-08-22 | 9999-12-31 |
| 2 | 2019-08-18 | 2019-08-18 | 创建 | 2019-08-18 | 2019-08-21 |
| 2 | 2019-08-18 | 2019-08-22 | 完成 | 2019-08-22 | 9999-12-31 |
| 3 | 2019-08-19 | 2019-08-21 | 支付 | 2019-08-19 | 9999-12-31 |
| 4 | 2019-08-19 | 2019-08-21 | 完成 | 2019-08-19 | 9999-12-31 |
| 5 | 2019-08-19 | 2019-08-20 | 支付 | 2019-08-19 | 9999-12-31 |
| 6 | 2019-08-20 | 2019-08-20 | 创建 | 2019-08-20 | 2019-08-21 |
| 6 | 2019-08-20 | 2019-08-22 | 支付 | 2019-08-22 | 9999-12-31 |
| 7 | 2019-08-20 | 2019-08-21 | 支付 | 2019-08-20 | 9999-12-31 |
| 8 | 2019-08-21 | 2019-08-21 | 创建 | 2019-08-21 | 2019-08-21 |
| 8 | 2019-08-21 | 2019-08-22 | 支付 | 2019-08-22 | 9999-12-31 |
| 9 | 2019-08-22 | 2019-08-22 | 创建 | 2019-08-22 | 9999-12-31 |
| 10 | 2019-08-22 | 2019-08-22 | 支付 | 2019-08-22 | 9999-12-31 |
±-----------------------±-------------------------±---------------------------±---------------------±----------------------------±--------------------------±-+

每天运行第二步到第四步即完成了拉链表的制作

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值