hive拉链表入门

概念

拉链表目的在保存历史记录. 同时平衡时间和空间消耗

案例

CREATE TABLE orders
(
    orderid      INT,
    createtime   STRING,
    modifiedtime STRING,
    status       STRING
) row format delimited fields terminated by '\t'

CREATE TABLE ods_orders_inc
(
    orderid      INT,
    createtime   STRING,
    modifiedtime STRING,
    status       STRING
) PARTITIONED BY (day STRING)
    row format delimited fields terminated by '\t'

CREATE TABLE dw_orders_his
(
    orderid       INT,
    createtime    STRING,
    modifiedtime  STRING,
    status        STRING,
    dw_start_date STRING,
    dw_end_date   STRING
) row format delimited fields terminated by '\t';

将原始数据导入orders表中

原始数据
1	2016-08-20	2016-08-20	创建
2	2016-08-20	2016-08-20	创建
3	2016-08-20	2016-08-20	创建
1	2016-08-20	2016-08-21	支付
2	2016-08-20	2016-08-21	完成
4	2016-08-21	2016-08-21	创建
1	2016-08-20	2016-08-22	完成
3	2016-08-20	2016-08-22	支付
4	2016-08-21	2016-08-22	支付
5	2016-08-22	2016-08-22	创建
load data local inpath '/data/7.txt' into table orders;
select * from orders;

+-------+----------+------------+------+
|orderid|createtime|modifiedtime|status|
+-------+----------+------------+------+
|1      |2016-08-20|2016-08-20  |创建    |
|2      |2016-08-20|2016-08-20  |创建    |
|3      |2016-08-20|2016-08-20  |创建    |
|1      |2016-08-20|2016-08-21  |支付    |
|2      |2016-08-20|2016-08-21  |完成    |
|4      |2016-08-21|2016-08-21  |创建    |
|1      |2016-08-20|2016-08-22  |完成    |
|3      |2016-08-20|2016-08-22  |支付    |
|4      |2016-08-21|2016-08-22  |支付    |
|5      |2016-08-22|2016-08-22  |创建    |
+-------+----------+------------+------+

首先全量更新,我们先到2016-08-20为止的数据。初始化,先把2016-08-20的数据初始化进去

insert overwrite table ods_orders_inc partition (day = '2016-08-20')
select orderid,
       createtime,
       modifiedtime,
       status
from orders
where createtime < '2016-08-21'
  and modifiedtime < '2016-08-21';
select * from ods_orders_inc;
+-------+----------+------------+------+----------+
|orderid|createtime|modifiedtime|status|day       |
+-------+----------+------------+------+----------+
|1      |2016-08-20|2016-08-20  |创建    |2016-08-20|
|2      |2016-08-20|2016-08-20  |创建    |2016-08-20|
|3      |2016-08-20|2016-08-20  |创建    |2016-08-20|
+-------+----------+------------+------+----------+

刷到dw中

insert overwrite table dw_orders_his
select orderid,
       createtime,
       modifiedtime,
       status,
       createtime   as dw_start_date,
       '9999-12-31' as dw_end_date
from ods_orders_inc;
select * from dw_orders_his;

+-------+----------+------------+------+-------------+-----------+
|orderid|createtime|modifiedtime|status|dw_start_date|dw_end_date|
+-------+----------+------------+------+-------------+-----------+
|1      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999012-31 |
|2      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999012-31 |
|3      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999012-31 |
+-------+----------+------------+------+-------------+-----------+

剩余需要进行增量更新

insert overwrite table ods_orders_inc partition (day='2016-08-21')
select orderid, createtime, modifiedtime, status
from orders
where (createtime='2016-08-21' and modifiedtime='2016-08-21') or modifiedtime='2016-08-21';

select * from ods_orders_inc where day='2016-08-21';
+-------+----------+------------+------+----------+
|orderid|createtime|modifiedtime|status|day       |
+-------+----------+------------+------+----------+
|1      |2016-08-20|2016-08-21  |支付    |2016-08-21|
|2      |2016-08-20|2016-08-21  |完成    |2016-08-21|
|4      |2016-08-21|2016-08-21  |创建    |2016-08-21|
+-------+----------+------------+------+----------+

先放到增量表中,然后进行关联到一张临时表中,在插入到新表中



INSERT overwrite TABLE dw_orders_his
SELECT *
FROM dw_orders_his_tmp;
select * from dw_orders_his;
+-------+----------+------------+------+-------------+-----------+
|orderid|createtime|modifiedtime|status|dw_start_date|dw_end_date|
+-------+----------+------------+------+-------------+-----------+
|1      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999012-31 |
|1      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999-12-31 |
|1      |2016-08-20|2016-08-21  |支付    |2016-08-21   |9999-12-31 |
|2      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999-12-31 |
|2      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999012-31 |
|2      |2016-08-20|2016-08-21  |完成    |2016-08-21   |9999-12-31 |
|3      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999-12-31 |
|3      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999012-31 |
|4      |2016-08-21|2016-08-21  |创建    |2016-08-21   |9999-12-31 |
+-------+----------+------------+------+-------------+-----------+

DROP TABLE IF EXISTS dw_orders_his_tmp;
CREATE TABLE dw_orders_his_tmp AS
SELECT orderid,
       createtime,
       modifiedtime,
       status,
       dw_start_date,
       dw_end_date
FROM (
         SELECT a.orderid,
                a.createtime,
                a.modifiedtime,
                a.status,
                a.dw_start_date,
                CASE
                    WHEN b.orderid IS NOT NULL AND a.dw_end_date > '2016-08-21' THEN '2016-08-21'
                    ELSE a.dw_end_date END AS dw_end_date
         FROM dw_orders_his a
                  left outer join (SELECT * FROM ods_orders_inc WHERE day = '2016-08-21') b
                                  ON (a.orderid = b.orderid)
         UNION ALL
         SELECT orderid,
                createtime,
                modifiedtime,
                status,
                modifiedtime AS dw_start_date,
                '9999-12-31' AS dw_end_date
         FROM ods_orders_inc
         WHERE day = '2016-08-21'
     ) x
ORDER BY orderid, dw_start_date;
INSERT overwrite TABLE dw_orders_his
                   SELECT *
                   FROM dw_orders_his_tmp;
select *
from dw_orders_his;
+-------+----------+------------+------+-------------+-----------+
|orderid|createtime|modifiedtime|status|dw_start_date|dw_end_date|
+-------+----------+------------+------+-------------+-----------+
|1      |2016-08-20|2016-08-21  |支付    |2016-08-20   |2016-08-21 |
|1      |2016-08-20|2016-08-20  |创建    |2016-08-20   |2016-08-21 |
|1      |2016-08-20|2016-08-21  |支付    |2016-08-21   |9999-12-31 |
|2      |2016-08-20|2016-08-21  |完成    |2016-08-20   |2016-08-21 |
|2      |2016-08-20|2016-08-20  |创建    |2016-08-20   |2016-08-21 |
|2      |2016-08-20|2016-08-21  |完成    |2016-08-21   |9999-12-31 |
|3      |2016-08-20|2016-08-20  |创建    |2016-08-20   |9999-12-31 |
|4      |2016-08-21|2016-08-21  |创建    |2016-08-21   |2016-08-21 |
|4      |2016-08-21|2016-08-21  |创建    |2016-08-21   |9999-12-31 |
+-------+----------+------------+------+-------------+-----------+

参考

hive中拉链表_weixin_30677617的博客-CSDN博客

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值