概念
拉链表目的在保存历史记录. 同时平衡时间和空间消耗
案例
CREATE TABLE orders
(
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING
) row format delimited fields terminated by '\t'
CREATE TABLE ods_orders_inc
(
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING
) PARTITIONED BY (day STRING)
row format delimited fields terminated by '\t'
CREATE TABLE dw_orders_his
(
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING,
dw_start_date STRING,
dw_end_date STRING
) row format delimited fields terminated by '\t';
将原始数据导入orders表中
原始数据
1 2016-08-20 2016-08-20 创建
2 2016-08-20 2016-08-20 创建
3 2016-08-20 2016-08-20 创建
1 2016-08-20 2016-08-21 支付
2 2016-08-20 2016-08-21 完成
4 2016-08-21 2016-08-21 创建
1 2016-08-20 2016-08-22 完成
3 2016-08-20 2016-08-22 支付
4 2016-08-21 2016-08-22 支付
5 2016-08-22 2016-08-22 创建
load data local inpath '/data/7.txt' into table orders;
select * from orders;
+-------+----------+------------+------+
|orderid|createtime|modifiedtime|status|
+-------+----------+------------+------+
|1 |2016-08-20|2016-08-20 |创建 |
|2 |2016-08-20|2016-08-20 |创建 |
|3 |2016-08-20|2016-08-20 |创建 |
|1 |2016-08-20|2016-08-21 |支付 |
|2 |2016-08-20|2016-08-21 |完成 |
|4 |2016-08-21|2016-08-21 |创建 |
|1 |2016-08-20|2016-08-22 |完成 |
|3 |2016-08-20|2016-08-22 |支付 |
|4 |2016-08-21|2016-08-22 |支付 |
|5 |2016-08-22|2016-08-22 |创建 |
+-------+----------+------------+------+
首先全量更新,我们先到2016-08-20为止的数据。初始化,先把2016-08-20的数据初始化进去
insert overwrite table ods_orders_inc partition (day = '2016-08-20')
select orderid,
createtime,
modifiedtime,
status
from orders
where createtime < '2016-08-21'
and modifiedtime < '2016-08-21';
select * from ods_orders_inc;
+-------+----------+------------+------+----------+
|orderid|createtime|modifiedtime|status|day |
+-------+----------+------------+------+----------+
|1 |2016-08-20|2016-08-20 |创建 |2016-08-20|
|2 |2016-08-20|2016-08-20 |创建 |2016-08-20|
|3 |2016-08-20|2016-08-20 |创建 |2016-08-20|
+-------+----------+------------+------+----------+
刷到dw中
insert overwrite table dw_orders_his
select orderid,
createtime,
modifiedtime,
status,
createtime as dw_start_date,
'9999-12-31' as dw_end_date
from ods_orders_inc;
select * from dw_orders_his;
+-------+----------+------------+------+-------------+-----------+
|orderid|createtime|modifiedtime|status|dw_start_date|dw_end_date|
+-------+----------+------------+------+-------------+-----------+
|1 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999012-31 |
|2 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999012-31 |
|3 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999012-31 |
+-------+----------+------------+------+-------------+-----------+
剩余需要进行增量更新
insert overwrite table ods_orders_inc partition (day='2016-08-21')
select orderid, createtime, modifiedtime, status
from orders
where (createtime='2016-08-21' and modifiedtime='2016-08-21') or modifiedtime='2016-08-21';
select * from ods_orders_inc where day='2016-08-21';
+-------+----------+------------+------+----------+
|orderid|createtime|modifiedtime|status|day |
+-------+----------+------------+------+----------+
|1 |2016-08-20|2016-08-21 |支付 |2016-08-21|
|2 |2016-08-20|2016-08-21 |完成 |2016-08-21|
|4 |2016-08-21|2016-08-21 |创建 |2016-08-21|
+-------+----------+------------+------+----------+
先放到增量表中,然后进行关联到一张临时表中,在插入到新表中
INSERT overwrite TABLE dw_orders_his
SELECT *
FROM dw_orders_his_tmp;
select * from dw_orders_his;
+-------+----------+------------+------+-------------+-----------+
|orderid|createtime|modifiedtime|status|dw_start_date|dw_end_date|
+-------+----------+------------+------+-------------+-----------+
|1 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999012-31 |
|1 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999-12-31 |
|1 |2016-08-20|2016-08-21 |支付 |2016-08-21 |9999-12-31 |
|2 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999-12-31 |
|2 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999012-31 |
|2 |2016-08-20|2016-08-21 |完成 |2016-08-21 |9999-12-31 |
|3 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999-12-31 |
|3 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999012-31 |
|4 |2016-08-21|2016-08-21 |创建 |2016-08-21 |9999-12-31 |
+-------+----------+------------+------+-------------+-----------+
DROP TABLE IF EXISTS dw_orders_his_tmp;
CREATE TABLE dw_orders_his_tmp AS
SELECT orderid,
createtime,
modifiedtime,
status,
dw_start_date,
dw_end_date
FROM (
SELECT a.orderid,
a.createtime,
a.modifiedtime,
a.status,
a.dw_start_date,
CASE
WHEN b.orderid IS NOT NULL AND a.dw_end_date > '2016-08-21' THEN '2016-08-21'
ELSE a.dw_end_date END AS dw_end_date
FROM dw_orders_his a
left outer join (SELECT * FROM ods_orders_inc WHERE day = '2016-08-21') b
ON (a.orderid = b.orderid)
UNION ALL
SELECT orderid,
createtime,
modifiedtime,
status,
modifiedtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM ods_orders_inc
WHERE day = '2016-08-21'
) x
ORDER BY orderid, dw_start_date;
INSERT overwrite TABLE dw_orders_his
SELECT *
FROM dw_orders_his_tmp;
select *
from dw_orders_his;
+-------+----------+------------+------+-------------+-----------+
|orderid|createtime|modifiedtime|status|dw_start_date|dw_end_date|
+-------+----------+------------+------+-------------+-----------+
|1 |2016-08-20|2016-08-21 |支付 |2016-08-20 |2016-08-21 |
|1 |2016-08-20|2016-08-20 |创建 |2016-08-20 |2016-08-21 |
|1 |2016-08-20|2016-08-21 |支付 |2016-08-21 |9999-12-31 |
|2 |2016-08-20|2016-08-21 |完成 |2016-08-20 |2016-08-21 |
|2 |2016-08-20|2016-08-20 |创建 |2016-08-20 |2016-08-21 |
|2 |2016-08-20|2016-08-21 |完成 |2016-08-21 |9999-12-31 |
|3 |2016-08-20|2016-08-20 |创建 |2016-08-20 |9999-12-31 |
|4 |2016-08-21|2016-08-21 |创建 |2016-08-21 |2016-08-21 |
|4 |2016-08-21|2016-08-21 |创建 |2016-08-21 |9999-12-31 |
+-------+----------+------------+------+-------------+-----------+