1、准备数据表userods
create table userods(u_name string,u_pwd string,u_register date,u_des string) row format delimited fields terminated by '\t';
2、准备增量表user_inc
create table user_inc(u_name string,u_pwd string,u_register date,u_des string) row format delimited fields terminated by '\t';
3、准备拉链表user_chain
create table user_chain(u_name string,u_pwd string,u_register date,u_des string,u_start_date date,u_end_date date,u_status string) row format delimited fields terminated by '\t';
4、userods数据导入为
lily 123 2020-01-14 A
jhon 1234 2020-01-14 B
sunny 1234 2020-01-14 B
li 1234 2020-01-14 C
5、user_chain拉链表中先导入14号的数据
6、user_inc增量表导入新增数据为:
lily 123 2020-01-15 B
jhon 123456 2020-01-15 B
sunny 1234 2020-01-15 B
li 123 2020-01-15 C
zhang 1236 2020-01-15 A
7、拉链表user_chain更新数据,代码如下:
INSERT overwrite TABLE user_chain
SELECT
*
FROM(
SELECT
a.u_name,
a.u_pwd,
a.u_register,
a.u_des,
a.u_start_date,
CASE
WHEN a.u_end_date = '9999-12-31'
AND a.u_name IS NOT NULL
THEN to_date(FROM_UNIXTIME(UNIX_TIMESTAMP('2020-01-15','yyyy-MM-dd')))
ELSE a.u_end_date END AS u_end_date,
CASE
WHEN a.u_end_date = '9999-12-31'
AND a.u_name IS NOT NULL
THEN "expird"
ELSE "active" END AS u_status
FROM
user_chain a
LEFT JOIN user_inc b
ON b.u_name = a.u_name
UNION ALL
SELECT
c.u_name,
c.u_pwd,
c.u_register,
c.u_des,
to_date(FROM_UNIXTIME(UNIX_TIMESTAMP('2020-01-15','yyyy-MM-dd'))) AS u_start_date,
to_date(FROM_UNIXTIME(UNIX_TIMESTAMP('9999-12-31','yyyy-MM-dd'))) AS u_end_date,
"active" AS u_status
FROM user_inc c)T;
拉链表user_chain的数据为:
lily 123 2020-01-14 A 2020-01-14 2020-01-15 expird
jhon 1234 2020-01-14 B 2020-01-14 2020-01-15 expird
sunny 1234 2020-01-14 B 2020-01-14 2020-01-15 expird
li 1234 2020-01-14 C 2020-01-14 2020-01-15 expird
lily 123 2020-01-15 B 2020-01-15 9999-12-31 active
jhon 123456 2020-01-15 B 2020-01-15 9999-12-31 active
sunny 1234 2020-01-15 B 2020-01-15 9999-12-31 active
li 123 2020-01-15 C 2020-01-15 9999-12-31 active
zhang 1236 2020-01-15 A 2020-01-15 9999-12-31 active