/*
zipper.txt数据:
001 186xxxx1234 laoda 0 sh 2021-01-01 9999-12-31
002 186xxxx1235 laoer 1 bj 2021-01-01 9999-12-31
003 186xxxx1236 laosan 0 sz 2021-01-01 9999-12-31
004 186xxxx1237 laosi 1 gz 2021-01-01 9999-12-31
005 186xxxx1238 laowu 0 sh 2021-01-01 9999-12-31
006 186xxxx1239 laoliu 1 bj 2021-01-01 9999-12-31
007 186xxxx1240 laoqi 0 sz 2021-01-01 9999-12-31
008 186xxxx1241 laoba 1 gz 2021-01-01 9999-12-31
009 186xxxx1242 laojiu 0 sh 2021-01-01 9999-12-31
010 186xxxx1243 laoshi 1 bj 2021-01-01 9999-12-31
update.txt数据:
008 186xxxx1241 laoba 1 sh 2021-01-02 9999-12-31
011 186xxxx1244 laoshi 1 jx 2021-01-02 9999-12-31
012 186xxxx1245 laoshi 0 zj 2021-01-02 9999-12-31
*/
-- 创建拉链表,加载原始数据
create table dw_zipper
(
user_id string comment '用户id',
phone string comment '电话',
nick string comment '昵称',
gender string comment '性别',
addr string comment '地址',
start_time string comment '开始时间',
end_time string comment '结束时间'
) row format delimited fields terminated by '\t';
load data local inpath '/export/data/zipper.txt' into table dw_zipper;
select *
from dw_zipper;
-- 创建ods层增量表,加载新增和更新的数据
create table ods_zipper_update
(
user_id string comment '用户id',
phone string comment '电话',
nick string comment '昵称',
gender string comment '性别',
addr string comment '地址',
start_time string comment '开始时间',
end_time string comment '结束时间'
) row format delimited fields terminated by '\t';
load data local inpath '/export/data/update.txt' into table ods_zipper_update;
select *
from ods_zipper_update;
-- 创建临时表,合并数据
create table tmp_zipper
(
user_id string comment '用户id',
phone string comment '电话',
nick string comment '昵称',
gender string comment '性别',
addr string comment '地址',
start_time string comment '开始时间',
end_time string comment '结束时间'
) row format delimited fields terminated by '\t';
-- 将新增数据和原数据合并,并将有更新的数据的旧数据结束日期改为新数据的开始日期-1
insert overwrite table tmp_zipper
select user_id,
phone,
nick,
gender,
addr,
start_time,
end_time
from ods_zipper_update
union all
-- 查询原来拉链表的所有数据,并将这次需要更新的数据的end_time更改为新值的start_time-1
select dz.user_id,
dz.phone,
dz.nick,
dz.gender,
dz.addr,
dz.start_time,
/*
判断left join后的数据,user_id为null或者end_time小于9999-12-31则说明数据没有更新;
否则将更新数据的开始日期减1作为旧数据的结束日期
*/
if(ozu.user_id is null or dz.end_time < '9999-12-31', dz.end_time, date_sub(ozu.start_time, 1)) as end_time
from dw_zipper dz
-- 获取左表的全部数据和右表的交集,右表没有user_id则显示为null
left join ods_zipper_update ozu on dz.user_id = ozu.user_id;
-- 覆盖拉链表
insert overwrite table dw_zipper
select *
from tmp_zipper;
拉链表实现步骤
最新推荐文章于 2024-07-01 17:29:38 发布