2022.03.06 mysql8拉链表-测试

1.创建业务表并初始数据

drop table  if exists mall_user;
create table mall_user
(
    uid             bigint unsigned auto_increment comment '用户唯一ID'  primary key,
    login_name      varchar(32) not null comment '用户登录名',
    nick_name       varchar(32) not null default '请设置昵称' comment '用户昵称',
    remark        varchar(255) not null default '' comment '备注',
    create_time     timestamp default CURRENT_TIMESTAMP null comment '创建时间',
    update_time     timestamp default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP comment '更新数据',
    is_del      tinyint not null default '0' comment '删除标记 0-正常 1-已删除'
) comment '用户信息业务表' default character set utf8mb4;

insert into mall_user(uid, login_name, nick_name, remark, create_time, update_time)  
values
(1001, 'loginName001','nickName001', '初始数据', date_sub(now(), interval 2 day), date_sub(now(), interval 2 day)),
(1002, 'loginName002','nickName002', '初始数据', date_sub(now(), interval 3 day), date_sub(now(), interval 3 day)),
(1003, 'loginName003','nickName003', '初始数据', date_sub(now(), interval 4 day), date_sub(now(), interval 4 day)),
(1004, 'loginName004','nickName004', '初始数据', date_sub(now(), interval 5 day), date_sub(now(), interval 5 day));

update mall_user set is_del=1,update_time=date_sub(now(), interval 2 day)  where uid=1002;

在这里插入图片描述

2.假设业务表初始同步到ods表,插入拉链表,增加记录开始时间和结束时间

create table dim_mall_user_zip
(
    uid             bigint unsigned auto_increment comment '用户唯一ID'  primary key,
    login_name      varchar(32) not null comment '用户登录名',
    nick_name       varchar(32) not null default '请设置昵称' comment '用户昵称',
    remark      varchar(255) not null default '' comment '备注',
    create_time     timestamp default CURRENT_TIMESTAMP null comment '创建时间',
    update_time     timestamp default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP comment '更新数据',
    is_del      tinyint not null default '0' comment '删除标记 0-正常 1-已删除',
  start_date    date comment '生效时间',
  end_date    date comment '结束时间'
) comment '用户信息业务表-拉链表' default character set utf8mb4;
2.1首次装载数据-初始化拉链表-有效数据
replace into dim_mall_user_zip(uid, login_name, nick_name, remark, create_time, update_time, is_del, start_date, end_date)
select uid, login_name, nick_name, remark, create_time, update_time, is_del, 
date_format(create_time, '%Y-%m-%d') start_date,
'9999-12-31' end_date
from mall_user
where is_del=0;

在这里插入图片描述

2.2首次装载数据-初始化拉链表-新建一个分区放置过期数据
create table dim_mall_user_zip_expire_20220305 like dim_mall_user_zip;
replace into dim_mall_user_zip(uid, login_name, nick_name, remark, create_time, update_time, is_del, start_date, end_date)
select uid, login_name, nick_name, remark, create_time, update_time, is_del, 
date_format(create_time, '%Y-%m-%d') start_date,
date_format(update_time, '%Y-%m-%d') end_date
from mall_user
where is_del=1;

在这里插入图片描述

2.3此时全部的数据(hive里面的多个分区)
select * from dim_mall_user_zip dmuz 
union
select * from dim_mall_user_zip_expire_20220305 dmuze 
order by uid

在这里插入图片描述

3假设第二天,原业务表增加1条记录,修改1条记录,逻辑删除1条记录

insert into mall_user(uid, login_name, nick_name, remark, create_time, update_time)  
values
(1005, 'loginName005','nickName005', '每日新增数据', date_sub(now(), interval 1 day), date_sub(now(), interval 1 day));

update mall_user set nick_name='nickName033', remark ='每日修改数据' ,
create_time=date_sub(now(), interval 1 day), update_time=date_sub(now(), interval 1 day)
where uid=1003;

update mall_user set is_del =1, remark ='每日逻辑删除数据',
create_time=date_sub(now(), interval 1 day), update_time=date_sub(now(), interval 1 day)
where uid=1001;

3.1业务库表当前数据

在这里插入图片描述

3.2时间来到第三天,需要获取昨天变更的数据到当前ods表,1条新增+1条更新+1条逻辑删除
select * from mall_user
where (
create_time >= date_sub(curdate(), interval 1 day) and create_time < date_sub(curdate(), interval 0 day) 
or 
update_time >= date_sub(curdate(), interval 1 day) and update_time > date_sub(curdate(), interval 0 day)
);

变化的数据如下

在这里插入图片描述

将其抽到每日新增ods表(分区)

create table ods_mall_user_20220306
(
    uid             bigint unsigned auto_increment comment '用户唯一ID'  primary key,
    login_name      varchar(32) not null comment '用户登录名',
    nick_name       varchar(32) not null default '请设置昵称' comment '用户昵称',
    remark        varchar(255) not null default '' comment '备注',
    create_time     timestamp default CURRENT_TIMESTAMP null comment '创建时间',
    update_time     timestamp default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP comment '更新数据',
    is_del      tinyint not null default '0' comment '删除标记 0-正常 1-已删除'
) comment '用户信息业务表-每日变更ods表' default character set utf8mb4;

replace into ods_mall_user_20220306 
select * from mall_user
where (
create_time >= date_sub(curdate(), interval 1 day) and create_time < date_sub(curdate(), interval 0 day) 
or 
update_time >= date_sub(curdate(), interval 1 day) and update_time > date_sub(curdate(), interval 0 day)
);

4.合并全量最新数据到拉链表

昨日全量最新和今天变化的数据对比

select * from dim_mall_user_zip t1

select 
uid,
login_name ,
nick_name ,
remark ,
create_time ,
update_time ,
is_del ,
curdate() start_date ,
'9999-12-31' end_date 
from ods_mall_user_20220306

在这里插入图片描述

分析

不变的部分1004

修改的部分1001(标记删除),1003(昵称修改),原来的数据需要将结束时间改为当天,并插入新的1003数据

新增的部分1005

开链表上应该有1003、1004、1005记录

因为在mysql8上,不支持fulljoin,用union实现全外连接,左表还不能指明列名?!

select 
      *
    from dim_mall_user_zip_bak zip right join
      (select 
      uid           new_uid,
      login_name         new_login_name,
      nick_name         new_nick_name,
      remark           new_remark,
      create_time       new_create_time,
      update_time       new_update_time,
      is_del           new_is_del,
      curdate()          new_start_date,
      '9999-12-31'        new_end_date
      from ods_mall_user_20220306 where is_del=0) ods on zip.uid=ods.new_uid
    
    union
    
    select * from dim_mall_user_zip_bak zip left join
      (select 
      uid,
      login_name ,
      nick_name ,
      remark ,
      create_time ,
      update_time ,
      is_del ,
      curdate() start_date ,
      '9999-12-31' end_date 
      from ods_mall_user_20220306 where is_del=0) ods on zip.uid=ods.uid

全外连接后和execl里面是一样的

1001  loginName001  nickName001  初始数据  2022-03-04 13:32:20.0  2022-03-04 13:32:20.0  0  2022-03-04  9999-12-31  1001  loginName001  nickName001  每日逻辑删除数据  2022-03-05 11:24:12.0  2022-03-05 11:24:12.0  1  2022-03-06  9999-12-31
1003  loginName003  nickName002  初始数据  2022-03-02 13:32:20.0  2022-03-02 13:32:20.0  0  2022-03-02  9999-12-31  1003  loginName003  nickName033  每日修改数据  2022-03-05 11:24:09.0  2022-03-05 11:24:09.0  0  2022-03-06  9999-12-31
                  1004  loginName004  nickName004  每日新增数据  2022-03-05 11:24:06.0  2022-03-05 11:24:06.0  0  2022-03-06  9999-12-31
                  1005  loginName005  nickName005  每日新增数据  2022-03-05 11:24:06.0  2022-03-05 11:24:06.0  0  2022-03-06  9999-12-31
4.1模拟hive覆盖全新数据的拉链表
create table dim_mall_user_zip_bak like dim_mall_user_zip;
insert into dim_mall_user_zip_bak select * from dim_mall_user_zip; -- 此时只有初始化有效的2条数据
truncate table dim_mall_user_zip;

mysql还得套一层,不然过滤会有问题

insert into dim_mall_user_zip 
select * from (
  select 
      if(new_uid is not null, new_uid, uid) uid,
    if(new_login_name is not null, new_login_name, login_name) login_name,
    if(new_nick_name is not null, new_nick_name, nick_name) nick_name,
    if(new_remark is not null, new_remark, remark) remark,
    if(new_create_time is not null, new_create_time, create_time) create_time,
    if(new_update_time is not null, new_update_time, update_time) update_time,
    if(new_is_del is not null, new_is_del, is_del) is_del,
    if(new_start_date is not null, new_start_date, start_date) start_date,
    if(new_end_date is not null, new_end_date, end_date) end_date
    from (
      select 
        *
      from dim_mall_user_zip_bak zip right join
        (select 
        uid           new_uid,
        login_name         new_login_name,
        nick_name         new_nick_name,
        remark           new_remark,
        create_time       new_create_time,
        update_time       new_update_time,
        is_del           new_is_del,
        curdate()          new_start_date,
        '9999-12-31'        new_end_date
        from ods_mall_user_20220306) ods on zip.uid=ods.new_uid
      
      union
      
      select * from dim_mall_user_zip_bak zip left join
        (select 
        uid,
        login_name ,
        nick_name ,
        remark ,
        create_time ,
        update_time ,
        is_del ,
        curdate() start_date ,
        '9999-12-31' end_date 
        from ods_mall_user_20220306 ) ods on zip.uid=ods.uid
    ) all_new
  )t
  where is_del=0

在这里插入图片描述

4.2将过期的数据插入插入当天过期分区表
create table dim_mall_user_zip_expire_20220306 like dim_mall_user_zip;


insert into dim_mall_user_zip_expire_20220306
  select 
      uid,
    login_name,
    nick_name,
    remark,
    create_time,
    update_time,
    is_del,
    start_date,
    curdate() end_date
    from (
      select 
        *
      from dim_mall_user_zip_bak zip right join
        (select 
        uid           new_uid,
        login_name         new_login_name,
        nick_name         new_nick_name,
        remark           new_remark,
        create_time       new_create_time,
        update_time       new_update_time,
        is_del           new_is_del,
        curdate()          new_start_date,
        '9999-12-31'        new_end_date
        from ods_mall_user_20220306) ods on zip.uid=ods.new_uid
      
      union
      
      select * from dim_mall_user_zip_bak zip left join
        (select 
        uid           new_uid,
        login_name         new_login_name,
        nick_name         new_nick_name,
        remark           new_remark,
        create_time       new_create_time,
        update_time       new_update_time,
        is_del           new_is_del,
        curdate()          new_start_date,
        '9999-12-31'        new_end_date
        from ods_mall_user_20220306 ) ods on zip.uid=ods.new_uid
        
        where uid is not null and new_uid is not null
    ) all_new 
    where uid is not null and new_uid is not null;

在这里插入图片描述

4.3查看开链表和两张闭链表
select  * from dim_mall_user_zip dmuz 
union all
select * from dim_mall_user_zip_expire_20220305 dmuze 
union all 
select * from dim_mall_user_zip_expire_20220306 dmuze2 
order by uid, end_date

在这里插入图片描述

5.对于已过期的数据和同时标记了删除的数据,实际上历史保留的是哪一条?

replace dim_mall_user_zip_expire_20220306
  select 
      uid,
    login_name,
    nick_name,
    remark,
    create_time,
    update_time,
    is_del,
    start_date,
    curdate() end_date
    from (
      select 
        *
      from dim_mall_user_zip_bak zip right join
        (select 
        uid           new_uid,
        login_name         new_login_name,
        nick_name         new_nick_name,
        remark           new_remark,
        create_time       new_create_time,
        update_time       new_update_time,
        is_del           new_is_del,
        curdate()          new_start_date,
        '9999-12-31'        new_end_date
        from ods_mall_user_20220306) ods on zip.uid=ods.new_uid
      
      union
      
      select * from dim_mall_user_zip_bak zip left join
        (select 
        uid           new_uid,
        login_name         new_login_name,
        nick_name         new_nick_name,
        remark           new_remark,
        create_time       new_create_time,
        update_time       new_update_time,
        is_del           new_is_del,
        curdate()          new_start_date,
        '9999-12-31'        new_end_date
        from ods_mall_user_20220306 ) ods on zip.uid=ods.new_uid
        
        where uid is not null and new_uid is not null
    ) all_new 
    where uid is not null and new_uid is not null
  union
  
  select uid, login_name, nick_name, remark, create_time, update_time, is_del, 
  date_format(create_time, '%Y-%m-%d') start_date,
  date_format(update_time, '%Y-%m-%d') end_date
  from mall_user
  where is_del=1
  AND 
   (
  create_time >= date_sub(curdate(), interval 1 day) and create_time < date_sub(curdate(), interval 0 day) 
  or 
  update_time >= date_sub(curdate(), interval 1 day) and update_time > date_sub(curdate(), interval 0 day)
  );

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Jenvid.yang

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值