拉链表流程
1.从ODS层获取增量数据(上一天新增和更新的数据) 2.拿着DWD原始拉链表数据 left join 增量数据 ,修改原始拉链中历史数据的结束时间 3.拿着left join 的结果集 union all 增量数据 4.把最新的拉链数据优先保存到DWD对应的临时表中 5.使用insert+select 方式把临时表中数据灌入DWD拉链表中
拉链表实现流程:
DWD层开发
DWD层: 数仓明细层(清洗转换、降维操作) 此层核心目标: 基于数据探查情况, 对相关表数据进行合并
会员基础信息表:
-
需要对此表进行历史数据拉链
说明: 将每日会员基础信息表中新增及更新的数据, 通过拉链的方式记录下来
建表操作:
CREATE TABLE IF NOT EXISTS dwd.dwd_mem_member_union_i(
zt_id BIGINT COMMENT '中台会员ID',
member_id BIGINT COMMENT '会员ID',
user_id BIGINT COMMENT '用户ID',
card_no STRING COMMENT '卡号',
member_name STRING COMMENT '会员名称',
mobile STRING COMMENT '手机号',
user_email STRING COMMENT '邮箱',
sex BIGINT COMMENT '用户的性别,1男性,2女性,0未知',
birthday_date STRING COMMENT '生日',
address STRING COMMENT '地址',
reg_time TIMESTAMP COMMENT '注册时间',
reg_md STRING COMMENT '注册门店',
bind_md STRING COMMENT '绑定门店',
flag BIGINT COMMENT '0正常,1删除',
is_black BIGINT COMMENT '是否被拉黑 1被拉黑,0正常用户',
user_state BIGINT COMMENT '会员状态,0停用/注销,1正常,2冻结',
user_type STRING COMMENT '用户类型(-1:传智鲜用户;0:普通用户;1:企业用户 2:内部员工 3:黑马门店 4:商铺会员 5:大买家 6:中间商 7:军区员工)',
member_type BIGINT COMMENT '会员状态 10:未付费会员 20:付费会员',
member_status BIGINT COMMENT '付费会员状态 -1:未付费会员 1:正常 2:试用 3:过期 4:试用已过期',
expired_time TIMESTAMP COMMENT '过期时间',
user_source BIGINT COMMENT '用户来源 ',
member_level BIGINT COMMENT '会员等级',
growth BIGINT COMMENT '成长值',
invite_member_id BIGINT COMMENT '邀请人标识',
invite_type BIGINT COMMENT '邀请类型,0为内部',
register_store_leader_id BIGINT COMMENT '注册归属团长 ID',
last_update_time TIMESTAMP COMMENT '更新日期',
end_date STRING COMMENT '生效结束日期'
)
comment '会员基础信息表'
partitioned by (start_date STRING COMMENT '生效开始日期')
row format delimited fields terminated by ','
stored as orc
tblproperties ('orc.compress'='SNAPPY');
需要对此表进行历史数据拉链
说明: 将每日会员基础信息表中新增及更新的数据, 通过拉链的方式记录下来
首次导入
-- 会员基础信息表首次导入:
insert overwrite table dwd.dwd_mem_member_union_i partition (start_date)
select
zt_id,
member_id,
user_id,
card_no,
member_name,
mobile,
user_email,
sex,
birthday_date,
address,
reg_time,
reg_md,
bind_md,
flag,
is_black,
user_state,
user_type,
member_type,
member_status,
expired_time,
user_source,
member_level,
growth,
invite_member_id,
invite_type,
register_store_leader_id,
last_update_time,
'9999-99-99' as end_date,
'2023-11-29' as start_date
from ods.ods_mem_member_union_i;
后续导入
1.mysql业务数据变化
注意: 实际开发没有自己准备业务数据这一步!!!
-- 步骤一: 在MySQL中, 添加增量的测试数据, 包含 新增 和 更新的数据 (测试)
-- 模拟新增数据
insert into member.member_union (zt_id, member_id, user_id, card_no, member_name, mobile, user_email, sex, birthday_date, address,reg_time, reg_md, bind_md, flag, is_black, user_state, user_type, member_type, member_status, expired_time, user_source, member_level, growth, invite_member_id, invite_type, register_store_leader_id,last_update_time)
values ('32015926',2160344,NULL,'','32015925',114,163,0,'','不详','2023-11-30 17:09:28','W121','W121',0,0,1,-1,10,-1,NULL,-1,0,0,NULL,NULL,NULL,'2023-11-30 17:09:28');
-- 模拟更新数据
UPDATE member.member_union SET SEX = 1, last_update_time = '2023-11-30 17:10:20' WHERE zt_id = '32015925';
-- 验证数据
select *
from member.member_union
where date_format(reg_time,'%Y-%m-%d') = date_format(date_sub(Now(),INTERVAL 1 DAY),'%Y-%m-%d')
OR date_format(last_update_time,'%Y-%m-%d') = date_format(date_sub(Now(),INTERVAL 1 DAY),'%Y-%m-%d')
2.datax-web导数据到ods
-- 步骤二: 执行DataX, 将新增数据和增量数据导入到ODS层 (应该在数据采集中执行)
-- 说明: 此步骤详细过程参考day02实施
-- 注意: mysqlreader中记得补充条件
date_format(reg_time,'%Y-%m-%d') = date_format(date_sub(Now(),INTERVAL 1 DAY),'%Y-%m-%d')
OR date_format(last_update_time,'%Y-%m-%d') = date_format(date_sub(Now(),INTERVAL 1 DAY),'%Y-%m-%d')
-- 注意: hdfswriter中记得补一个后置sql语句,内容如下
"postSql":[
"msck repair table ods.ods_mem_member_union_i"
],
3.hive增量导数据到dwd
-- 步骤三: 执行增量数据导入
-- 先创建一张目标表的临时表, 用于放置计算后的结果
CREATE TABLE IF NOT EXISTS dwd.dwd_mem_member_union_i_temp(
zt_id BIGINT COMMENT '中台会员ID',
member_id BIGINT COMMENT '会员ID',
user_id BIGINT COMMENT '用户ID',
card_no STRING COMMENT '卡号',
member_name STRING COMMENT '会员名称',
mobile STRING COMMENT '手机号',
user_email STRING COMMENT '邮箱',
sex BIGINT COMMENT '用户的性别,1男性,2女性,0未知',
birthday_date STRING COMMENT '生日',
address STRING COMMENT '地址',
reg_time TIMESTAMP COMMENT '注册时间',
reg_md STRING COMMENT '注册门店',
bind_md STRING COMMENT '绑定门店',
flag BIGINT COMMENT '0正常,1删除',
is_black BIGINT COMMENT '是否被拉黑 1被拉黑,0正常用户',
user_state BIGINT COMMENT '会员状态,0停用/注销,1正常,2冻结',
user_type STRING COMMENT '用户类型(-1:传智鲜用户;0:普通用户;1:企业用户 2:内部员工 3:黑马门店 4:商铺会员 5:大买家 6:中间商 7:军区员工)',
member_type BIGINT COMMENT '会员状态 10:未付费会员 20:付费会员',
member_status BIGINT COMMENT '付费会员状态 -1:未付费会员 1:正常 2:试用 3:过期 4:试用已过期',
expired_time TIMESTAMP COMMENT '过期时间',
user_source BIGINT COMMENT '用户来源 ',
member_level BIGINT COMMENT '会员等级',
growth BIGINT COMMENT '成长值',
invite_member_id BIGINT COMMENT '邀请人标识',
invite_type BIGINT COMMENT '邀请类型,0为内部',
register_store_leader_id BIGINT COMMENT '注册归属团长 ID',
last_update_time TIMESTAMP COMMENT '更新日期',
end_date STRING COMMENT '生效结束日期'
)
comment '会员基础信息表'
partitioned by (start_date STRING COMMENT '生效开始日期')
row format delimited fields terminated by ','
stored as orc
tblproperties ('orc.compress'='SNAPPY');
-- 循环导入数据
with t2 as (
select
t1.zt_id,
t1.member_id,
t1.user_id,
t1.card_no,
t1.member_name,
t1.mobile,
t1.user_email,
t1.sex,
t1.birthday_date,
t1.address,
t1.reg_time,
t1.reg_md,
t1.bind_md,
t1.flag,
t1.is_black,
t1.user_state,
t1.user_type,
t1.member_type,
t1.member_status,
t1.expired_time,
t1.user_source,
t1.member_level,
t1.growth,
t1.invite_member_id,
t1.invite_type,
t1.register_store_leader_id,
t1.last_update_time,
if(
t2.zt_id is null OR t1.end_date != '9999-99-99',
t1.end_date,
t2.dt
) as end_date,
t1.start_date
from dwd.dwd_mem_member_union_i t1
left join (select * from ods.ods_mem_member_union_i
where dt = date_format(date_sub(current_date(),1),'yyyy-MM-dd')
) as t2 on t1.zt_id = t2.zt_id
union all
select
zt_id,
member_id,
user_id,
card_no,
member_name,
mobile,
user_email,
sex,
birthday_date,
address,
reg_time,
reg_md,
bind_md,
flag,
is_black,
user_state,
user_type,
member_type,
member_status,