文章目录
一 退单事实表(事务型事实表)
1 分区设计
2 建表语句
DROP TABLE IF EXISTS dwd_order_refund_info;
CREATE EXTERNAL TABLE dwd_order_refund_info(
`id` STRING COMMENT '编号',
`user_id` STRING COMMENT '用户ID',
`order_id` STRING COMMENT '订单ID',
`sku_id` STRING COMMENT '商品ID',
`province_id` STRING COMMENT '地区ID',
`refund_type` STRING COMMENT '退单类型',
`refund_num` BIGINT COMMENT '退单件数',
`refund_amount` DECIMAL(16,2) COMMENT '退单金额',
`refund_reason_type` STRING COMMENT '退单原因类型',
`create_time` STRING COMMENT '退单时间'
) COMMENT '退单事实表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_order_refund_info/'
TBLPROPERTIES ("orc.compress"="snappy");
3 导入数据
(1)首日导入
insert overwrite table dwd_order_refund_info partition(dt)
select
ri.id,
ri.user_id,
ri.order_id,
ri.sku_id,
oi.province_id,
ri.refund_type,
ri.refund_num,
ri.refund_amount,
ri.refund_reason_type,
ri.create_time,
date_format(ri.create_time,'yyyy-MM-dd')
from
(
select * from ods_order_refund_info where dt='2020-06-14'
)ri
left join
(
select id,province_id from ods_order_info where dt='2020-06-14'
)oi
on ri.order_id=oi.id;
(2)每日导入
insert overwrite table dwd_order_refund_info partition(dt='2020-06-15')
select
ri.id,
ri.user_id,
ri.order_id,
ri.sku_id,
oi.province_id,
ri.refund_type,
ri.refund_num,
ri.refund_amount,
ri.refund_reason_type,
ri.create_time
from
(
select * from ods_order_refund_info where dt='2020-06-15'
)ri
left join
(
select id,province_id from ods_order_info where dt='2020-06-15'
)oi
on ri.order_id=oi.id;
二 加购事实表(周期型快照事实表,每日快照)
1 分区设计
2 建表语句
DROP TABLE IF EXISTS dwd_cart_info;
CREATE EXTERNAL TABLE dwd_cart_info(
`id` STRING COMMENT '编号',
`user_id` STRING COMMENT '用户ID',
`sku_id` STRING COMMENT '商品ID',
`source_type` STRING COMMENT '来源类型',
`source_id` STRING COMMENT '来源编号',
`cart_price` DECIMAL(16,2) COMMENT '加入购物车时的价格',
`is_ordered` STRING COMMENT '是否已下单',
`create_time` STRING COMMENT '创建时间',
`operate_time` STRING COMMENT '修改时间',
`order_time` STRING COMMENT '下单时间',
`sku_num` BIGINT COMMENT '加购数量'
) COMMENT '加购事实表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_cart_info/'
TBLPROPERTIES ("orc.compress"="snappy");
3 导入数据
每日全量导入
每日导入与首日导入相同
insert overwrite table dwd_cart_info partition(dt='2020-06-14')
select
id,
user_id,
sku_id,
source_type,
source_id,
cart_price,
is_ordered,
create_time,
operate_time,
order_time,
sku_num
from ods_cart_info
where dt='2020-06-14';
三 收藏事实表(周期型快照事实表,每日快照)
设计思路与架构事实表相同
1 建表语句
DROP TABLE IF EXISTS dwd_favor_info;
CREATE EXTERNAL TABLE dwd_favor_info(
`id` STRING COMMENT '编号',
`user_id` STRING COMMENT '用户id',
`sku_id` STRING COMMENT 'skuid',
`spu_id` STRING COMMENT 'spuid',
`is_cancel` STRING COMMENT '是否取消',
`create_time` STRING COMMENT '收藏时间',
`cancel_time` STRING COMMENT '取消时间'
) COMMENT '收藏事实表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_favor_info/'
TBLPROPERTIES ("orc.compress"="snappy");
2 数据导入
insert overwrite table dwd_favor_info partition(dt='2020-06-14')
select
id,
user_id,
sku_id,
spu_id,
is_cancel,
create_time,
cancel_time
from ods_favor_info
where dt='2020-06-14';
以上两张周期型快照事实表在某种程度上并不是很重要,可以被dwd_action_log表代替
四 优惠券领用事实表(累积型快照事实表)
1 分区设计
2 建表语句
DROP TABLE IF EXISTS dwd_coupon_use;
CREATE EXTERNAL TABLE dwd_coupon_use(
`id` STRING COMMENT '编号',
`coupon_id` STRING COMMENT '优惠券ID',
`user_id` STRING COMMENT 'userid',
`order_id` STRING COMMENT '订单id',
`coupon_status` STRING COMMENT '优惠券状态',
`get_time` STRING COMMENT '领取时间',
`using_time` STRING COMMENT '使用时间(下单)',
`used_time` STRING COMMENT '使用时间(支付)',
`expire_time` STRING COMMENT '过期时间'
) COMMENT '优惠券领用事实表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_coupon_use/'
TBLPROPERTIES ("orc.compress"="snappy");
3 数据导入
数据导入思路:
(1)首日导入
used_time为null且expire_time为null,进入9999-99-99分区
coalesce函数返回第一个不为null的值,可以理解为三个及其以上的nvl
insert overwrite table dwd_coupon_use partition(dt)
select
id,
coupon_id,
user_id,
order_id,
coupon_status,
get_time,
using_time,
used_time,
expire_time,
coalesce(date_format(used_time,'yyyy-MM-dd'),date_format(expire_time,'yyyy-MM-dd'),'9999-99-99')
from ods_coupon_use
where dt='2020-06-14';
(2)每日导入
将9999-99-99分区的数据查询出来,与6-15的数据join,查看哪些数据被修改,哪些数据过期了
insert overwrite table dwd_coupon_use partition(dt)
select
nvl(new.id,old.id),
nvl(new.coupon_id,old.coupon_id),
nvl(new.user_id,old.user_id),
nvl(new.order_id,old.order_id),
nvl(new.coupon_status,old.coupon_status),
nvl(new.get_time,old.get_time),
nvl(new.using_time,old.using_time),
nvl(new.used_time,old.used_time),
nvl(new.expire_time,old.expire_time),
coalesce(date_format(nvl(new.used_time,old.used_time),'yyyy-MM-dd'),date_format(nvl(new.expire_time,old.expire_time),'yyyy-MM-dd'),'9999-99-99')
from
(
select
id,
coupon_id,
user_id,
order_id,
coupon_status,
get_time,
using_time,
used_time,
expire_time
from dwd_coupon_use
where dt='9999-99-99'
)old
full outer join
(
select
id,
coupon_id,
user_id,
order_id,
coupon_status,
get_time,
using_time,
used_time,
expire_time
from ods_coupon_use
where dt='2020-06-15'
)new
on old.id=new.id;
**以上sql存在一个问题:**在极端情况下,6-15当天将6-14所有没使用的和6-15领取的优惠券全部使用,这样9999-99-99分区就不会有数据,这样9999-99-99分区的数据就不会被override,理论上6-15当天9999-99-99分区中没有数据,而以上sql中6-15当天9999-99-99分区中的数据不会是空的,将会是6-14中9999-99-99分区中的数据,因为没有任何数据去覆盖9999-99-99分区。
6-16会从9999-99-99分区查询数据,这个分区中的数据永远不会被覆盖。
解决方案
- 在9999-99-99分区存放一条全为null的数据(无效的数据),使得这个分区永远存在数据,即使当天优惠券全部用完,也会有一条数据去覆盖前一天的9999-99-99分区
累积型快照事实表处理思路大体相同:第一天将分区导入到各个分区,第二天将第一天9999-99-99分区的数据和第二天的新数据full out join,再将数据导入到各个分区
五 支付事实表(累积型快照事实表)
1 分区设计
2 建表语句
DROP TABLE IF EXISTS dwd_payment_info;
CREATE EXTERNAL TABLE dwd_payment_info (
`id` STRING COMMENT '编号',
`order_id` STRING COMMENT '订单编号',
`user_id` STRING COMMENT '用户编号',
`province_id` STRING COMMENT '地区ID',
`trade_no` STRING COMMENT '交易编号',
`out_trade_no` STRING COMMENT '对外交易编号',
`payment_type` STRING COMMENT '支付类型',
`payment_amount` DECIMAL(16,2) COMMENT '支付金额',
`payment_status` STRING COMMENT '支付状态',
`create_time` STRING COMMENT '创建时间',--调用第三方支付接口的时间
`callback_time` STRING COMMENT '完成时间'--支付完成时间,即支付成功回调时间
) COMMENT '支付事实表表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_payment_info/'
TBLPROPERTIES ("orc.compress"="snappy");
3 数据导入
(1)首日导入
insert overwrite table dwd_payment_info partition(dt)
select
pi.id,
pi.order_id,
pi.user_id,
oi.province_id,
pi.trade_no,
pi.out_trade_no,
pi.payment_type,
pi.payment_amount,
pi.payment_status,
pi.create_time,
pi.callback_time,
nvl(date_format(pi.callback_time,'yyyy-MM-dd'),'9999-99-99')
from
(
select * from ods_payment_info where dt='2020-06-14'
)pi
left join
(
select id,province_id from ods_order_info where dt='2020-06-14'
)oi
on pi.order_id=oi.id;
(2)每日导入
insert overwrite table dwd_payment_info partition(dt)
select
nvl(new.id,old.id),
nvl(new.order_id,old.order_id),
nvl(new.user_id,old.user_id),
nvl(new.province_id,old.province_id),
nvl(new.trade_no,old.trade_no),
nvl(new.out_trade_no,old.out_trade_no),
nvl(new.payment_type,old.payment_type),
nvl(new.payment_amount,old.payment_amount),
nvl(new.payment_status,old.payment_status),
nvl(new.create_time,old.create_time),
nvl(new.callback_time,old.callback_time),
nvl(date_format(nvl(new.callback_time,old.callback_time),'yyyy-MM-dd'),'9999-99-99')
from
(
select id,
order_id,
user_id,
province_id,
trade_no,
out_trade_no,
payment_type,
payment_amount,
payment_status,
create_time,
callback_time
from dwd_payment_info
where dt = '9999-99-99'
)old
full outer join
(
select
pi.id,
pi.out_trade_no,
pi.order_id,
pi.user_id,
oi.province_id,
pi.payment_type,
pi.trade_no,
pi.payment_amount,
pi.payment_status,
pi.create_time,
pi.callback_time
from
(
select * from ods_payment_info where dt='2020-06-15'
)pi
left join
(
select id,province_id from ods_order_info where dt='2020-06-15'
)oi
on pi.order_id=oi.id
)new
on old.id=new.id;
六 退款事实表(累积型快照事实表)
1 建表语句
DROP TABLE IF EXISTS dwd_refund_payment;
CREATE EXTERNAL TABLE dwd_refund_payment (
`id` STRING COMMENT '编号',
`user_id` STRING COMMENT '用户ID',
`order_id` STRING COMMENT '订单编号',
`sku_id` STRING COMMENT 'SKU编号',
`province_id` STRING COMMENT '地区ID',
`trade_no` STRING COMMENT '交易编号',
`out_trade_no` STRING COMMENT '对外交易编号',
`payment_type` STRING COMMENT '支付类型',
`refund_amount` DECIMAL(16,2) COMMENT '退款金额',
`refund_status` STRING COMMENT '退款状态',
`create_time` STRING COMMENT '创建时间',--调用第三方支付接口的时间
`callback_time` STRING COMMENT '回调时间'--支付接口回调时间,即支付成功时间
) COMMENT '退款事实表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_refund_payment/'
TBLPROPERTIES ("orc.compress"="snappy");
2 数据导入
(1)首日导入
insert overwrite table dwd_refund_payment partition(dt)
select
rp.id,
user_id,
order_id,
sku_id,
province_id,
trade_no,
out_trade_no,
payment_type,
refund_amount,
refund_status,
create_time,
callback_time,
nvl(date_format(callback_time,'yyyy-MM-dd'),'9999-99-99')
from
(
select
id,
out_trade_no,
order_id,
sku_id,
payment_type,
trade_no,
refund_amount,
refund_status,
create_time,
callback_time
from ods_refund_payment
where dt='2020-06-14'
)rp
left join
(
select
id,
user_id,
province_id
from ods_order_info
where dt='2020-06-14'
)oi
on rp.order_id=oi.id;
(2)每日导入
insert overwrite table dwd_refund_payment partition(dt)
select
nvl(new.id,old.id),
nvl(new.user_id,old.user_id),
nvl(new.order_id,old.order_id),
nvl(new.sku_id,old.sku_id),
nvl(new.province_id,old.province_id),
nvl(new.trade_no,old.trade_no),
nvl(new.out_trade_no,old.out_trade_no),
nvl(new.payment_type,old.payment_type),
nvl(new.refund_amount,old.refund_amount),
nvl(new.refund_status,old.refund_status),
nvl(new.create_time,old.create_time),
nvl(new.callback_time,old.callback_time),
nvl(date_format(nvl(new.callback_time,old.callback_time),'yyyy-MM-dd'),'9999-99-99')
from
(
select
id,
user_id,
order_id,
sku_id,
province_id,
trade_no,
out_trade_no,
payment_type,
refund_amount,
refund_status,
create_time,
callback_time
from dwd_refund_payment
where dt='9999-99-99'
)old
full outer join
(
select
rp.id,
user_id,
order_id,
sku_id,
province_id,
trade_no,
out_trade_no,
payment_type,
refund_amount,
refund_status,
create_time,
callback_time
from
(
select
id,
out_trade_no,
order_id,
sku_id,
payment_type,
trade_no,
refund_amount,
refund_status,
create_time,
callback_time
from ods_refund_payment
where dt='2020-06-15'
)rp
left join
(
select
id,
user_id,
province_id
from ods_order_info
where dt='2020-06-15'
)oi
on rp.order_id=oi.id
)new
on old.id=new.id
七 订单事实表(累积型快照事实表)
1 建表语句
DROP TABLE IF EXISTS dwd_order_info;
CREATE EXTERNAL TABLE dwd_order_info(
`id` STRING COMMENT '编号',
`order_status` STRING COMMENT '订单状态',
`user_id` STRING COMMENT '用户ID',
`province_id` STRING COMMENT '地区ID',
`payment_way` STRING COMMENT '支付方式',
`delivery_address` STRING COMMENT '邮寄地址',
`out_trade_no` STRING COMMENT '对外交易编号',
`tracking_no` STRING COMMENT '物流单号',
`create_time` STRING COMMENT '创建时间(未支付状态)',
`payment_time` STRING COMMENT '支付时间(已支付状态)',
`cancel_time` STRING COMMENT '取消时间(已取消状态)',
`finish_time` STRING COMMENT '完成时间(已完成状态)',
`refund_time` STRING COMMENT '退款时间(退款中状态)',
`refund_finish_time` STRING COMMENT '退款完成时间(退款完成状态)',
`expire_time` STRING COMMENT '过期时间',
`feight_fee` DECIMAL(16,2) COMMENT '运费',
`feight_fee_reduce` DECIMAL(16,2) COMMENT '运费减免',
`activity_reduce_amount` DECIMAL(16,2) COMMENT '活动减免',
`coupon_reduce_amount` DECIMAL(16,2) COMMENT '优惠券减免',
`original_amount` DECIMAL(16,2) COMMENT '订单原始价格',
`final_amount` DECIMAL(16,2) COMMENT '订单最终价格'
) COMMENT '订单事实表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_order_info/'
TBLPROPERTIES ("orc.compress"="snappy");
2 数据导入
(1)首日导入
insert overwrite table dwd_order_info partition(dt)
select
oi.id,
oi.order_status,
oi.user_id,
oi.province_id,
oi.payment_way,
oi.delivery_address,
oi.out_trade_no,
oi.tracking_no,
oi.create_time,
times.ts['1002'] payment_time,
times.ts['1003'] cancel_time,
times.ts['1004'] finish_time,
times.ts['1005'] refund_time,
times.ts['1006'] refund_finish_time,
oi.expire_time,
feight_fee,
feight_fee_reduce,
activity_reduce_amount,
coupon_reduce_amount,
original_amount,
final_amount,
case
when times.ts['1003'] is not null then date_format(times.ts['1003'],'yyyy-MM-dd')
when times.ts['1004'] is not null and date_add(date_format(times.ts['1004'],'yyyy-MM-dd'),7)<='2020-06-14' and times.ts['1005'] is null then date_add(date_format(times.ts['1004'],'yyyy-MM-dd'),7)
when times.ts['1006'] is not null then date_format(times.ts['1006'],'yyyy-MM-dd')
when oi.expire_time is not null then date_format(oi.expire_time,'yyyy-MM-dd')
else '9999-99-99'
end
from
(
select
*
from ods_order_info
where dt='2020-06-14'
)oi
left join
(
select
order_id,
str_to_map(concat_ws(',',collect_set(concat(order_status,'=',operate_time))),',','=') ts
from ods_order_status_log
where dt='2020-06-14'
group by order_id
)times
on oi.id=times.order_id;
(2)每日导入
insert overwrite table dwd_order_info partition(dt)
select
nvl(new.id,old.id),
nvl(new.order_status,old.order_status),
nvl(new.user_id,old.user_id),
nvl(new.province_id,old.province_id),
nvl(new.payment_way,old.payment_way),
nvl(new.delivery_address,old.delivery_address),
nvl(new.out_trade_no,old.out_trade_no),
nvl(new.tracking_no,old.tracking_no),
nvl(new.create_time,old.create_time),
nvl(new.payment_time,old.payment_time),
nvl(new.cancel_time,old.cancel_time),
nvl(new.finish_time,old.finish_time),
nvl(new.refund_time,old.refund_time),
nvl(new.refund_finish_time,old.refund_finish_time),
nvl(new.expire_time,old.expire_time),
nvl(new.feight_fee,old.feight_fee),
nvl(new.feight_fee_reduce,old.feight_fee_reduce),
nvl(new.activity_reduce_amount,old.activity_reduce_amount),
nvl(new.coupon_reduce_amount,old.coupon_reduce_amount),
nvl(new.original_amount,old.original_amount),
nvl(new.final_amount,old.final_amount),
case
when new.cancel_time is not null then date_format(new.cancel_time,'yyyy-MM-dd')
when new.finish_time is not null and date_add(date_format(new.finish_time,'yyyy-MM-dd'),7)='2020-06-15' and new.refund_time is null then '2020-06-15'
when new.refund_finish_time is not null then date_format(new.refund_finish_time,'yyyy-MM-dd')
when new.expire_time is not null then date_format(new.expire_time,'yyyy-MM-dd')
else '9999-99-99'
end
from
(
select
id,
order_status,
user_id,
province_id,
payment_way,
delivery_address,
out_trade_no,
tracking_no,
create_time,
payment_time,
cancel_time,
finish_time,
refund_time,
refund_finish_time,
expire_time,
feight_fee,
feight_fee_reduce,
activity_reduce_amount,
coupon_reduce_amount,
original_amount,
final_amount
from dwd_order_info
where dt='9999-99-99'
)old
full outer join
(
select
oi.id,
oi.order_status,
oi.user_id,
oi.province_id,
oi.payment_way,
oi.delivery_address,
oi.out_trade_no,
oi.tracking_no,
oi.create_time,
times.ts['1002'] payment_time,
times.ts['1003'] cancel_time,
times.ts['1004'] finish_time,
times.ts['1005'] refund_time,
times.ts['1006'] refund_finish_time,
oi.expire_time,
feight_fee,
feight_fee_reduce,
activity_reduce_amount,
coupon_reduce_amount,
original_amount,
final_amount
from
(
select
*
from ods_order_info
where dt='2020-06-15'
)oi
left join
(
select
order_id,
str_to_map(concat_ws(',',collect_set(concat(order_status,'=',operate_time))),',','=') ts
from ods_order_status_log
where dt='2020-06-15'
group by order_id
)times
on oi.id=times.order_id
)new
on old.id=new.id;