文章目录
一 数仓血缘图
之前几层总结,数仓血缘图如下:
二 优惠券主题
ADS层不涉及建模,建表根据具体需求而定。
1 优惠券统计
要求统计最近30日发布的所有优惠券的领用情况和补贴率。
补贴率:总优惠金额与使用优惠券的订单的原价金额的比值。
2 建表语句
DROP TABLE IF EXISTS ads_coupon_stats;
CREATE EXTERNAL TABLE ads_coupon_stats (
`dt` STRING COMMENT '统计日期',
`coupon_id` STRING COMMENT '优惠券ID',
`coupon_name` STRING COMMENT '优惠券名称',
`start_date` STRING COMMENT '发布日期',
`rule_name` STRING COMMENT '优惠规则',
`get_count` BIGINT COMMENT '领取次数',
`order_count` BIGINT COMMENT '使用(下单)次数',
`expire_count` BIGINT COMMENT '过期次数',
`order_original_amount` DECIMAL(16,2) COMMENT '使用优惠券订单原始金额',
`order_final_amount` DECIMAL(16,2) COMMENT '使用优惠券订单最终金额',
`reduce_amount` DECIMAL(16,2) COMMENT '优惠金额',
`reduce_rate` DECIMAL(16,2) COMMENT '补贴率'
) COMMENT '商品销售统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_coupon_stats/';
3 数据导入
insert overwrite table ads_coupon_stats
select * from ads_coupon_stats
union
select
'2020-06-14' dt,
t1.id,
coupon_name,
start_date,
rule_name,
get_count,
order_count,
expire_count,
order_original_amount,
order_final_amount,
reduce_amount,
reduce_rate
from
(
select
id,
coupon_name,
date_format(start_time,'yyyy-MM-dd') start_date,
case
when coupon_type='3201' then concat('满',condition_amount,'元减',benefit_amount,'元')
when coupon_type='3202' then concat('满',condition_num,'件打', (1-benefit_discount)*10,'折')
when coupon_type='3203' then concat('减',benefit_amount,'元')
end rule_name
from dim_coupon_info
where dt='2020-06-14'
and date_format(start_time,'yyyy-MM-dd')>=date_add('2020-06-14',-29)
)t1
left join
(
select
coupon_id,
get_count,
order_count,
expire_count,
order_original_amount,
order_final_amount,
order_reduce_amount reduce_amount,
cast(order_reduce_amount/order_original_amount as decimal(16,2)) reduce_rate
from dwt_coupon_topic
where dt='2020-06-14'
)t2
on t1.id=t2.coupon_id;
三 活动主题
1 活动统计
统计最近30日发布的所有活动的参与情况和补贴率。
补贴率:总优惠金额与参与活动的订单原价金额的比值。
2 建表语句
DROP TABLE IF EXISTS ads_activity_stats;
CREATE EXTERNAL TABLE `ads_activity_stats` (
`dt` STRING COMMENT '统计日期',
`activity_id` STRING COMMENT '活动ID',
`activity_name` STRING COMMENT '活动名称',
`start_date` STRING COMMENT '活动开始日期',
`order_count` BIGINT COMMENT '参与活动订单数',
`order_original_amount` DECIMAL(16,2) COMMENT '参与活动订单原始金额',
`order_final_amount` DECIMAL(16,2) COMMENT '参与活动订单最终金额',
`reduce_amount` DECIMAL(16,2) COMMENT '优惠金额',
`reduce_rate` DECIMAL(16,2) COMMENT '补贴率'
) COMMENT '商品销售统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_activity_stats/';
3 数据导入
insert overwrite table ads_activity_stats
select * from ads_activity_stats
union
select
'2020-06-14' dt,
t4.activity_id,
activity_name,
start_date,
order_count,
order_original_amount,
order_final_amount,
reduce_amount,
reduce_rate
from
(
select
activity_id,
activity_name,
date_format(start_time,'yyyy-MM-dd') start_date
-- 近30日的活动
from dim_activity_rule_info
group by activity_id,activity_name,start_time
)t4
left join
(
select
activity_id,
sum(order_count) order_count,
sum(order_original_amount) order_original_amount,
sum(order_final_amount) order_final_amount,
sum(order_reduce_amount) reduce_amount,
cast(sum(order_reduce_amount)/sum(order_original_amount)*100 as decimal(16,2)) reduce_rate
-- 近30日统计情况
from dwt_activity_topic
where dt='2020-06-14'
group by activity_id
)t5
on t4.activity_id=t5.activity_id;
四 商品主题
1 商品统计
该指标为商品综合统计,包含每个spu被下单总次数和被下单总金额。
2 建表语句
DROP TABLE IF EXISTS ads_order_spu_stats;
CREATE EXTERNAL TABLE `ads_order_spu_stats` (
`dt` STRING COMMENT '统计日期',
`spu_id` STRING COMMENT '商品ID',
`spu_name` STRING COMMENT '商品名称',
`tm_id` STRING COMMENT '品牌ID',
`tm_name` STRING COMMENT '品牌名称',
`category3_id` STRING COMMENT '三级品类ID',
`category3_name` STRING COMMENT '三级品类名称',
`category2_id` STRING COMMENT '二级品类ID',
`category2_name` STRING COMMENT '二级品类名称',
`category1_id` STRING COMMENT '一级品类ID',
`category1_name` STRING COMMENT '一级品类名称',
`order_count` BIGINT COMMENT '订单数',
`order_amount` DECIMAL(16,2) COMMENT '订单金额'
) COMMENT '商品销售统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_order_spu_stats/';
3 数据导入
insert overwrite table ads_order_spu_stats
-- 将原来数据拿出来
select * from ads_order_spu_stats
-- 与现在数据拼接
-- hive中union all的缺点:优化器优化后的结果是将新数据直接追加到以前数据中,不会与以前数据一起处理,时间长了之后,就会有大量的小文件,解决办法就是定期处理一次,如一个月去覆盖一次
-- union:将数据查询出来,join之后再覆盖回去,产生一个文件,不会产生小文件,但会牺牲一点性能
union
select
'2020-06-14' dt,
spu_id,
spu_name,
tm_id,
tm_name,
category3_id,
category3_name,
category2_id,
category2_name,
category1_id,
category1_name,
sum(order_last_1d_count),
sum(order_last_1d_final_amount)
from
(
select
sku_id,
order_last_1d_count,
order_last_1d_final_amount
from dwt_sku_topic
where dt='2020-06-14'
)t1
left join
(
select
id,
spu_id,
spu_name,
tm_id,
tm_name,
category3_id,
category3_name,
category2_id,
category2_name,
category1_id,
category1_name
-- 维度信息表
from dim_sku_info
where dt='2020-06-14'
)t2
on t1.sku_id=t2.id
group by spu_id,spu_name,tm_id,tm_name,category3_id,category3_name,category2_id,category2_name,category1_id,category1_name;
五 商品主题一
1 品牌复购率(最近三十天)
品牌复购率是指一段时间内重复购买某品牌的人数与购买过该品牌的人数的比值。重复购买即购买次数大于等于2,购买过即购买次数大于1。
统计最近30天的各品牌复购率。
2 建表语句
DROP TABLE IF EXISTS ads_repeat_purchase;
CREATE EXTERNAL TABLE `ads_repeat_purchase` (
`dt` STRING COMMENT '统计日期',
`tm_id` STRING COMMENT '品牌ID',
`tm_name` STRING COMMENT '品牌名称',
`order_last_30d_repeat_rate` DECIMAL(16,2) COMMENT '复购率'
) COMMENT '品牌复购率'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_repeat_purchase/';
3 数据导入
该需求可分两步实现:
- 统计每个用户购买每个品牌的次数。
- 分别统计购买次数大于1的人数和大于2的人数。
insert overwrite table ads_repeat_purchase
select * from ads_repeat_purchase
union
select
'2020-06-14' dt,
tm_id,
tm_name,
cast(sum(if(order_count>=2,1,0))/sum(if(order_count>=1,1,0))*100 as decimal(16,2))
from
(
select
user_id,
tm_id,
tm_name,
sum(order_count) order_count
from
(
select
user_id,
sku_id,
-- 一单中有两件同一品牌不同型号的商品算复购,如果不算,可以将* 换成distinct order_id
count(*) order_count
-- 步骤一
from dwd_order_detail
where dt>=date_add('2020-06-14',-29)
group by user_id,sku_id
)t1
left join
(
select
id,
tm_id,
tm_name
from dim_sku_info
where dt='2020-06-14'
)t2
on t1.sku_id=t2.id
group by user_id,tm_id,tm_name
)t3
group by tm_id,tm_name;
六 订单主题
1 订单统计
该需求包含订单总数。
2 建表语句
DROP TABLE IF EXISTS ads_order_total;
CREATE EXTERNAL TABLE `ads_order_total` (
`dt` STRING COMMENT '统计日期',
`order_count` BIGINT COMMENT '订单数',
`order_amount` DECIMAL(16,2) COMMENT '订单金额',
`order_user_count` BIGINT COMMENT '下单人数'
) COMMENT '订单统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_order_total/';
3 数据导入
insert overwrite table ads_order_total
select * from ads_order_total
union
select
'2020-06-14',
sum(order_last_1d_count),
sum(order_last_1d_final_amount) order_final_amount,
sum(if(order_last_1d_final_amount>0,1,0)) order_user_count
from dwt_user_topic
where dt='2020-06-14';
七 订单主题一
1 各地区订单统计
该需求包含订单总金额和下单总人数。
2 建表语句
DROP TABLE IF EXISTS ads_order_by_province;
CREATE EXTERNAL TABLE `ads_order_by_province` (
`dt` STRING COMMENT '统计日期',
`province_id` STRING COMMENT '省份id',
`province_name` STRING COMMENT '省份名称',
`area_code` STRING COMMENT '地区编码',
`iso_code` STRING COMMENT '国际标准地区编码',
`iso_code_3166_2` STRING COMMENT '国际标准地区编码',
`order_count` BIGINT COMMENT '订单数',
`order_amount` DECIMAL(16,2) COMMENT '订单金额'
) COMMENT '用户留存率'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_order_by_province/';
3 数据导入
insert overwrite table ads_order_by_province
select * from ads_order_by_province
union
select
'2020-06-14' dt,
province_id,
province_name,
area_code,
iso_code,
iso_3166_2,
order_last_1d_count,
order_last_1d_final_amount
from
(
select
province_id,
order_last_1d_count,
order_last_1d_final_amount
from dwt_area_topic
where dt='2020-06-14'
)t1
left join
(
select
id,
province_name,
area_code,
iso_code,
iso_3166_2
from dim_base_province
)t2
on t1.province_id=t2.id;