数据仓库项目笔记9

用户订单画像报表分析

– 订单指标表 dm_user_order_tag

drop table if exists ads_user_order_tag;
create table ads_user_order_tag(
user_id                        bigint     ,--用户
first_order_time               string     ,--第一次消费时间    --这个人的第一个订单时间
last_order_time                string     ,--最近一次消费时间
first_order_ago                bigint     ,--首单距今时间
last_order_ago                 bigint     ,--尾单距今时间
month1_order_cnt               bigint     ,--近30天购买次数
month1_order_amt               double     ,--近30天购买金额
month2_order_cnt               bigint     ,--近60天购买次数
month2_order_amt               double     ,--近60天购买金额
month3_order_cnt               bigint     ,--近90天购买次数
month3_order_amt               double     ,--近90天购买金额
max_order_amt                  double     ,--最大消费金额
min_order_amt                  double     ,--最小消费金额
total_order_cnt                bigint     ,--累计消费次数(不含退拒)
total_order_amt                double     ,--累计消费金额(不含退拒)
total_coupon_amt               double     ,--累计使用代金券金额
user_avg_amt                   double     ,--客单价(含退拒)
month3_user_avg_amt            double     ,--近90天客单价(含退拒)
common_address                 string     ,--常用收货地址
common_paytype                 string     ,--常用支付方式
month1_cart_cnt                bigint     ,--最近30天添加购物车次数
month1_cart_goods_cnt          bigint     ,--最近30天添加购物车商品总件数
month1_cart_submit_cnt         bigint     ,--最近30天提交商品件数
month1_cart_submit_rate        double     ,--最近30天提交的商品的比率
month1_cart_cancel_cnt         bigint     ,--最近30天商品取消数量
month1_cart_cancel_rate        double     ,--最近30天商品取消比率
dw_date                        string     ,--数仓计算日期
) partitioned by
(dt string)
;
  • 分析业务组合宽表(由订单和详细表组成)
    • 字段由三个表组成订单表、订单详细表、购物车
    • 但购物车和订单通过用户关联,所求字段有订单数据未必有购物车数据,用full join 实现 或用union获取所有用户
    • union 和 union all 区别: union 去重排序 union all 不去重不排序
    • explain 查看执行计划
-- etl 计算

with t1 as (

select
user_id,
min(create_time) as first_order_time, -- 首单时间
max(create_time) as last_order_time, --尾单时间
-- first_order_ago
-- last_order_ago
count(if(datediff('2019-06-16',create_time)<=30,1,null)) as month1_order_cnt,-- 30天内的订单总数
sum(if(datediff('2019-06-16',create_time)<=30,order_money,0)) as month1_order_amt, --30天内的订单总额

count(if(datediff('2019-06-16',create_time)<=60,1,null)) as month2_order_cnt,-- 60天内的订单总数
sum(if(datediff('2019-06-16',create_time)<=60,order_money,0)) as month2_order_amt, --60天内的订单总额

count(if(datediff('2019-06-16',create_time)<=90,1,null)) as month3_order_cnt,-- 90天内的订单总数
sum(if(datediff('2019-06-16',create_time)<=90,order_money,0)) as month3_order_amt, --90天内的订单总额

max(order_money) as max_order_amt, -- 最大消费金额
min(order_money) as min_order_amt, -- 最小消费金额

count(if(order_status in ('退货','拒收'),null,1)) as p_total_order_cnt, -- 累计订单总数(不含退拒)
sum(if(order_status in ('退货','拒收'),0,order_money)) as p_total_order_amt, -- 累计消费总额(不含退拒)

sum(coupon_money) as total_coupon_amt, -- 累计使用代金券总额

count(1) as total_order_cnt, -- 累计订单总数(含退拒)
sum(order_money) as total_order_amt -- 累计消费总额(含退拒)
-- user_avg_amt 客单价(含退拒)
-- month3_user_avg_amt --90天内的客单价(含退拒)

from dws_b2c_orders

group by user_id

),

t2 as (

select user_id,addr
from 
(
select
user_id,
addr,
row_number() over(partition by user_id order by cnt desc) as rn
from 
(
select
user_id,
concat_ws(',',nvl(area_name,''),nvl(address,''))  as addr, --地址
count(1) as cnt --次数
from dws_b2c_orders
group by user_id,concat_ws(',',nvl(area_name,''),nvl(address,''))
) o1

) o2
where rn=1
),

t3 as (

select user_id,pay_type
from
(
select user_id,pay_type,
row_number() over(partition by user_id order by cnt desc) as rn
from 
(
select
user_id,pay_type,count(1) as cnt
from dws_b2c_orders
group by user_id,pay_type
) o1
) o2
where rn=1
),

t4 as (
select
user_id,
count(distinct session_id) as month1_cart_cnt, --添加购物车次数
sum(number) as month1_cart_goods_cnt ,-- 添加商品总件数
sum(if(submit_time is not null,number,0)) as month1_cart_submit_cnt , -- 提交的商品件数
-- 提交比率
sum(if(cancel_time is not null,number,0)) as month1_cart_cancel_cnt 
-- 取消比率
from
(select * from ods_b2c_cart
where datediff('2019-06-16',add_time)<=30) cart
group by user_id
),

t5 as (

select user_id from dws_b2c_orders
union
select user_id from ods_b2c_cart

)

-- 总查询

insert into table ads_user_order_tag 
select

 t5.user_id                                                                         ,
 t1.first_order_time                                                                ,
 t1.last_order_time                                                                 ,
 datediff('2019-06-16',t1.first_order_time)  as first_order_ago                     ,
 datediff('2019-06-16',t1.last_order_time)  as last_order_ago                       ,
 t1.month1_order_cnt                                                                ,
 t1.month1_order_amt                                                                ,
 t1.month2_order_cnt                                                                ,
 t1.month2_order_amt                                                                ,
 t1.month3_order_cnt                                                                ,
 t1.month3_order_amt                                                                ,
 t1.max_order_amt                                                                   ,
 t1.min_order_amt                                                                   ,
 t1.total_order_cnt                                                                 ,
 t1.total_order_amt                                                                 ,
 t1.total_coupon_amt                                                                ,
 t1.total_order_amt/t1.total_order_cnt  as user_avg_amt                             ,
 t1.month3_order_amt/t1.month2_order_cnt as month3_user_avg_amt                     ,
 t2.addr as common_address                                                          ,
 t3.pay_type as common_paytype                                                      ,
 t4.month1_cart_cnt                                                                 ,
 t4.month1_cart_goods_cnt                                                           ,
 t4.month1_cart_submit_cnt                                                          ,
 t4.month1_cart_submit_cnt/t4.month1_cart_goods_cnt   as  month1_cart_submit_rate   ,
 t4.month1_cart_cancel_cnt                                                          ,
 t4.month1_cart_cancel_cnt/t4.month1_cart_goods_cnt   as  month1_cart_cancel_rate   ,
 '2019-06-16' as dw_date
from t5 
   join t1 on t5.user_id=t1.user_id
   join t2 on t5.user_id=t2.user_id
   join t3 on t5.user_id=t3.user_id
   join t4 on t5.user_id=t4.user_id
;
商品退换货分析
-- 订单与商品宽表
drop table if exists dws_orders_goods;
create table dws_orders_goods(
order_id            bigint    ,-- 订单ID
goods_id            bigint    ,-- 商品ID
size_id             bigint    ,-- 商品规格id
goods_price         double    ,-- 商品售价
goods_amount        bigint    ,-- 商品数量
cat1_id             bigint    ,-- 类目1ID
cat1_name           string    ,-- 类目1名称
cat2_id             bigint    ,-- 类目2ID
cat2_name          string    ,-- 类目2名称
cat3_id            bigint    ,-- 类目3id
cat3_name          string    ,-- 类目3名称
order_no           string    ,-- 订单流水号
order_date         string    ,-- 订单创建日期
user_id            bigint    ,-- 用户ID
user_name          string    ,-- 登录名
order_money        double    ,-- 订单金额  --扣除促销、减免之后的金额
order_type         string    ,-- 订单类型
order_status       string    ,-- 订单状态
pay_type           string    ,-- 支付类型
pay_status         string    ,-- 支付状态
order_source       string    ,-- 订单来源
dw_date            string     -- 数仓计算日期

)
stored as parquet
;
-- 宽表包含所有字段
insert into table dws_orders_goods
select
b.order_id         ,-- 订单ID
b.goods_id         ,-- 商品ID
b.size_id          ,-- 条码ID
b.goods_price      ,-- 商品价格
b.goods_amount     ,-- 数量
c.first_cat        ,-- 类目1ID
c.first_cat_name   ,-- 类目1名称
c.second_cat       ,-- 类目2ID
c.second_cat_name  ,-- 类目2名称
c.third_cat        ,-- 类目3ID
c.third_cat_name   ,-- 类目3名称
a.order_no         ,-- 订单号
a.order_date       ,-- 订单日期
a.user_id          ,-- 用户ID
a.user_name        ,-- 登录名
a.order_money      ,-- 订单金额  --扣除促销、减免之后的金额
a.order_type       ,-- 订单类型
a.order_status     ,-- 订单状态
a.pay_type         ,-- 支付类型
a.pay_status       ,-- 支付状态
a.order_source     ,-- 订单来源
'2019-06-16'
from ods_b2c_orders  a
  join ods_b2c_orders_goods b  on a.order_id = b.order_id
  join ods_b2c_goods c on b.goods_id= c.goods_id

--用户订单退拒商品指标计算
drop table if exists dm_user_goods_amt;
create table dm_user_goods_amt(
user_id                           bigint      ,-- 用户
p_sales_cnt                       bigint      ,-- 排除退拒商品销售数量
p_sales_amt                       double      ,-- 排除退拒商品销售金额
p_sales_cut_amt                   double      ,-- 排除退拒商品销售金额(扣促销减免)
h_sales_cnt                       bigint      ,-- 含退拒销售数量
h_sales_amt                       double      ,-- 含退拒销售金额
h_sales_cut_amt                   double      ,-- 含退拒销售金额(扣促销减免)
return_cnt                        bigint      ,-- 退货商品数量
return_amt                        double      ,-- 退货商品金额
reject_cnt                        bigint      ,-- 拒收商品数量
reject_amt                        double      ,-- 拒收商品金额
common_first_cat                  bigint      ,-- 最常购买商品一级类目名称
common_second_cat                 bigint      ,-- 最常购买商品二级类目名称
common_third_cat                  bigint      ,-- 最常购买商品三级类目名称
dw_date                           bigint
) partitioned by (dt string)
stored as parquet
;
with t1 as
(
select
user_id,
sum(if(order_status in ('退换', '拒收'), 0, goods_amount)) as p_sales_cnt,
sum(if(order_status in ('退换', '拒收'), 0, goods_amount*goods_price)) as p_sales_amt,

sum(goods_amount) as h_sales_cnt,
sum(goods_amount*goods_price) as h_sales_amt,

sum(if(order_status ='退换', 0, goods_amount)) as return_cnt,
sum(if(order_status ='退换', 0, goods_amount*goods_price)) as return_amt,

sum(if(order_status ='拒收', 0, goods_amount)) as reject_cnt,
sum(if(order_status ='拒收', 0, goods_amount*goods_price)) as reject_amt


from
dws_orders_goods
group by user_id),
--可从ods层获取join后变多条的数据不好求和字段
t2 as (
select
user_id,
sum(if(order_status in ('退货','拒收'),0,order_money)) as p_sales_cut_amt, -- 排除退拒的总金额(扣除了优惠)
sum(order_money) as h_sales_cut_amt -- 含退拒的总金额(扣除了优惠)
from ods_b2c_orders
group by user_id
),
--分组获取单个项目的topN
t3 as(

select
user_id,
cat1_name
from
(select
user_id,
cat1_name,
amt,
row_number() over(partition by user_id order by amt desc) rn
from(
select
user_id,
cat1_name,
sum(goods_amount) as amt
from
dws_orders_goods
group by user_id, cat1_name) o
)o1 where  rn = 1
)
,

t4 as (
select
user_id,
cat2_name
from
(
select
user_id,cat2_name,row_number() over(partition by user_id order by cat2_cnt desc) as rn
from
(
select
user_id,
cat2_name,
sum(goods_amount) as cat2_cnt
from dws_orders_goods
group by user_id,cat2_name
) o1
) o2
where rn=1
),

t5 as (
select
user_id,
cat3_name
from
(
select
user_id,cat3_name,row_number() over(partition by user_id order by cat3_cnt desc) as rn
from
(
select
user_id,
cat3_name,
sum(goods_amount) as cat3_cnt
from dws_orders_goods
group by user_id,cat3_name
) o1
) o2
where rn=1
)

select
t1.user_id              ,
t1.p_sales_cnt          ,
t1.p_sales_amt          ,
t2.p_sales_cut_amt      ,
t1.h_sales_cnt          ,
t1.h_sales_amt          ,
t2.h_sales_cut_amt      ,
t1.return_cnt           ,
t1.return_amt           ,
t1.reject_cnt           ,
t1.reject_amt           ,
t3.cat1_name            ,
t4.cat2_name            ,
t5.cat3_name            ,
'2019-06-16' first_cat

from t1
  join t2 on t1.user_id=t2.user_id
  join t3 on t1.user_id=t3.user_id
  join t4 on t1.user_id=t4.user_id
  join t5 on t1.user_id=t5.user_id
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值