用户订单画像报表分析
– 订单指标表 dm_user_order_tag
drop table if exists ads_user_order_tag;
create table ads_user_order_tag(
user_id bigint ,--用户
first_order_time string ,--第一次消费时间 --这个人的第一个订单时间
last_order_time string ,--最近一次消费时间
first_order_ago bigint ,--首单距今时间
last_order_ago bigint ,--尾单距今时间
month1_order_cnt bigint ,--近30天购买次数
month1_order_amt double ,--近30天购买金额
month2_order_cnt bigint ,--近60天购买次数
month2_order_amt double ,--近60天购买金额
month3_order_cnt bigint ,--近90天购买次数
month3_order_amt double ,--近90天购买金额
max_order_amt double ,--最大消费金额
min_order_amt double ,--最小消费金额
total_order_cnt bigint ,--累计消费次数(不含退拒)
total_order_amt double ,--累计消费金额(不含退拒)
total_coupon_amt double ,--累计使用代金券金额
user_avg_amt double ,--客单价(含退拒)
month3_user_avg_amt double ,--近90天客单价(含退拒)
common_address string ,--常用收货地址
common_paytype string ,--常用支付方式
month1_cart_cnt bigint ,--最近30天添加购物车次数
month1_cart_goods_cnt bigint ,--最近30天添加购物车商品总件数
month1_cart_submit_cnt bigint ,--最近30天提交商品件数
month1_cart_submit_rate double ,--最近30天提交的商品的比率
month1_cart_cancel_cnt bigint ,--最近30天商品取消数量
month1_cart_cancel_rate double ,--最近30天商品取消比率
dw_date string ,--数仓计算日期
) partitioned by
(dt string)
;
- 分析业务组合宽表(由订单和详细表组成)
- 字段由三个表组成订单表、订单详细表、购物车
- 但购物车和订单通过用户关联,所求字段有订单数据未必有购物车数据,用full join 实现 或用union获取所有用户
- union 和 union all 区别: union 去重排序 union all 不去重不排序
- explain 查看执行计划
-- etl 计算
with t1 as (
select
user_id,
min(create_time) as first_order_time, -- 首单时间
max(create_time) as last_order_time, --尾单时间
-- first_order_ago
-- last_order_ago
count(if(datediff('2019-06-16',create_time)<=30,1,null)) as month1_order_cnt,-- 30天内的订单总数
sum(if(datediff('2019-06-16',create_time)<=30,order_money,0)) as month1_order_amt, --30天内的订单总额
count(if(datediff('2019-06-16',create_time)<=60,1,null)) as month2_order_cnt,-- 60天内的订单总数
sum(if(datediff('2019-06-16',create_time)<=60,order_money,0)) as month2_order_amt, --60天内的订单总额
count(if(datediff('2019-06-16',create_time)<=90,1,null)) as month3_order_cnt,-- 90天内的订单总数
sum(if(datediff('2019-06-16',create_time)<=90,order_money,0)) as month3_order_amt, --90天内的订单总额
max(order_money) as max_order_amt, -- 最大消费金额
min(order_money) as min_order_amt, -- 最小消费金额
count(if(order_status in ('退货','拒收'),null,1)) as p_total_order_cnt, -- 累计订单总数(不含退拒)
sum(if(order_status in ('退货','拒收'),0,order_money)) as p_total_order_amt, -- 累计消费总额(不含退拒)
sum(coupon_money) as total_coupon_amt, -- 累计使用代金券总额
count(1) as total_order_cnt, -- 累计订单总数(含退拒)
sum(order_money) as total_order_amt -- 累计消费总额(含退拒)
-- user_avg_amt 客单价(含退拒)
-- month3_user_avg_amt --90天内的客单价(含退拒)
from dws_b2c_orders
group by user_id
),
t2 as (
select user_id,addr
from
(
select
user_id,
addr,
row_number() over(partition by user_id order by cnt desc) as rn
from
(
select
user_id,
concat_ws(',',nvl(area_name,''),nvl(address,'')) as addr, --地址
count(1) as cnt --次数
from dws_b2c_orders
group by user_id,concat_ws(',',nvl(area_name,''),nvl(address,''))
) o1
) o2
where rn=1
),
t3 as (
select user_id,pay_type
from
(
select user_id,pay_type,
row_number() over(partition by user_id order by cnt desc) as rn
from
(
select
user_id,pay_type,count(1) as cnt
from dws_b2c_orders
group by user_id,pay_type
) o1
) o2
where rn=1
),
t4 as (
select
user_id,
count(distinct session_id) as month1_cart_cnt, --添加购物车次数
sum(number) as month1_cart_goods_cnt ,-- 添加商品总件数
sum(if(submit_time is not null,number,0)) as month1_cart_submit_cnt , -- 提交的商品件数
-- 提交比率
sum(if(cancel_time is not null,number,0)) as month1_cart_cancel_cnt
-- 取消比率
from
(select * from ods_b2c_cart
where datediff('2019-06-16',add_time)<=30) cart
group by user_id
),
t5 as (
select user_id from dws_b2c_orders
union
select user_id from ods_b2c_cart
)
-- 总查询
insert into table ads_user_order_tag
select
t5.user_id ,
t1.first_order_time ,
t1.last_order_time ,
datediff('2019-06-16',t1.first_order_time) as first_order_ago ,
datediff('2019-06-16',t1.last_order_time) as last_order_ago ,
t1.month1_order_cnt ,
t1.month1_order_amt ,
t1.month2_order_cnt ,
t1.month2_order_amt ,
t1.month3_order_cnt ,
t1.month3_order_amt ,
t1.max_order_amt ,
t1.min_order_amt ,
t1.total_order_cnt ,
t1.total_order_amt ,
t1.total_coupon_amt ,
t1.total_order_amt/t1.total_order_cnt as user_avg_amt ,
t1.month3_order_amt/t1.month2_order_cnt as month3_user_avg_amt ,
t2.addr as common_address ,
t3.pay_type as common_paytype ,
t4.month1_cart_cnt ,
t4.month1_cart_goods_cnt ,
t4.month1_cart_submit_cnt ,
t4.month1_cart_submit_cnt/t4.month1_cart_goods_cnt as month1_cart_submit_rate ,
t4.month1_cart_cancel_cnt ,
t4.month1_cart_cancel_cnt/t4.month1_cart_goods_cnt as month1_cart_cancel_rate ,
'2019-06-16' as dw_date
from t5
join t1 on t5.user_id=t1.user_id
join t2 on t5.user_id=t2.user_id
join t3 on t5.user_id=t3.user_id
join t4 on t5.user_id=t4.user_id
;
商品退换货分析
-- 订单与商品宽表
drop table if exists dws_orders_goods;
create table dws_orders_goods(
order_id bigint ,-- 订单ID
goods_id bigint ,-- 商品ID
size_id bigint ,-- 商品规格id
goods_price double ,-- 商品售价
goods_amount bigint ,-- 商品数量
cat1_id bigint ,-- 类目1ID
cat1_name string ,-- 类目1名称
cat2_id bigint ,-- 类目2ID
cat2_name string ,-- 类目2名称
cat3_id bigint ,-- 类目3id
cat3_name string ,-- 类目3名称
order_no string ,-- 订单流水号
order_date string ,-- 订单创建日期
user_id bigint ,-- 用户ID
user_name string ,-- 登录名
order_money double ,-- 订单金额 --扣除促销、减免之后的金额
order_type string ,-- 订单类型
order_status string ,-- 订单状态
pay_type string ,-- 支付类型
pay_status string ,-- 支付状态
order_source string ,-- 订单来源
dw_date string -- 数仓计算日期
)
stored as parquet
;
-- 宽表包含所有字段
insert into table dws_orders_goods
select
b.order_id ,-- 订单ID
b.goods_id ,-- 商品ID
b.size_id ,-- 条码ID
b.goods_price ,-- 商品价格
b.goods_amount ,-- 数量
c.first_cat ,-- 类目1ID
c.first_cat_name ,-- 类目1名称
c.second_cat ,-- 类目2ID
c.second_cat_name ,-- 类目2名称
c.third_cat ,-- 类目3ID
c.third_cat_name ,-- 类目3名称
a.order_no ,-- 订单号
a.order_date ,-- 订单日期
a.user_id ,-- 用户ID
a.user_name ,-- 登录名
a.order_money ,-- 订单金额 --扣除促销、减免之后的金额
a.order_type ,-- 订单类型
a.order_status ,-- 订单状态
a.pay_type ,-- 支付类型
a.pay_status ,-- 支付状态
a.order_source ,-- 订单来源
'2019-06-16'
from ods_b2c_orders a
join ods_b2c_orders_goods b on a.order_id = b.order_id
join ods_b2c_goods c on b.goods_id= c.goods_id
--用户订单退拒商品指标计算
drop table if exists dm_user_goods_amt;
create table dm_user_goods_amt(
user_id bigint ,-- 用户
p_sales_cnt bigint ,-- 排除退拒商品销售数量
p_sales_amt double ,-- 排除退拒商品销售金额
p_sales_cut_amt double ,-- 排除退拒商品销售金额(扣促销减免)
h_sales_cnt bigint ,-- 含退拒销售数量
h_sales_amt double ,-- 含退拒销售金额
h_sales_cut_amt double ,-- 含退拒销售金额(扣促销减免)
return_cnt bigint ,-- 退货商品数量
return_amt double ,-- 退货商品金额
reject_cnt bigint ,-- 拒收商品数量
reject_amt double ,-- 拒收商品金额
common_first_cat bigint ,-- 最常购买商品一级类目名称
common_second_cat bigint ,-- 最常购买商品二级类目名称
common_third_cat bigint ,-- 最常购买商品三级类目名称
dw_date bigint
) partitioned by (dt string)
stored as parquet
;
with t1 as
(
select
user_id,
sum(if(order_status in ('退换', '拒收'), 0, goods_amount)) as p_sales_cnt,
sum(if(order_status in ('退换', '拒收'), 0, goods_amount*goods_price)) as p_sales_amt,
sum(goods_amount) as h_sales_cnt,
sum(goods_amount*goods_price) as h_sales_amt,
sum(if(order_status ='退换', 0, goods_amount)) as return_cnt,
sum(if(order_status ='退换', 0, goods_amount*goods_price)) as return_amt,
sum(if(order_status ='拒收', 0, goods_amount)) as reject_cnt,
sum(if(order_status ='拒收', 0, goods_amount*goods_price)) as reject_amt
from
dws_orders_goods
group by user_id),
--可从ods层获取join后变多条的数据不好求和字段
t2 as (
select
user_id,
sum(if(order_status in ('退货','拒收'),0,order_money)) as p_sales_cut_amt, -- 排除退拒的总金额(扣除了优惠)
sum(order_money) as h_sales_cut_amt -- 含退拒的总金额(扣除了优惠)
from ods_b2c_orders
group by user_id
),
--分组获取单个项目的topN
t3 as(
select
user_id,
cat1_name
from
(select
user_id,
cat1_name,
amt,
row_number() over(partition by user_id order by amt desc) rn
from(
select
user_id,
cat1_name,
sum(goods_amount) as amt
from
dws_orders_goods
group by user_id, cat1_name) o
)o1 where rn = 1
)
,
t4 as (
select
user_id,
cat2_name
from
(
select
user_id,cat2_name,row_number() over(partition by user_id order by cat2_cnt desc) as rn
from
(
select
user_id,
cat2_name,
sum(goods_amount) as cat2_cnt
from dws_orders_goods
group by user_id,cat2_name
) o1
) o2
where rn=1
),
t5 as (
select
user_id,
cat3_name
from
(
select
user_id,cat3_name,row_number() over(partition by user_id order by cat3_cnt desc) as rn
from
(
select
user_id,
cat3_name,
sum(goods_amount) as cat3_cnt
from dws_orders_goods
group by user_id,cat3_name
) o1
) o2
where rn=1
)
select
t1.user_id ,
t1.p_sales_cnt ,
t1.p_sales_amt ,
t2.p_sales_cut_amt ,
t1.h_sales_cnt ,
t1.h_sales_amt ,
t2.h_sales_cut_amt ,
t1.return_cnt ,
t1.return_amt ,
t1.reject_cnt ,
t1.reject_amt ,
t3.cat1_name ,
t4.cat2_name ,
t5.cat3_name ,
'2019-06-16' first_cat
from t1
join t2 on t1.user_id=t2.user_id
join t3 on t1.user_id=t3.user_id
join t4 on t1.user_id=t4.user_id
join t5 on t1.user_id=t5.user_id