(38)DWS层

1 每日设备行为
每日设备行为,主要按照设备 id 统计。

 

1 )建表语句
hive (gmall)>
drop table if exists dws_uv_detail_daycount;
create external table dws_uv_detail_daycount
(
`mid_id` string COMMENT ' 设备 id',
`brand` string COMMENT ' 手机品牌 ',
`model` string COMMENT ' 手机型号 ',
`login_count` bigint COMMENT ' 活跃次数 ',
`page_stats` array<struct<page_id:string,page_count:bigint>> COMMENT '
面访问统计 '
) COMMENT ' 每日设备行为表 '
partitioned by(dt string)
stored as parquet
location '/warehouse/gmall/dws/dws_uv_detail_daycount'
tblproperties ("parquet.compression"="lzo");
2 )数据装载
hive (gmall)>
with
tmp_start as
(
select
mid_id,
brand,
model,
count(*) login_count
from dwd_start_log
where dt='2020-06-14'
group by mid_id,brand,model
),
tmp_page as
(
select
mid_id,
brand,
model,
collect_set(named_struct('page_id',page_id,'page_count',page_count))
page_stats
from
(
select
mid_id,
brand,
model,
page_id,
count(*) page_count
from dwd_page_log
where dt='2020-06-14'
group by mid_id,brand,model, page_id
)tmp
group by mid_id,brand,model
)
insert overwrite table dws_uv_detail_daycount partition(dt='2020-06-14')
select
nvl(tmp_start.mid_id,tmp_page.mid_id),
nvl(tmp_start.brand,tmp_page.brand),
nvl(tmp_start.model,tmp_page.model),
tmp_start.login_count,
tmp_page.page_stats
from tmp_start
full outer join tmp_page
on tmp_start.mid_id=tmp_page.mid_id
and tmp_start.brand=tmp_page.brand
and tmp_start.model=tmp_page.model;
3 )查询加载结果
hive (gmall)>
select * from dws_uv_detail_daycount where dt='2020-06-14' limit 2;
2 . 每日会员行为
1 )建表语句
hive (gmall)>
drop table if exists dws_user_action_daycount;
create external table dws_user_action_daycount
(
user_id string comment ' 用户 id',
login_count bigint comment ' 登录次数 ',
cart_count bigint comment ' 加入购物车次数 ',
order_count bigint comment ' 下单次数 ',
order_amount decimal(16,2) comment ' 下单金额 ',
payment_count bigint comment ' 支付次数 ',
payment_amount decimal(16,2) comment ' 支付金额 ',
order_detail_stats
array<struct<sku_id:string,sku_num:bigint,order_count:bigint,order_amount:de
cimal(20,2)>> comment ' 下单明细统计 '
) COMMENT ' 每日会员行为 '
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_user_action_daycount/'
tblproperties ("parquet.compression"="lzo");
2 )数据装载

 

hive (gmall)>
with
tmp_login as
(
select
user_id,
count(*) login_count
from dwd_start_log
where dt='2020-06-14'
and user_id is not null
group by user_id
),
tmp_cart as
(
select
user_id,
count(*) cart_count
from dwd_action_log
where dt='2020-06-14'
and user_id is not null
and action_id='cart_add'
group by user_id
),tmp_order as
(
select
user_id,
count(*) order_count,
sum(final_total_amount) order_amount
from dwd_fact_order_info
where dt='2020-06-14'
group by user_id
) ,
tmp_payment as
(
select
user_id,
count(*) payment_count,
sum(payment_amount) payment_amount
from dwd_fact_payment_info
where dt='2020-06-14'
group by user_id
),
tmp_order_detail as
(
select
user_id,
collect_set(named_struct('sku_id',sku_id,'sku_num',sku_num,'order_count',ord
er_count,'order_amount',order_amount)) order_stats
from
(
select
user_id,
sku_id,
sum(sku_num) sku_num,
count(*) order_count,
cast(sum(final_amount_d) as decimal(20,2)) order_amount
from dwd_fact_order_detail
where dt='2020-06-14'
group by user_id,sku_id
)tmp
group by user_id
)
insert overwrite table dws_user_action_daycount partition(dt='2020-06-14')
select
tmp_login.user_id,
login_count,
nvl (cart_count,0),
nvl(order_count,0),
nvl(order_amount,0.0),
nvl(payment_count,0),
nvl(payment_amount,0.0),
order_stats
from tmp_login
left join tmp_cart on tmp_login.user_id=tmp_cart.user_id
left join tmp_order on tmp_login.user_id=tmp_order.user_id
left join tmp_payment on tmp_login.user_id=tmp_payment.user_id
left join tmp_order_detail on tmp_login.user_id=tmp_order_detail.user_id;
3 )查询加载结果
hive (gmall)>
select * from dws_user_action_daycount where dt='2020-06-14' limit 2;
3 每日商品行为
1 )建表语句
hive (gmall)>
drop table if exists dws_sku_action_daycount;
create external table dws_sku_action_daycount
(
sku_id string comment 'sku_id',
order_count bigint comment ' 被下单次数 ',
order_num bigint comment ' 被下单件数 ',
order_amount decimal(16,2) comment ' 被下单金额 ',
payment_count bigint comment ' 被支付次数 ',
payment_num bigint comment ' 被支付件数 ',
payment_amount decimal(16,2) comment ' 被支付金额 ',
refund_count bigint comment ' 被退款次数 ',
refund_num bigint comment ' 被退款件数 ',
refund_amount decimal(16,2) comment ' 被退款金额 ',
cart_count bigint comment ' 被加入购物车次数 ',
favor_count bigint comment ' 被收藏次数 ',
appraise_good_count bigint comment ' 好评数 ',
appraise_mid_count bigint comment ' 中评数 ',
appraise_bad_count bigint comment ' 差评数 ',
appraise_default_count bigint comment ' 默认评价数 '
) COMMENT ' 每日商品行为 '
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_sku_action_daycount/'
tblproperties ("parquet.compression"="lzo");
2 )数据装载
注意:如果是 23 59 下单,支付日期跨天。需要从订单详情里面取出支付时间是今天,且
订单时间是昨天或者今天的订单。
hive (gmall)>
with
tmp_order as
(
select
sku_id,
count(*) order_count,
sum(sku_num) order_num,
sum(final_amount_d) order_amount
from dwd_fact_order_detail
where dt='2020-06-14'
group by sku_id
),
tmp_payment as
(
select
sku_id,
count(*) payment_count,
sum(sku_num) payment_num,
sum(final_amount_d) payment_amount
from dwd_fact_order_detail
where (dt='2020-06-14'
or dt=date_add('2020-06-14',-1))
and order_id in
(
select
id
from dwd_fact_order_info
where (dt='2020-06-14'
or dt=date_add('2020-06-14',-1))
and date_format(payment_time,'yyyy-MM-dd')='2020-06-14'
)
group by sku_id
),
tmp_refund as
(
select
sku_id,
count(*) refund_count,
sum(refund_num) refund_num,
sum(refund_amount) refund_amount
from dwd_fact_order_refund_info
where dt='2020-06-14'
group by sku_id
),
tmp_cart as
(
select
item sku_id,
count(*) cart_count
from dwd_action_log
where dt='2020-06-14'
and user_id is not null
and action_id='cart_add'
group by item
),tmp_favor as
(
select
item sku_id,
count(*) favor_count
from dwd_action_log
where dt='2020-06-14'
and user_id is not null
and action_id='favor_add'
group by item
),
tmp_appraise as
(
select
sku_id,
sum(if(appraise='1201',1,0)) appraise_good_count,
sum(if(appraise='1202',1,0)) appraise_mid_count,
sum(if(appraise='1203',1,0)) appraise_bad_count,
sum(if(appraise='1204',1,0)) appraise_default_count
from dwd_fact_comment_info
where dt='2020-06-14'
group by sku_id
)
insert overwrite table dws_sku_action_daycount partition(dt='2020-06-14')
select
sku_id,
sum(order_count),
sum(order_num),
sum(order_amount),
sum(payment_count),
sum(payment_num),
sum(payment_amount),
sum(refund_count),
sum(refund_num),
sum(refund_amount),
sum(cart_count),
sum(favor_count),
sum(appraise_good_count),
sum(appraise_mid_count),
sum(appraise_bad_count),
sum(appraise_default_count)
from
(
select
sku_id,
order_count,
order_num,
order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
0 cart_count,
0 favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_order
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
payment_count,
payment_num,
payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
0 cart_count,
0 favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_payment
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
refund_count,
refund_num,
refund_amount,
0 cart_count,
0 favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_refund
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
cart_count,
0 favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_cart
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
0 cart_count,
favor_count,
0 appraise_good_count,
0 appraise_mid_count,
0 appraise_bad_count,
0 appraise_default_count
from tmp_favor
union all
select
sku_id,
0 order_count,
0 order_num,
0 order_amount,
0 payment_count,
0 payment_num,
0 payment_amount,
0 refund_count,
0 refund_num,
0 refund_amount,
0 cart_count,
0 favor_count,
appraise_good_count,
appraise_mid_count,
appraise_bad_count,
appraise_default_count
from tmp_appraise
)tmp
group by sku_id;
3 )查询加载结果
hive (gmall)>
select * from dws_sku_action_daycount where dt='2020-06-14' limit 2;
(注:剩下主要是dws层,dwt,ADS层,表字段建立,及结合业务sql语句,内容主要结合实际业务情况,不做过多展示,主要是相关分层思路,在前期文档中也有相关介绍,建表导表过场相对枯燥,并且也对sql有一定要求,要夯实ssql基本功)
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

大数据开发工程师-宋权

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值