实战-数据仓库构建(十)

131 篇文章 16 订阅
71 篇文章 22 订阅

问题导读:
1、如何设计设备主题?
2、如何设计会员主题(会员主题+漏斗分析)?
3、如何设计商品主题(个数+销量)?
4、如何设计营销主题(用户+商品+购买行为)?

一、数仓搭建 - ADS 层
1.1 设备主题
1.1.1 活跃设备数(日、周、月)

需求定义:
日活:当日活跃的设备数
周活:当周活跃的设备数
月活:当月活跃的设备数
1)建表语句

drop table if exists ads_uv_count;
create external table ads_uv_count(
`dt` string COMMENT '统计日期',
`day_count` bigint COMMENT '当日用户数量',
`wk_count` bigint COMMENT '当周用户数量',
`mn_count` bigint COMMENT '当月用户数量',
`is_weekend` string COMMENT 'Y,N 是否是周末,用于得到本周最终结果',
`is_monthend` string COMMENT 'Y,N 是否是月末,用于得到本月最终结果'
) COMMENT '活跃设备数'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_uv_count/';

2)导入数据

insert into table ads_uv_count
select
'2020-03-10' dt,
daycount.ct,
wkcount.ct,
mncount.ct,
if(date_add(next_day('2020-03-10','MO'),-1)='2020-03-10','Y','N') ,
if(last_day('2020-03-10')='2020-03-10','Y','N')
from
(
select
'2020-03-10' dt,
count(*) ct
from dwt_uv_topic
where login_date_last='2020-03-10'
)daycount join
(
select
'2020-03-10' dt,
count (*) ct
from dwt_uv_topic
where login_date_last>=date_add(next_day('2020-03-10','MO'),-7)
and login_date_last<= date_add(next_day('2020-03-10','MO'),-1)
) wkcount on daycount.dt=wkcount.dt
join
(
select
'2020-03-10' dt,
count (*) ct
from dwt_uv_topic
where
date_format(login_date_last,'yyyy-MM')=date_format('2020-03-10','yyyy-MM')
)mncount on daycount.dt=mncount.dt;

1.1.2 每日新增设备
1)建表语句

drop table if exists ads_new_mid_count;
create external table ads_new_mid_count
(
`create_date` string comment '创建时间' ,
`new_mid_count` BIGINT comment '新增设备数量'
) COMMENT '每日新增设备信息数量'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_new_mid_count/';

2)导入数据

insert into table ads_new_mid_count
select
login_date_first,
count(*)
from dwt_uv_topic
where login_date_first='2020-03-10'
group by login_date_first;

1.1.3 沉默用户数
需求定义:
沉默用户:只在安装当天启动过,且启动时间是在 7 天前
1)建表语句

drop table if exists ads_silent_count;
create external table ads_silent_count(
`dt` string COMMENT '统计日期',
`silent_count` bigint COMMENT '沉默设备数'
)
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_silent_count';

2)导入 2020-03-20 数据

drop table if exists ads_silent_count;
create external table ads_silent_count(
`dt` string COMMENT '统计日期',
`silent_count` bigint COMMENT '沉默设备数'
)
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_silent_count';

1.1.4 本周回流用户数

需求定义:
本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备
1)建表语句

drop table if exists ads_back_count;
create external table ads_back_count(
`dt` string COMMENT '统计日期',
`wk_dt` string COMMENT '统计日期所在周',
`wastage_count` bigint COMMENT '回流设备数'
)
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_back_count';

2)导入数据:

insert into table ads_back_count
select
'2020-03-15',
count(*)
from
(
select
mid_id
from dwt_uv_topic
where login_date_last>=date_add(next_day('2020-03-15','MO'),-7)
and login_date_last<= date_add(next_day('2020-03-15','MO'),-1)
and login_date_first<date_add(next_day('2020-03-15','MO'),-7)
)current_wk
left join
(
select
mid_id
from dws_uv_detail_daycount
where dt>=date_add(next_day('2020-03-15','MO'),-7*2)
and dt<= date_add(next_day('2020-03-15','MO'),-7-1)
group by mid_id
)last_wk
on current_wk.mid_id=last_wk.mid_id
where last_wk.mid_id is null;

1.1.5 流失用户数
需求定义:
流失用户:最近 7 天未活跃的设备
1)建表语句

drop table if exists ads_wastage_count;
create external table ads_wastage_count(
`dt` string COMMENT '统计日期',
`wastage_count` bigint COMMENT '流失设备数'
)
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_wastage_count';

2)导入 2020-03-20 数据

insert into table ads_wastage_count
select
'2020-03-20',
count(*)
from
(
select
mid_id
from dwt_uv_topic
where login_date_last<=date_add('2020-03-20',-7)
group by mid_id
)t1;

1.1.6 留存率



1)建表语句

drop table if exists ads_user_retention_day_rate;
create external table ads_user_retention_day_rate
(
`stat_date` string comment '统计日期',
`create_date` string comment '设备新增日期',
`retention_day` int comment '截止当前日期留存天数',
`retention_count` bigint comment '留存数量',
`new_mid_count` bigint comment '设备新增数量',
`retention_ratio` decimal(10,2) comment '留存率'
) COMMENT '每日用户留存情况'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_user_retention_day_rate/';

2)导入数据

insert into table ads_user_retention_day_rate
select
'2020-03-10',--统计日期
date_add('2020-03-10',-1),--新增日期
1,--留存天数
sum(if(login_date_first=date_add('2020-03-10',-1) and
login_date_last='2020-03-10',1,0)),--2020-03-09 的 1 日留存数
sum(if(login_date_first=date_add('2020-03-10',-1),1,0)),--2020-03-09 新增
sum(if(login_date_first=date_add('2020-03-10',-1) and
login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
1),1,0))*100
from dwt_uv_topic
union all
select
'2020-03-10',--统计日期
date_add('2020-03-10',-2),--新增日期
2,--留存天数
sum(if(login_date_first=date_add('2020-03-10',-2) and
login_date_last='2020-03-10',1,0)),--2020-03-08 的 2 日留存数
sum(if(login_date_first=date_add('2020-03-10',-2),1,0)),--2020-03-08 新增
sum(if(login_date_first=date_add('2020-03-10',-2) and
login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
2),1,0))*100
from dwt_uv_topic
union all
select
'2020-03-10',--统计日期
date_add('2020-03-10',-3),--新增日期
3,--留存天数
sum(if(login_date_first=date_add('2020-03-10',-3) and
login_date_last='2020-03-10',1,0)),--2020-03-07 的 3 日留存数
sum(if(login_date_first=date_add('2020-03-10',-3),1,0)),--2020-03-07 新增
sum(if(login_date_first=date_add('2020-03-10',-3) and
login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
3),1,0))*100
from dwt_uv_topic;

1.1.7 最近连续三周活跃用户数

1)建表语句

drop table if exists ads_continuity_wk_count;
create external table ads_continuity_wk_count(
`dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日
期',
`wk_dt` string COMMENT '持续时间',
`continuity_count` bigint COMMENT '活跃次数'
)
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_continuity_wk_count';

2)导入 2020-03-20 所在周的数据

insert into table ads_continuity_wk_count
select
'2020-03-15',
concat(date_add(next_day('2020-03-15','MO'),-7*3),'_',date_add(next_day('
2020-03-15','MO'),-1)),
count(*)
from
(
select
mid_id
from
(
select
mid_id
from dws_uv_detail_daycount
where dt>=date_add(next_day('2020-03-10','monday'),-7)
and dt<=date_add(next_day('2020-03-10','monday'),-1)
group by mid_id
union all
select
mid_id
from dws_uv_detail_daycount
where dt>=date_add(next_day('2020-03-10','monday'),-7*2)
and dt<=date_add(next_day('2020-03-10','monday'),-7-1)
group by mid_id
union all
select
mid_id
from dws_uv_detail_daycount
where dt>=date_add(next_day('2020-03-10','monday'),-7*3)
and dt<=date_add(next_day('2020-03-10','monday'),-7*2-1)
group by mid_id
)t1
group by mid_id
having count(*)=3
)t2

1.1.8 最近七天内连续三天活跃用户数

1)建表语句

drop table if exists ads_continuity_uv_count;
create external table ads_continuity_uv_count(
`dt` string COMMENT '统计日期',
`wk_dt` string COMMENT '最近 7 天日期',
`continuity_count` bigint
) COMMENT '连续活跃设备数'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_continuity_uv_count';

2)写出导入数据的 SQL 语句

insert into table ads_continuity_uv_count
select
'2020-03-12',
concat(date_add('2020-03-12',-6),'_','2020-03-12'),
count(*)
from
(
select mid_id
from
(
select mid_id
from
(
select
mid_id,
date_sub(dt,rank) date_dif
from
(
select
mid_id,
dt,
rank() over(partition by mid_id order by dt) rank
from dws_uv_detail_daycount
where dt>=date_add('2020-03-12',-6) and
dt<='2020-03-12'
)t1
)t2
group by mid_id,date_dif
having count(*)>=3
)t3
group by mid_id
)t4;

7.2 会员主题
7.2.1 会员主题信息

1)建表

drop table if exists ads_user_topic;
create external table ads_user_topic(
`dt` string COMMENT '统计日期',
`day_users` string COMMENT '活跃会员数',
`day_new_users` string COMMENT '新增会员数',
`day_new_payment_users` string COMMENT '新增消费会员数',
`payment_users` string COMMENT '总付费会员数',
`users` string COMMENT '总会员数',
`day_users2users` decimal(10,2) COMMENT '会员活跃率',
`payment_users2users` decimal(10,2) COMMENT '会员付费率',
`day_new_users2users` decimal(10,2) COMMENT '会员新鲜度'
) COMMENT '会员主题信息表'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_user_topic';

2)导入数据

insert into table ads_user_topic
select
'2020-03-10',
sum(if(login_date_last='2020-03-10',1,0)),
sum(if(login_date_first='2020-03-10',1,0)),
sum(if(payment_date_first='2020-03-10',1,0)),
sum(if(payment_count>0,1,0)),
count(*),
sum(if(login_date_last='2020-03-10',1,0))/count(*),
sum(if(payment_count>0,1,0))/count(*),
sum(if(login_date_first='2020-03-10',1,0))/sum(if(login_date_last='2020-03-10',1,0))
from dwt_user_topic

4)vim ads_user_topic.sh
添加如下内容:

#!/bin/bash
APP=gmall
hive=/opt/modules/hive/bin/hive
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
with
tmp_day_users as
(
select
'$do_date' dt,
count(*) day_users
from
${APP}.dwt_user_topic
where
login_date_last='$do_date'
),
tmp_day_new_users as
(
select
'$do_date' dt,
count(*) day_new_users
from
${APP}.dwt_user_topic
where
login_date_last='$do_date' and login_date_first='$do_date'
),
tmp_day_new_payment_users as
(
select
'$do_date' dt,
count(*) day_new_payment_users
from
${APP}.dwt_user_topic
where
payment_date_first='$do_date'
),
tmp_payment_users as
(
select
'$do_date' dt,
count(*) payment_users
from
${APP}.dwt_user_topic
where
payment_date_first is not null
),
tmp_users as
(
select
'$do_date' dt,
count(*) users
from
${APP}.dwt_user_topic
tmp_users
)
insert into table ${APP}.ads_user_topic
select
'$do_date' dt,
day_users,
day_new_users,
day_new_payment_users,
payment_users,
users,
day_users/users,
payment_users/users,
day_new_users/users
from
tmp_day_users
join
tmp_day_new_users
on
tmp_day_users.dt=tmp_day_new_users.dt
join
tmp_day_new_payment_users
on
tmp_day_users.dt=tmp_day_new_payment_users.dt
join
tmp_payment_users
on
tmp_day_users.dt=tmp_payment_users.dt
join
tmp_users
on
tmp_day_users.dt=tmp_users.dt;
"
$hive -e "$sql"

5)增加脚本执行权限

chmod 770 ads_user_topic.sh

6)执行脚本导入数据

ads_user_topic.sh 2020-03-11

7.2.2 漏斗分析

统计“浏览->购物车->下单->支付”的转化率
思路:统计各个行为的人数,然后计算比值
1)建表语句

drop table if exists ads_user_action_convert_day;
create external table ads_user_action_convert_day(
`dt` string COMMENT '统计日期',
`total_visitor_m_count` bigint COMMENT '总访问人数',
`cart_u_count` bigint COMMENT '加入购物车的人数',
`visitor2cart_convert_ratio` decimal(10,2) COMMENT '访问到加入购物车转化率',
`order_u_count` bigint COMMENT '下单人数',
`cart2order_convert_ratio` decimal(10,2) COMMENT '加入购物车到下单转化率',
`payment_u_count` bigint COMMENT '支付人数',
`order2payment_convert_ratio` decimal(10,2) COMMENT '下单到支付的转化率'
) COMMENT '用户行为漏斗分析'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_user_action_convert_day/';

2)数据装载

insert into table ads_user_action_convert_day
select
'2020-03-10',
uv.day_count,
ua.cart_count,
cast(ua.cart_count/uv.day_count as decimal(10,2)) visitor2cart_convert_ratio,
ua.order_count,
cast(ua.order_count/ua.cart_count as decimal(10,2)) visitor2order_convert_ratio,
ua.payment_count,
cast(ua.payment_count/ua.order_count as decimal(10,2)) order2payment_convert_ratio
from
(
select
dt,
sum(if(cart_count>0,1,0)) cart_count,
sum(if(order_count>0,1,0)) order_count,
sum(if(payment_count>0,1,0)) payment_count
from dws_user_action_daycount
where dt='2020-03-10'
group by dt
)ua join ads_uv_count uv on uv.dt=ua.dt; 

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值