1.数据准备
1.1准备数据
create table user (
name String,
install_date Date,
active_date Date,
)ENGINE = MergeTree
order by (name,install_date ,active_date)
insert into table user values
('张三','2021-01-01','2021-01-01'),
('张三','2021-01-01','2021-01-02'),
('张三','2021-01-01','2021-01-03'),
('张三','2021-01-01','2021-01-04'),
('张三','2021-01-01','2021-01-08'),
('张三','2021-01-01','2021-01-09'),
('张三','2021-01-01','2021-01-10'),
('张三','2021-01-01','2021-01-11'),
('张三','2021-01-01','2021-01-17'),
('张三','2021-01-01','2021-01-18'),
('张三','2021-01-01','2021-01-19'),
('李四','2021-01-01','2021-01-01'),
('李四','2021-01-01','2021-01-02'),
('李四','2021-01-01','2021-01-15'),
('李四','2021-01-01','2021-01-16'),
('李四','2021-01-01','2021-01-17'),
('李四','2021-01-01','2021-01-18'),
('李四','2021-01-01','2021-01-19')
2、求最大连续登录天数
关键点:diff-rank,diff表示每用户每活跃日期距新增日期间隔天数,rank表示每用户按活跃日期生序排的序号,两者相减可理解为计算用户是否连续活跃的衡量指标,如果连续活跃则相减值value应该相等。
-- 最大连续登录天数
select name,max(activedays) as `最大连续登录天数`
from
( -- 1、求单个用户每一次连续登录的天数
select name,diff-rank AS value,count(1) as activedays
from
(
select
name,
active_date,
row_number() over(partition by name order by active_date) as rank,
datediff(day,install_date,active_date) as diff
from fairy.user
) group by 1,2
) group by 1
order by 1
3、当前连续登录天数(求用户最后一次获取活跃的连续登录天数)
-- 当前连续登录天数(求用户最后一次获取活跃的连续登录天数)
with active as
(
select name,active_date,rank,diff,diff-rank as value
from
(
select name,active_date
,row_number() over(partition by name order by active_date) as rank
,datediff(day,install_date,active_date) as diff
from fairy.user
)tmp
)
select t1.name,count(1) as `当前连续登录天数`
from
active as t1
join
( -- 没用用户的当前的最后一次活跃日期记录
select name,max(value) lastactive
from active
group by 1
) as t2
on t1.name = t2.name and t1.value = t2.lastactive
group by t1.name
order by 1
4、最大连续未登录天数
with active as
(
select name,active_date,toInt64(rank) rank,diff,diff-rank as value
from
(
select name,active_date
,row_number() over(partition by name order by active_date) as rank
,datediff(day,install_date, active_date) as diff
from fairy.user
)tmp
)
select name,max(noactivedays) as `最大连续未登录天数`
from
(
select t1.name,t1.active_date as start_date,t2.active_date as end_date ,t2.value - t1.value as noactivedays
from active as t1
join active as t2
on t1.name = t2.name and t1.rank = t2.rank - 1
union all
select name,max(active_date) as start_date,toDate(${current_date}) as end_date,datediff(day,max(active_date),toDate(${current_date})) as noactivedays
from fairy.user
group by 1
)
group by 1
order by 1;
5、用户自从安装app以来的连续未登录天数历史
with active as
(
select name,active_date,toInt64(rank) rank,diff,diff-rank as value
from
(
select name,active_date
,row_number() over(partition by name order by active_date) as rank
,datediff(day,install_date, active_date) as diff
from fairy.user
)tmp
)
select name,start_date,end_date,noactivedays as `连续未登录天数`
from
(
select t1.name,t1.active_date as start_date,t2.active_date as end_date ,t2.value - t1.value as noactivedays
from active as t1
join active as t2
on t1.name = t2.name and t1.rank = t2.rank - 1
union all
select name,max(active_date) as start_date,toDate(${current_date}) as end_date,datediff(day,max(active_date),toDate(${current_date})) as noactivedays
from fairy.user
group by 1
)
where noactivedays>0
order by 1;
6、连续登陆3天用户
select
DISTINCT(name) name
from
( -- 1、求单个用户每一次连续登录的天数
select
name,
diff-rank AS value,
count(1) as activedays
from
(
select
name,
active_date,
row_number() over(partition by name order by active_date) as rank,
datediff(day,install_date,active_date) as diff
from fairy.user
) group by 1,2
)
where activedays > 3