with active as(select name
,active_date
,rank
,diff
,diff-rank asvaluefrom(select name
,active_date
,dense_rank()over(partitionby name orderby active_date)as rank
,datediff(active_date,install_date)as diff
from tmpdb.hzy_0930_test
)tmp
)select t1.name,count(1)as`最近连续登录天数`from active t1
join(-- 没用用户的当前的最后一次活跃日期记录select name
,max(value) lastactive
from active
groupby name
)as t2
on t1.name = t2.name
and t1.value= t2.lastactive
groupby t1.name
;
张三 2
李四 3--给个中间结果方便理解select name
,active_date
,rank
,diff
,diff-rank asvaluefrom(select name
,active_date
,dense_rank()over(partitionby name orderby active_date)as rank
,datediff(active_date,install_date)as diff
from tmpdb.hzy_0930_test
)tmp
张三 2021-01-0110-1
张三 2021-01-0221-1
张三 2021-01-0332-1
张三 2021-01-0443-1
张三 2021-01-07561
张三 2021-01-09682
张三 2021-01-10792
李四 2021-01-0110-1
李四 2021-01-0221-1
李四 2021-01-0332-1
李四 2021-01-0443-1
李四 2021-01-115105
李四 2021-01-146137
李四 2021-01-157147
李四 2021-01-168157
小需求三:最大连续未登录天数(截止到某一天,这里随便取一个吧’2021-01-20’)
计算用户"最大连续未登录天数",还需考虑用户最后一次活跃日期距某一天的时间间隔,然后再取最大值
方法一
with active as(selectdistinct--防止用户每日活跃数据重复(即用户在某一天登陆两次)
name
,active_date
,rank
-- 如果连续,那么这个date_value应该是一样的,date_sub(active_date,rank)as date_value
-- 需考虑用户最后一次活跃日期距某一天的时间间隔,max(active_date)over(partitionby name)as last_active_date
from(select name
,active_date
-- 相同日期排名一样,下一日期的排名与上一日期连续(不留空位),dense_rank()over(partitionby name orderby active_date)as rank
from tmpdb.hzy_0930_test
)a
)select name
,max(no_active_days)as`最大连续未登录天数`from(select t1.name
,t1.active_date as start_date
,t2.active_date as end_date
,datediff(t2.active_date,t1.active_date)-1as no_active_days --这里需要和方法二区分,方法二不需要减1from active t1
join active t2
on t1.name=t2.name
and t1.rank=t2.rank-1unionall-- 用户最后一次活跃日期距某一天的时间间隔,只需要一天即可selectdistinct
name
,max(active_date)as start_date
,'2021-01-20'as end_date -- 截止到某一天,当然可以取当天 current_date,datediff('2021-01-20',max(active_date))-1as no_active_days -- 这里不需要减1from active
groupby name
)t3
groupby name
;
张三 10
李四 6-- 给个中间结果方便理解with active as(selectdistinct--防止用户每日活跃数据重复(即用户在某一天登陆两次)
name
,active_date
,rank
-- 如果连续,那么这个date_value应该是一样的,date_sub(active_date,rank)as date_value
from(select name
,active_date
-- 相同日期排名一样,下一日期的排名与上一日期连续(不留空位),dense_rank()over(partitionby name orderby active_date)as rank
from tmpdb.hzy_0930_test
)a
)select t1.name
,t1.active_date as start_date
,t2.active_date as end_date
,datediff(t2.active_date,t1.active_date)-1as no_active_days --这里需要和方法二区分,方法二不需要减1from active t1
join active t2
on t1.name=t2.name
and t1.rank=t2.rank-1;
张三 2021-01-012021-01-020
张三 2021-01-022021-01-030
张三 2021-01-032021-01-040
张三 2021-01-042021-01-072
张三 2021-01-072021-01-091
张三 2021-01-092021-01-100
李四 2021-01-012021-01-020
李四 2021-01-022021-01-030
李四 2021-01-032021-01-040
李四 2021-01-042021-01-116
李四 2021-01-112021-01-142
李四 2021-01-142021-01-150
李四 2021-01-152021-01-160
with active as(select name
,active_date
,rank
,diff
,diff-rank asvaluefrom(select name
,active_date
,row_number()over(partitionby name orderby active_date)as rank
,datediff(active_date,install_date)as diff
from tmpdb.hzy_0930_test
)tmp
)select name
,max(noactivedays)as`最大连续未登录天数`from(select t1.name
,t1.active_date as start_date
,t2.active_date as end_date
,t2.value- t1.valueas noactivedays
from active as t1
join active as t2
on t1.name = t2.name
and t1.rank = t2.rank -1unionallselect name
,max(active_date)as start_date
,'2021-01-20'as end_date
,datediff('2021-01-20',max(active_date))as noactivedays
from tmpdb.hzy_0930_test
groupby name
) t
groupby name
orderby name;
张三 10
李四 6
小需求四:用户自从安装app以来的连续未登录天数历史
其实这和需求三类似
不需要group by 出 最大连续未登录天数
方法一
with active as(selectdistinct--防止用户每日活跃数据重复(即用户在某一天登陆两次)
name
,active_date
,rank
-- 如果连续,那么这个date_value应该是一样的,date_sub(active_date,rank)as date_value
from(select name
,active_date
-- 相同日期排名一样,下一日期的排名与上一日期连续(不留空位),dense_rank()over(partitionby name orderby active_date)as rank
from tmpdb.hzy_0930_test
)a
)select name
,start_date
,end_date
,no_active_days as`连续未登录天数`from(select t1.name
,t1.active_date as start_date
,t2.active_date as end_date
,datediff(t2.active_date,t1.active_date)-1as no_active_days --这里需要和方法二区分,方法二不需要减1from active t1
join active t2
on t1.name=t2.name
and t1.rank=t2.rank-1unionallselect name
,max(active_date)as start_date
,'2021-01-20'as end_date -- 截止到某一天,当然可以取当天 current_date,datediff('2021-01-20',max(active_date))as no_active_days
from active
groupby name
)t3
where no_active_days >0;
张三 2021-01-042021-01-072
张三 2021-01-072021-01-091
张三 2021-01-102021-01-2010
李四 2021-01-042021-01-116
李四 2021-01-112021-01-142
李四 2021-01-162021-01-204
方法二
with active as(select name
,active_date
,rank
,diff
,diff-rank asvaluefrom(select name
,active_date
,row_number()over(partitionby name orderby active_date)as rank
,datediff(active_date,install_date)as diff
from tmpdb.hzy_0930_test
)tmp
)select name
,start_date
,end_date
,noactivedays as`连续未登录天数`from(select t1.name
,t1.active_date as start_date
,t2.active_date as end_date
,t2.value- t1.valueas noactivedays
from active as t1
join active as t2
on t1.name = t2.name and t1.rank = t2.rank -1unionallselect name
,max(active_date)as start_date
,'2021-01-20'as end_date
,datediff('2021-01-20',max(active_date))as noactivedays
from tmpdb.hzy_0930_test
groupby name
) t
where noactivedays>0;
张三 2021-01-042021-01-072
张三 2021-01-072021-01-091
张三 2021-01-102021-01-2010
李四 2021-01-042021-01-116
李四 2021-01-112021-01-142
李四 2021-01-162021-01-204