数据如下:用户ID及登陆日期
user_id | login_date |
1001 | 2024-08-01 |
1001 | 2024-08-03 |
1002 | 2024-08-01 |
1002 | 2024-08-02 |
1002 | 2024-08-03 |
1001 | 2024-08-07 |
1.每天1天内登陆,3天内登陆的用户数
思路:1).取每个用户下一次登陆的日期,用lead开窗
2).当前日期和下一次登陆的日期比差值,
select login_date
,count( distinct if(date_diff(lead_login_date,login_date)<=1,user_id,null)) retail_1day_users
,count( distinct if(date_diff(lead_login_date,login_date)<=3,user_id,null)) retail_3day_users
from
(
select user_id
,login_date
,lead(login,date) over (partition by user_id order by login_date asc) as lead_login_date
from dws_data
)
2 连续留存1,3,7天的用户数
思路:1)留存1天的,升序后,下一个日期是当前日期+1
留存3天的,升序后,下三个日期是当前日期+3
select login_date
,count( distinct if(date_diff(lead1_login_date,login_date)=1,user_id,null)) retail_1day_users
,count( distinct if(date_diff(lead3_login_date,login_date)=3,user_id,null)) retail_3day_users
,count( distinct if(date_diff(lead7_login_date,login_date)<=7,user_id,null)) retail_1day_users
from
(
select user_id
,login_date
,lead(login_date,1,'1970-01-01') over (partition by user_id order by login_date asc) as lead1_login_date
,lead(login_date,3,'1970-01-01') over (partition by user_id order by login_date asc) as lead3_login_date
,lead(login_date,7,'1970-01-01') over (partition by user_id order by login_date asc) as lead7_login_date
from dws_data
)