Hive中连续多少天登录问题分析函数lead处理

1、需求描述
时段:3月18日~3月24日
人群:时段内注册判定为新增用户(新设备新ID&180天回归)的用户
人群的如下数据:
uid,注册日期,首次发生连续7日不登录的最后登录日期,首次发生7日连续不登录后的首次登录日期
2、分析说明
涉及到连续多少天登录问题,一般最先想到分析函数lead;然后根据计算出来的间隔天数,计算需求中的内容。
分析函数在oracle中用到比较广泛和深入,但在Hive下各方面完善后,表现也不错。
3、具体Sql

drop table if exists xxxxx_liuyl0514_base;
create table xxxxx_liuyl0514_base as
with tab_newidentifier_newuser180 as(
select pt_day,uid
from oss_bi_type_of_retention_user
where remain_type in(1,3) and pt_day between '2018-03-18' and '2018-03-24' and remain_day_num=0
group by pt_day,uid
),
tab_dau as(
select uid,pt_day
from oss_bi_type_of_all_user 
where pt_day>='2018-03-18' and type in (1,2,3,4))
select a1.uid,a1.pt_day reg_date,a2.pt_day login_date
from tab_newidentifier_newuser180 a1
left join tab_dau a2 on a1.uid=a2.uid
;
-------------------------------------------------------------
with tab_uid_login_info as(
select a1.uid,a1.reg_date,a1.login_date,lead(a1.login_date,1)over(partition by a1.uid,a1.reg_date order by a1.login_date asc) login_date_up1,datediff(lead(a1.login_date,1)over(partition by a1.uid,a1.reg_date order by a1.login_date asc),a1.login_date) date_intervals
from xxxxx_liuyl0514_base a1)
select a1.uid,a1.reg_date,a1.login_date,a1.login_date_up1,a1.date_intervals,row_number()over(partition by a1.uid,a1.reg_date order by a1.login_date) rn
from tab_uid_login_info a1
where a1.date_intervals>=7
;
select a1.uid,a1.reg_date
from xxxxx_liuyl0514_base a1
group by a1.uid,a1.reg_date;
-------------------------------------------------------------
drop table if exists xxxxx_liuyl0514_result;
create table xxxxx_liuyl0514_result as
with tab_all_new_user as(
select a1.uid,a1.reg_date
from xxxxx_liuyl0514_base a1
group by a1.uid,a1.reg_date),
tab_uid_login_info_out7 as(
select a1.uid,a1.reg_date,a1.login_date,a1.login_date_up1,a1.date_intervals,row_number()over(partition by a1.uid,a1.reg_date order by a1.login_date) rn
from (select a1.uid,a1.reg_date,a1.login_date,lead(a1.login_date,1)over(partition by a1.uid,a1.reg_date order by a1.login_date asc) login_date_up1,datediff(lead(a1.login_date,1)over(partition by a1.uid,a1.reg_date order by a1.login_date asc),a1.login_date) date_intervals
from xxxxx_liuyl0514_base a1) a1
where a1.date_intervals>=7),
tab_uid_login_info_lastlogin as(
select a1.uid,a1.reg_date,a1.login_date,a1.login_date_up1,a1.date_intervals,row_number()over(partition by a1.uid,a1.reg_date order by a1.login_date) rn
from (select a1.uid,a1.reg_date,a1.login_date,coalesce(lead(a1.login_date,1)over(partition by a1.uid,a1.reg_date order by a1.login_date asc),'xxxxxx') login_date_up1,coalesce(datediff(lead(a1.login_date,1)over(partition by a1.uid,a1.reg_date order by a1.login_date asc),a1.login_date),999) date_intervals
from xxxxx_liuyl0514_base a1) a1
where a1.date_intervals=999 and a1.login_date_up1='xxxxxx')
select a1.uid,a1.reg_date,a2.login_date,a2.login_date_up1,a2.date_intervals,a3.login_date last_login
from tab_all_new_user a1
left join (select uid,reg_date,login_date,login_date_up1,date_intervals from tab_uid_login_info_out7 where rn=1) a2 on a1.uid=a2.uid and a1.reg_date=a2.reg_date
left join tab_uid_login_info_lastlogin a3 on a1.uid=a3.uid and a1.reg_date=a3.reg_date
;
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值