hive---连续登陆

uid login_date
001,2017-02-05 12:00:00
001,2017-02-05 14:00:00
001,2017-02-06 13:00:00
001,2017-02-07 12:00:00
001,2017-02-08 12:00:00
001,2017-02-10 14:00:00
002,2017-02-05 13:00:00
002,2017-02-06 12:00:00
002,2017-02-06 14:00:00
002,2017-02-08 12:00:00
002,2017-02-09 16:00:00
002,2017-02-10 12:00:00
003,2017-01-31 13:00:00
003,2017-01-31 12:00:00
003,2017-02-01 12:00:00
004,2017-02-02 12:00:00
004,2017-02-03 12:00:00
004,2017-02-10 12:00:00
004,2017-03-01 12:00:00


create table  t_login_user(
                              uid string,
                              login_date string
)row format delimited fields terminated by ",";

load data local inpath "/root/login_user.txt" overwrite into table t_login_user;

select * from t_login_user;

计算连续登陆2天的用户

第一种方式

在这里插入图片描述

-- 去掉用户重复登陆的记录
select distinct uid,date_format(login_date,'yyyy-MM-dd') from t_login_user;
-- 在去掉用户重复登录的基础上,对用户分组,对登陆日期排序 计算如果连续登陆 那么下一次登陆的日期
with t1 as ( select distinct uid,date_format(login_date,'yyyy-MM-dd') as login_date from t_login_user )
    select * ,
           date_add(login_date,1) as next_date,
           lead(login_date,1,0) over (partition by uid order by login_date) as next_login
from t1;
-- 查询下一次登陆日期 和 下一行记录相等的用户 就是连续登陆2天的用户
with t1 as ( select distinct uid,date_format(login_date,'yyyy-MM-dd') as login_date from t_login_user ),
     t2 as (select *,
    date_add(login_date,1) as next_date,
    lead(login_date,1,0) over (partition by uid order by login_date) as next_login
	    from t1 )
	select distinct uid from t2 where t2.next_date == t2.next_login;
	
	-- 查询连续3天登陆的用户 
	with t1 as ( select distinct uid,date_format(login_date,'yyyy-MM-dd') as login_date from t_login_user ),
	     t2 as (select *,
	    date_add(login_date,2) as next_date,
	    lead(login_date,2,0) over (partition by uid order by login_date) as next_login
	    from t1 )
	select distinct uid from t2 where t2.next_date == t2.next_login;
	
	-- 查询连续N天登陆的用户 
	select *,
	    --本次登陆日期的第N天
	    date_add(登陆日期,N-1) as next_date,
	    --按照用户id分区,按照登陆日期排序 取对应N-1行的数据
	    lead(登陆日期,N-1,0) over (partition by 用户 order by 登陆日期) as next_login
	    from t
-- 查询连续登陆大于4天的用户
with t1 as (select distinct uid, date_format(login_date, 'yyyy-MM-dd') as login_date from t_login_user),
     t2 as (
         select uid,
                date_format(login_date, 'yyyy-MM-dd')                     as interval_date,
                row_number() over (partition by uid order by login_date ) as rn
         from t1
     ),
     t3 as (
         select *, date_sub(interval_date, rn) as login_date
         from t2
     )
select uid, count(1)
from t3
group by uid, login_date
having count(1) >= 4;

第二种方式
在这里插入图片描述

-- 去掉当天重复登陆信息
select distinct uid, date_format(login_date, 'yyyy-MM-dd') as login_date
from t_login_user;
-- 窗口函数用户分组,登陆日期排序,行号
with t1 as (select distinct uid, date_format(login_date, 'yyyy-MM-dd') as login_date from t_login_user)
select *,
       row_number() over (partition by uid order by login_date) as rn
from t1

-- 登陆日期-编号 = 间隙日期 
with t1 as (select distinct uid, date_format(login_date, 'yyyy-MM-dd') as login_date from t_login_user),
     t2 as (select *,
                   row_number() over (partition by uid order by login_date) as rn
            from t1)
select *,
       date_sub(login_date, rn) as interval_date
from t2;

-- 用户 间隙日期分组 计数 >=2 为连续两天登陆  >=n 为连续n天记录
with t1 as (select distinct uid, date_format(login_date, 'yyyy-MM-dd') as login_date from t_login_user),
     t2 as (select *, row_number() over (partition by uid order by login_date) as rn from t1),
     t3 as (select *, date_sub(login_date, rn) as interval_date from t2)
select uid, count(1) as login_count
from t3
group by uid, interval_date
having count(1) >= 2;

分组topN
查询每个用户最高连续登陆天数
在这里插入图片描述

-- 查询每个用户连续登陆的天数
with t1 as (select distinct uid, date_format(login_date, 'yyyy-MM-dd') as login_date from t_login_user),
     t2 as (select *, row_number() over (partition by uid order by login_date) as rn from t1),
     t3 as (select *, date_sub(login_date, rn) as interval_date from t2)
select uid, count(1) as login_count
from t3
group by uid, interval_date

-- 分组 设置编号
with t1 as (select distinct uid, date_format(login_date, 'yyyy-MM-dd') as login_date from t_login_user),
     t2 as (select *, row_number() over (partition by uid order by login_date) as rn from t1),
     t3 as (select *, date_sub(login_date, rn) as interval_date from t2),
     t4 as (select uid, count(1) as login_count from t3 group by uid, interval_date)
select *, row_number() over (partition by uid order by login_count desc) as rn
from t4;

-- topn
with t1 as (select distinct uid, date_format(login_date, 'yyyy-MM-dd') as login_date from t_login_user),
     t2 as (select *, row_number() over (partition by uid order by login_date) as rn from t1),
     t3 as (select *, date_sub(login_date, rn) as interval_date from t2),
     t4 as (select uid, count(1) as login_count from t3 group by uid, interval_date),
     t5 as (select *, row_number() over (partition by uid order by login_count desc) as rn from t4)
select *
from t5
where rn <= 1;
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值