分组问题
题目
数据
1001 17523641234
1001 17523641256
1002 17523641278
1001 17523641334
1002 17523641434
1001 17523641534
1001 17523641544
1002 17523641634
1001 17523641638
1001 17523641654
建表
create table if not exists team(
id int,
dt bigint
)row format delimited fields terminated by '\t';
加载数据
load data local inpath '/opt/module/data/hive-interviews/team' into table team;
解题步骤
步骤一:将上一行数据下移
sql语句
select id,dt,
lag(dt,1,0) over(partition by id order by dt) lagdt
from team
结果
id dt lagdt
1001 17523641234 0
1001 17523641256 17523641234
1001 17523641334 17523641256
1001 17523641534 17523641334
1001 17523641544 17523641534
1001 17523641638 17523641544
1001 17523641654 17523641638
1002 17523641278 0
1002 17523641434 17523641278
1002 17523641634 17523641434
步骤二:将两行数据相减
sql语句
select id,dt,dt-lagdt dtdiff
from
(
select id,dt,
lag(dt,1,0) over(partition by id order by dt) lagdt
from team
) t1
运行结果
id dt dtdiff
1001 17523641234 17523641234
1001 17523641256 22
1001 17523641334 78
1001 17523641534 200
1001 17523641544 10
1001 17523641638 94
1001 17523641654 16
1002 17523641278 17523641278
1002 17523641434 156
1002 17523641634 200
步骤三:开窗计算每个用户从第一行到当前行lagdt大于60的个数
sql语句
select id,dt,
sum(if(dtdiff>=60,1,0)) over(partition by id order by dt) groupid
from(
select id,dt,dt-lagdt dtdiff
from (
select id,dt,
lag(dt,1,0) over(partition by id order by dt) lagdt
from team
)t1
)t2
结果
id dt groupid
1001 17523641234 1
1001 17523641256 1
1001 17523641334 2
1001 17523641534 3
1001 17523641544 3
1001 17523641638 4
1001 17523641654 4
1002 17523641278 1
1002 17523641434 2
1002 17523641634 3