连续命中次数在3次以上的用户
数据:
用户名,打击序次,命中
u01,1,1
u01,2,0
u01,3,1
u01,4,1
u01,5,0
u01,6,1
u02,1,1
u02,2,1
u02,3,0
u02,4,1
u02,5,1
u02,6,0
u02,7,0
u02,8,1
u02,9,1
u03,1,1
u03,2,1
u03,3,1
u03,4,1
u03,5,1
u03,6,0
建表 , 导入数据:
create table tb_game(
name string ,
times int ,
hit int
)
row format delimited fields terminated by "," ;
load data local inpath "/hive/data/game.txt" into table tb_game ;
查询结果:
+---------------+----------------+--------------+
| tb_game.name | tb_game.times | tb_game.hit |
+---------------+----------------+--------------+
| u01 | 1 | 1 |
| u01 | 2 | 0 |
| u01 | 3 | 1 |
| u01 | 4 | 1 |
| u01 | 5 | 0 |
| u01 | 6 | 1 |
| u02 | 1 | 1 |
| u02 | 2 | 1 |
| u02 | 3 | 0 |
| u02 | 4 | 1 |
| u02 | 5 | 1 |
| u02 | 6 | 0 |
| u02 | 7 | 0 |
| u02 | 8 | 1 |
| u02 | 9 | 1 |
| u03 | 1 | 1 |
| u03 | 2 | 1 |
| u03 | 3 | 1 |
| u03 | 4 | 1 |
| u03 | 5 | 1 |
| u03 | 6 | 0 |
+---------------+----------------+--------------+
1)过滤出命中的数据
select
*
from
tb_game
where hit = 1;
查询结果:
+---------------+----------------+--------------+
| tb_game.name | tb_game.times | tb_game.hit |
+---------------+----------------+--------------+
| u01 | 1 | 1 |
| u01 | 3 | 1 |
| u01 | 4 | 1 |
| u01 | 6 | 1 |
| u02 | 1 | 1 |
| u02 | 2 | 1 |
| u02 | 4 | 1 |
| u02 | 5 | 1 |
| u02 | 8 | 1 |
| u02 | 9 | 1 |
| u03 | 1 | 1 |
| u03 | 2 | 1 |
| u03 | 3 | 1 |
| u03 | 4 | 1 |
| u03 | 5 | 1 |
+---------------+----------------+--------------+
2)用户名分组 按照编号排序
select
name ,
times ,
hit ,
row_number() over(partition by name order by times) n
from
(select
*
from
tb_game
where hit = 1) t ;
结果:
+-------+--------+------+----+
| name | times | hit | n |
+-------+--------+------+----+
| u01 | 1 | 1 | 1 |
| u01 | 3 | 1 | 2 |
| u01 | 4 | 1 | 3 |
| u01 | 6 | 1 | 4 |
| u02 | 1 | 1 | 1 |
| u02 | 2 | 1 | 2 |
| u02 | 4 | 1 | 3 |
| u02 | 5 | 1 | 4 |
| u02 | 8 | 1 | 5 |
| u02 | 9 | 1 | 6 |
| u03 | 1 | 1 | 1 |
| u03 | 2 | 1 | 2 |
| u03 | 3 | 1 | 3 |
| u03 | 4 | 1 | 4 |
| u03 | 5 | 1 | 5 |
+-------+--------+------+----+
3)序次和编号求差值
select
* ,
(times - n) as tn
from
(select
name ,
times ,
hit ,
row_number() over(partition by name order by times) n
from
(select
*
from
tb_game
where hit = 1) t) t2 ;
结果:
+----------+-----------+---------+-------+-----+
| t2.name | t2.times | t2.hit | t2.n | tn |
+----------+-----------+---------+-------+-----+
| u01 | 1 | 1 | 1 | 0 |
| u01 | 3 | 1 | 2 | 1 |
| u01 | 4 | 1 | 3 | 1 |
| u01 | 6 | 1 | 4 | 2 |
| u02 | 1 | 1 | 1 | 0 |
| u02 | 2 | 1 | 2 | 0 |
| u02 | 4 | 1 | 3 | 1 |
| u02 | 5 | 1 | 4 | 1 |
| u02 | 8 | 1 | 5 | 3 |
| u02 | 9 | 1 | 6 | 3 |
| u03 | 1 | 1 | 1 | 0 |
| u03 | 2 | 1 | 2 | 0 |
| u03 | 3 | 1 | 3 | 0 |
| u03 | 4 | 1 | 4 | 0 |
| u03 | 5 | 1 | 5 | 0 |
+----------+-----------+---------+-------+-----+
4)对用户名和上述差值分组 聚合 并过滤出连续击中3次及以上用户
select
name ,
tn ,
count(*) counts
from
(select
* ,
(times - n) as tn
from
(select
name ,
times ,
hit ,
row_number() over(partition by name order by times) n
from
(select
*
from
tb_game
where hit = 1) t) t2) t3
group by name , tn
having counts >= 3;
结果:
+-------+-----+---------+
| name | tn | counts |
+-------+-----+---------+
| u03 | 0 | 5 |
+-------+-----+---------+
5)进行去重(如果有重复)
select
distinct name
from
(select
name ,
tn ,
count(*) counts
from
(select
* ,
(times - n) as tn
from
(select
name ,
times ,
hit ,
row_number() over(partition by name order by times) n
from
(select
*
from
tb_game
where hit = 1) t) t2) t3
group by name , tn
having counts >= 3) t4;
结果:
+-------+
| name |
+-------+
| u03 |
+-------+