# Hive 编号函数

row_number ()

rank()

dense_rank()

## 习题练习

1）先建表 导入数据

create table  tb_sub(
name string ,
subject string ,
score double
)
row format delimited fields terminated by "\t" ;
load data local inpath "/data/stu/" into table  tb_sub ;


2） 对每门科目的成绩进行排名

select
* ,
rank()  over(partition by subject  order by score desc) rn
from
tb_sub ;


## 打地鼠案列练习

u01,1,1
u01,2,0
u01,3,1
u01,6,1
u02,5,1
u02,6,0
u02,7,0
u02,1,1
u02,2,1
u03,4,1
u03,5,1
u03,6,0
u02,3,0
u02,4,1
u02,8,1
u01,4,1
u01,5,0
u02,9,1
u03,1,1
u03,2,1
u03,3,1

1） 建表 导入数据

 create table  tb_ds(
uid string ,
seq int  ,
m int
)
row format delimited fields terminated by "," ;
load data local inpath "/data/ds/" into table  tb_ds ;


2） 过滤掉没有命中的记录 , 进行分组 ,排序 , 编号

select
uid ,
seq ,
row_number()  over(partition by uid order by seq) rn
from
tb_ds
where  m = 1 ;


3）求出连续命中两次或两次以上的 uid

select
uid ,
max(cnt) mm     最大的 cnt
from
(select
uid , diff ,
count(1)  cnt    总个数
from
(select
uid ,
seq ,
row_number()  over(partition by uid order by seq) rn ,
(seq - (row_number()  over(partition by uid order by seq))) diff 求出命中的次数
from
tb_ds
where  m = 1)t
group by uid , diff
having cnt >=2)t1    总个数大于等于2的
group by uid  ;  过滤 uid


05-31 3万+
09-07 1万+
05-05 3万+
06-07 7762
10-19 2万+
11-15 7123
09-02 4639
03-27 4910
01-13 4万+
06-26 6207
10-17 1万+
11-20 3万+
02-28 5491
11-17 2497
05-15 2万+
12-02 3131