hive rownumber内置函数使用例子
-
Hive-0.11.0中内置row_number函数
org.apache.hadoop.hive.ql.exe.FunctionRegistry registerHiveUDAFsAsWindowFunctions(); registerWindowFunction("row_number", new GenericUDAFRowNumber()); --row_number实现类 registerWindowFunction("rank", new GenericUDAFRank()); registerWindowFunction("dense_rank", new GenericUDAFDenseRank()); registerWindowFunction("percent_rank", new GenericUDAFPercentRank()); registerWindowFunction("cume_dist", new GenericUDAFCumeDist()); registerWindowFunction("ntile", new GenericUDAFNTile()); registerWindowFunction("first_value", new GenericUDAFFirstValue()); registerWindowFunction("last_value", new GenericUDAFLastValue()); registerWindowFunction(LEAD_FUNC_NAME, new GenericUDAFLead(), false); registerWindowFunction(LAG_FUNC_NAME, new GenericUDAFLag(), false);
-
使用的DEMO
- 从test_s表中根据c1字段除重,选取c2最大的值,导入test_c表.
- DEMO
hive>
desc test_s;
OK
col_name data_type comment
c1 int None
c2 string None
Time taken: 1.87 seconds, Fetched: 2 row(s)
hive>
> select * from test_s;
OK
c1 c2
1 str1
2 str2
3 str3
3 str31
3 str33
4 str41
4 str42
Time taken: 0.671 seconds, Fetched: 7 row(s)
hive> select c1,c2,row_number() over (distribute by c1
> sort by c2 desc) rownum
> from test_s ;
OK
c1 c2 rownum
1 str1 1
2 str2 1
3 str33 1
3 str31 2
3 str3 3
4 str42 1
4 str41 2
Time taken: 17.23 seconds, Fetched: 7 row(s)
hive>
>
> desc test_c;
OK
col_name data_type comment
c1 int None
c2 string None
Time taken: 0.118 seconds, Fetched: 2 row(s)
hive>
> insert into table test_c
> select c1,c2
> from (
> select c1,c2,row_number() over (distribute by c1
> sort by c2 desc) rownum
> from test_s
> ) aa
> where aa.rownum = 1;
OK
Time taken: 19.609 seconds
hive>
>
>
>
> select * from test_c;
OK
c1 c2
1 str1
2 str2
3 str33
4 str42
Time taken: 0.144 seconds, Fetched: 4 row(s)