前言:
HQL常用函数语法和RDBMS语法相似
官网参考:
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Select
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
一.Hive自带函数
hive
> desc function extended upper;
Example:
> SELECT upper('Facebook') FROM src LIMIT 1;
'FACEBOOK'
二.常用的函数语法
举例:
三.常用时间函数
四.自定义函数
.....
HQL常用函数语法和RDBMS语法相似
官网参考:
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Select
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
一.Hive自带函数
- -- 查看系统自带函数
hive> show functions; - least
- length
- like
- max
- min
- -- 查看函数使用帮助
hive
> desc function extended upper;
Example:
> SELECT upper('Facebook') FROM src LIMIT 1;
'FACEBOOK'
二.常用的函数语法
举例:
- hive> select * from emp;
- OK
- 7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 20
- 7499 ALLEN SALESMAN 7698 1981-2-20 1600.0 300.0 30
- 7521 WARD SALESMAN 7698 1981-2-22 1250.0 500.0 30
- 7566 JONES MANAGER 7839 1981-4-2 2975.0 NULL 20
- 7654 MARTIN SALESMAN 7698 1981-9-28 1250.0 1400.0 30
- 7698 BLAKE MANAGER 7839 1981-5-1 2850.0 NULL 30
- 7782 CLARK MANAGER 7839 1981-6-9 2450.0 NULL 10
- 7788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 20
- 7839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 10
- 7844 TURNER SALESMAN 7698 1981-9-8 1500.0 0.0 30
- 7876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 20
- 7900 JAMES CLERK 7698 1981-12-3 950.0 NULL 30
- 7902 FORD ANALYST 7566 1981-12-3 3000.0 NULL 20
- 7934 MILLER CLERK 7782 1982-1-23 1300.0 NULL 10
- 8888 HIVE PROGRAM 7839 1988-1-23 10300.0 NULL NULL
- -- 根据工资降序排序 取前三
- hive> select * from emp order by salary desc limit 3;
- OK
- 7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 20
- 7900 JAMES CLERK 7698 1981-12-3 950.0 NULL 30
- 7876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 20
- -- 求每个部门的最大工资,最小工资,平均工资,所有工资
- select deptno, max(salary),min(salary),avg(salary),sum(salary) from ruozedata_emp group by deptno
- OK
NULL 10300.0 10300.0 10300.0 10300.0
10 5000.0 1300.0 2916.6666666666665 8750.0
20 3000.0 800.0 2175.0 10875.0
30 2850.0 950.0 1566.6666666666667 9400.0
- -- 求每个部门的最大工资,最小工资,平均工资,所有工资中平均工资大于2000
- select deptno, max(salary),min(salary),avg(salary),sum(salary) from ruozedata_emp group by deptno
- where avg(salary)>3000
- OK
- NULL 10300.0 10300.0 10300.0 10300.0
- 20 3000.0 800.0 2175.0 10875.0
- -- 求每个部门工资前2的姓名和工资
- select t.*
- from
- (select
- deptno,
- ename,
- salary,
- row_number() over(partition by deptno order by salary desc ) as number
- from emp)t
- where t.number<=2;
- OK
- NULL HIVE 10300.0 1
- 10 KING 5000.0 1
- 10 CLARK 2450.0 2
- 20 SCOTT 3000.0 1
- 20 FORD 3000.0 2
- 30 BLAKE 2850.0 1
- 30 ALLEN 1600.0 2
- -- 统计每个部门的人数
- select deptno,count(1) from emp group by deptno;
- OK
- NULL 1
- 10 3
- 20 5
- 30 6
- -- CASE WHEN统计工资情况
- select ename,salary,
- case
- when salary>1 and salary<=1000 then 'lower'
- when salary>1000 and salary<=2000 then 'middle'
- when salary>2000 and salary<=4000 then 'high'
- else 'highest'
- end
- from emp
OK
SMITH 800.0 lower
ALLEN 1600.0 middle
WARD 1250.0 middle
JONES 2975.0 high
MARTIN 1250.0 middle
BLAKE 2850.0 high
SMITH 800.0 lower
ALLEN 1600.0 middle
WARD 1250.0 middle
JONES 2975.0 high
MARTIN 1250.0 middle
BLAKE 2850.0 high
- -- 类型转换
- hive> select cast('2018-06-17' as date) from dual;
OK
2018-06-17 -
- -- 截取字符串
- hive> select substr('abcdef',1,5) from dual;
OK
abcde -
- -- 拼接
- hive> select concat('123','456') from dual;
OK
123456 - -- 制定分割拼接
- hive> select concat_ws('.','www','baidu','com') from dual;
OK
www.baidu.com
- -- 获取长度
- hive> select length(concat_ws('.','www','baidu','com')) from dual;
OK
- -- split拆分
- hive> select split(concat_ws('.','www','baidu','com'),'\\.') from dual;
OK
["www","baidu","com"]
Time taken: 0.084 seconds, Fetched: 1 row(s)
- -- 拆分换行
hive> select
explode(split(concat_ws('.','www','baidu','com'),'\\.') ) from dual;
OK
www
baidu
com
OK
www
baidu
com
- -- 大写转换小写
hive> select lower('ABC') from dual;
OK
abc -
- -- 小写转换大写
hive> select upper('abc') from dual;
OK
ABC
- -- json导入查询
- hive> create table json(json string);
- OK
- Time taken: 0.059 seconds
- -- 加载json文件
- hive> load data local inpath'/home/hadoop/data/test.json' into table json;
- hive> select * from json;
- OK
- {"name":"zhangsan","age":28,"addr":"hz"}
- {"name":"lisi","age":18,"addr":"bj"}
- {"name":"wangwu","age":10,"addr":"sh"}
- Time taken: 0.151 seconds, Fetched: 3 row(s)
- -- json_tuple查询
- hive> select json_tuple(json,'name','age','addr') as (name,age,addr) from json;
OK
zhangsan 28 hz
lisi 18 bj
wangwu 10 sh
Time taken: 0.077 seconds, Fetched: 3 row(s)
三.常用时间函数
- -- current_date返回年 月 日
hive> select current_date from dual;
OK
2018-06-07
-- 返回unix时间搓
hive> select unix_timestamp() from dual;
OK
1528403209
-- current_timestamp返回年月入时分秒
hive> select current_timestamp from dual;
OK
2018-06-07 16:35:12.752
Time taken: 0.217 seconds, Fetched: 1 row(s)
-- 获取年月日
hive> select to_date('2018-06-07 16:44:44') from dual;
OK
2018-06-07
-- 获取年
hive> select year('2018-06-07 16:44:44') from dual;
OK
2018
-- 获取月
hive> select month('2018-06-07 16:44:44') from dual;
OK
6
-- 获取日
hive> select day('2018-06-07 16:44:44') from dual;
OK
7
-- 获取小时
hive> select hour('2018-06-07 16:44:44') from dual;
OK
16
-- 获取分钟
hive> select minute('2018-06-07 16:44:44') from dual;
OK
44
-- 获取秒
hive> select second('2018-06-07 16:44:44') from dual;
OK
-- 时间天相加
hive> select date_add('2018-06-07',10) from dual;
OK
2018-06-17
-- 时间天相减
hive> select date_sub('2018-06-17',10) from dual;
OK
2018-06-07
OK
2018-06-07
-- 返回unix时间搓
hive> select unix_timestamp() from dual;
OK
1528403209
-- current_timestamp返回年月入时分秒
hive> select current_timestamp from dual;
OK
2018-06-07 16:35:12.752
Time taken: 0.217 seconds, Fetched: 1 row(s)
-- 获取年月日
hive> select to_date('2018-06-07 16:44:44') from dual;
OK
2018-06-07
-- 获取年
hive> select year('2018-06-07 16:44:44') from dual;
OK
2018
-- 获取月
hive> select month('2018-06-07 16:44:44') from dual;
OK
6
-- 获取日
hive> select day('2018-06-07 16:44:44') from dual;
OK
7
-- 获取小时
hive> select hour('2018-06-07 16:44:44') from dual;
OK
16
-- 获取分钟
hive> select minute('2018-06-07 16:44:44') from dual;
OK
44
-- 获取秒
hive> select second('2018-06-07 16:44:44') from dual;
OK
-- 时间天相加
hive> select date_add('2018-06-07',10) from dual;
OK
2018-06-17
-- 时间天相减
hive> select date_sub('2018-06-17',10) from dual;
OK
2018-06-07
.....
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/31441024/viewspace-2155864/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/31441024/viewspace-2155864/