Hive常用函数及自定义函数

最新推荐文章于 2022-08-09 10:32:20 发布

转载最新推荐文章于 2022-08-09 10:32:20 发布

· 245 阅读

文章标签：

#大数据

本文介绍了 Hive SQL 中常用的函数及用法，包括系统自带函数、时间函数等，并通过实例展示了如何进行数据处理和分析。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

前言:
    HQL常用函数语法和RDBMS语法相似
官网参考:
    https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Select
    https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF

一.Hive自带函数

-- 查看系统自带函数
hive> show functions;
least
length
like
max
min
-- 查看函数使用帮助

hive
> desc function extended upper;
Example:
> SELECT upper('Facebook') FROM src LIMIT 1;
'FACEBOOK'

二.常用的函数语法
举例:

hive> select * from emp;
OK
7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 20
7499 ALLEN SALESMAN 7698 1981-2-20 1600.0 300.0 30
7521 WARD SALESMAN 7698 1981-2-22 1250.0 500.0 30
7566 JONES MANAGER 7839 1981-4-2 2975.0 NULL 20
7654 MARTIN SALESMAN 7698 1981-9-28 1250.0 1400.0 30
7698 BLAKE MANAGER 7839 1981-5-1 2850.0 NULL 30
7782 CLARK MANAGER 7839 1981-6-9 2450.0 NULL 10
7788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 20
7839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 10
7844 TURNER SALESMAN 7698 1981-9-8 1500.0 0.0 30
7876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 20
7900 JAMES CLERK 7698 1981-12-3 950.0 NULL 30
7902 FORD ANALYST 7566 1981-12-3 3000.0 NULL 20
7934 MILLER CLERK 7782 1982-1-23 1300.0 NULL 10
8888 HIVE PROGRAM 7839 1988-1-23 10300.0 NULL NULL

-- 根据工资降序排序取前三
hive> select * from emp order by salary desc limit 3;
OK
7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 20
7900 JAMES CLERK 7698 1981-12-3 950.0 NULL 30
7876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 20

-- 求每个部门的最大工资,最小工资,平均工资,所有工资
select deptno, max(salary),min(salary),avg(salary),sum(salary) from ruozedata_emp group by deptno
OK
NULL 10300.0 10300.0 10300.0 10300.0
10 5000.0 1300.0 2916.6666666666665 8750.0
20 3000.0 800.0 2175.0 10875.0
30 2850.0 950.0 1566.6666666666667 9400.0

-- 求每个部门的最大工资,最小工资,平均工资,所有工资中平均工资大于2000
select deptno, max(salary),min(salary),avg(salary),sum(salary) from ruozedata_emp group by deptno
where avg(salary)>3000
OK
NULL 10300.0 10300.0 10300.0 10300.0
20 3000.0 800.0 2175.0 10875.0

-- 求每个部门工资前2的姓名和工资
select t.*
from
(select
deptno,
ename,
salary,
row_number() over(partition by deptno order by salary desc ) as number
from emp)t
where t.number<=2;
OK
NULL HIVE 10300.0 1
10 KING 5000.0 1
10 CLARK 2450.0 2
20 SCOTT 3000.0 1
20 FORD 3000.0 2
30 BLAKE 2850.0 1
30 ALLEN 1600.0 2

-- 统计每个部门的人数
select deptno,count(1) from emp group by deptno;
OK
NULL 1
10 3
20 5
30 6

-- CASE WHEN统计工资情况
select ename,salary,
case
when salary>1 and salary<=1000 then 'lower'
when salary>1000 and salary<=2000 then 'middle'
when salary>2000 and salary<=4000 then 'high'
else 'highest'
end
from emp

OK
SMITH 800.0 lower
ALLEN 1600.0 middle
WARD 1250.0 middle
JONES 2975.0 high
MARTIN 1250.0 middle
BLAKE 2850.0 high

-- 类型转换
hive> select cast('2018-06-17' as date) from dual;
OK
2018-06-17
-- 截取字符串
hive> select substr('abcdef',1,5) from dual;
OK
abcde
-- 拼接
hive> select concat('123','456') from dual;
OK
123456
-- 制定分割拼接
hive> select concat_ws('.','www','baidu','com') from dual;
OK
www.baidu.com

-- 获取长度
hive> select length(concat_ws('.','www','baidu','com')) from dual;
OK
-- split拆分
hive> select split(concat_ws('.','www','baidu','com'),'\\.') from dual;
OK
["www","baidu","com"]
Time taken: 0.084 seconds, Fetched: 1 row(s)

-- 拆分换行

hive> select explode(split(concat_ws('.','www','baidu','com'),'\\.') ) from dual;
OK
www
baidu
com

-- 大写转换小写
hive> select lower('ABC') from dual;
OK
abc
-- 小写转换大写
hive> select upper('abc') from dual;
OK
ABC

-- json导入查询
hive> create table json(json string);
OK
Time taken: 0.059 seconds
-- 加载json文件
hive> load data local inpath'/home/hadoop/data/test.json' into table json;
hive> select * from json;
OK
{"name":"zhangsan","age":28,"addr":"hz"}
{"name":"lisi","age":18,"addr":"bj"}
{"name":"wangwu","age":10,"addr":"sh"}
Time taken: 0.151 seconds, Fetched: 3 row(s)
-- json_tuple查询
hive> select json_tuple(json,'name','age','addr') as (name,age,addr) from json;
OK
zhangsan 28 hz
lisi 18 bj
wangwu 10 sh
Time taken: 0.077 seconds, Fetched: 3 row(s)

三.常用时间函数

-- current_date返回年月日

hive> select current_date from dual;
OK
2018-06-07

-- 返回unix时间搓
hive> select unix_timestamp() from dual;
OK
1528403209

-- current_timestamp返回年月入时分秒
hive> select current_timestamp from dual;
OK
2018-06-07 16:35:12.752
Time taken: 0.217 seconds, Fetched: 1 row(s)

-- 获取年月日
hive> select to_date('2018-06-07 16:44:44') from dual;
OK
2018-06-07

-- 获取年
hive> select year('2018-06-07 16:44:44') from dual;
OK
2018

-- 获取月
hive> select month('2018-06-07 16:44:44') from dual;
OK
6

-- 获取日
hive> select day('2018-06-07 16:44:44') from dual;
OK
7

-- 获取小时
hive> select hour('2018-06-07 16:44:44') from dual;
OK
16

-- 获取分钟
hive> select minute('2018-06-07 16:44:44') from dual;
OK
44

-- 获取秒
hive> select second('2018-06-07 16:44:44') from dual;
OK

-- 时间天相加
hive> select date_add('2018-06-07',10) from dual;
OK
2018-06-17

-- 时间天相减
hive> select date_sub('2018-06-17',10) from dual;
OK
2018-06-07

四.自定义函数

.....

来自 “ ITPUB博客 ” ，链接：http://blog.itpub.net/31441024/viewspace-2155864/，如需转载，请注明出处，否则将追究法律责任。

转载于:http://blog.itpub.net/31441024/viewspace-2155864/