Hive：内置函数

最新推荐文章于 2024-05-21 12:51:11 发布

阿济1907

最新推荐文章于 2024-05-21 12:51:11 发布

阅读量415

点赞数

分类专栏： Hive 文章标签： hive

本文链接：https://blog.csdn.net/qq_43697577/article/details/107166483

版权

Hive 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

Hive内置函数

内置函数：
自定义函数：
UDF : 用户自定义函数（重点）
UDAF : 用户自定义聚合函数
UDTF : 用户自定义表生成函数（一行输入、多行输出，类似内置的 explode）

帮助命令

show functions  			查看hive中所有的内置函数
desc function func_name 	查看具体的函数的使用方式

1. 数值函数

--1 round(x[,d]):将一个小数四舍五入(x为数字，d为取几位小数)
hive (hive)> select round(3.55);
OK
4.0
hive (hive)> select round(3.55,1);
OK
3.6
hive (hive)> select round(3.45,1);
OK
3.5
hive (hive)> select round(3.45);
OK
3.0

--2 floor(x):向下取整(x为数字)
hive (hive)> select floor(3.99);
OK
3
hive (hive)> select floor(-3.99);
OK
-4

--3 ceil(x):向上取整(x为数字)
hive (hive)> select ceil(3.2);
OK
4
hive (hive)> select ceil(-3.2);
OK
-3

--4 rand([seed]):产生一个0~1之间的随机小数:如果seed不变，输出的随机数是相同的。
hive (hive)> select rand();
OK
0.7494681889083568
hive (hive)> select rand(10);
OK
0.7304302967434272

2. 运算函数

--1 abs(x):求x的绝对值
hive (hive)> select abs(-2);
OK
2
hive (hive)> select abs(2);
OK
2

--2 pow(x1,x2):求x1的x2次幂
hive (hive)> select pow(2,3);
OK
8.0
hive (hive)> select pow(9,1/2);
OK
3.0

3. 条件函数

--1 if(boolean,t1,t2):若布尔值成立，则返回t1,反之返回t2。
IF(expr1,expr2,expr3) - If expr1 is TRUE (expr1 <> 0 and expr1 <> NULL) then IF() returns expr2; otherwise it returns expr3. IF() returns a numeric or string value, depending on the context in which it is used.

hive> select if(1=1,"basketball", "football");
OK
basketball

--2 case when boolean then t1 else t2 end:若布尔值成立，则t1,否则t2,可加多重判断
CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - When a = b, returns c; when a = d, return e; else return f

hive> select case 1 when 1 then "abc" else "def" end;
OK
abc

--3 coalesce(v0,v1,v2):返回参数中的第一个非空值,若所有值均为null,则返回null。
coalesce(a1, a2, ...) - Returns the first non-null argument

hive (hive)> select coalesce(null,1,3);
OK
1
hive (hive)> select coalesce(null,null,null,31,3);
OK
31

--4 isnull(a):若a为null则返回true，否则返回false
isnull a - Returns true if a is NULL and false otherwise

hive (hive)> select isnull(1);
OK
false
hive (hive)> select isnull(null);
OK
true

4. 日期函数

--1. current_date():获取当前日期
hive (hive)> select current_date;
OK
2020-07-03

--2. current_timestamp():获取当前时间
hive (hive)> select current_timestamp;
OK
2020-07-03 13:35:54.614

--3. add_months(start_date, num_months):对指定的日期添加num_months个月
add_months(start_date, num_months) - Returns the date that is num_months after start_date.

hive (hive)> select add_months('2020-07-03',3);
OK
2020-10-03

--4. date_add(start_date, num_days):对指定的日期添加num_days天
date_add(start_date, num_days) - Returns the date that is num_days after start_date.

hive (hive)> select date_add('2020-07-03',12);
OK
2020-07-15

--5. date_sub(start_date, num_days):对指定的日期减少num_days天
date_sub(start_date, num_days) - Returns the date that is num_days before start_date.

hive (hive)> select date_sub('2020-07-03',12);
OK
2020-06-21

--6. next_day(start_date, day_of_week):返回start_date之后第一个指定星期几(day_of_week)的日期
next_day(start_date, day_of_week) - Returns the first date which is later than start_date and named as indicated.

hive> select next_day(current_date(), 'SATURDAY');
OK
2020-07-04

--7. dayofmonth(date):该日期是一个月中的第几天
dayofmonth(param) - Returns the day of the month of date/timestamp, or day component of interval

hive (hive)> select dayofmonth('2020-07-03');
OK
3

--8. weekofyear(date):该日期是一年中的第几周
weekofyear(date) - Returns the week of the year of the given date. A week is considered to start on a Monday and week 1 is the first week with >3 days.

hive (hive)> select weekofyear('2020-07-03');
OK
27

--9. minute/hour/day/month/year:求该时间的分钟/小时/天/月/年
hive (hive)> select minute(current_timestamp),hour(current_timestamp),day(current_timestamp),month(current_timestamp),year(current_timestamp);
OK
16      13      4       7       2020

--10. date_format(date/timestamp/string, fmt):以指定的格式显示我们的日期
hive (hive)> select date_format('2020-07-03','MM-dd');
OK
07-03
hive (hive)> select date_format('2020-07-03 13:40:28','MM-dd mm:ss');
OK
07-03 40:28

--11. datediff(date1, date2):求两个日期相差的天数(date1 - date2，date1早于date2时结果为负)
datediff(date1, date2) - Returns the number of days between date1 and date2

hive (hive)> select datediff('2020-07-03','2020-10-01');
OK
-90

--12. to_unix_timestamp(date[, pattern]):求该日期的时间戳
to_unix_timestamp(date[, pattern]) - Returns the UNIX timestamp

hive (hive)> select to_unix_timestamp('2020-07-03 13:19:28');
OK
1593753568

--13. from_unixtime(unix_time, format):将unix时间戳转换为指定格式的日期字符串(格式中hh为12小时制，HH为24小时制)
from_unixtime(unix_time, format) - returns unix_time in the specified format

hive (hive)> select from_unixtime(1593753568,'yyyy-MM-dd hh-mm-ss');
OK
2020-07-03 01-19-28

--14. to_date(datetime):求该时间的日期
to_date(expr) - Extracts the date part of the date or datetime expression expr

hive (hive)> select to_date('2020-07-03 13:29:28');
OK
2020-07-03

5. 排名函数（会使用到窗口函数）

5.1 hive中的三种排名函数

row_number():没有并列，相同名次顺序排列
rank():有并列，相同名次采取空位
dense_rank():有并列，相同名次不空位
三种排名函数使用的方法一致。

5.2 举例

1）数据

01 gp1802 84
02 gp1801 84
03 gp1802 84
04 gp1802 84
05 gp1801 81
06 gp1802 81
07 gp1802 81
08 gp1801 81
09 gp1802 81
10 gp1802 81
11 gp1803 81
12 gp1802 89
13 gp1802 89
14 gp1802 89
15 gp1803 89
16 gp1802 91
17 gp1802 97
18 gp1802 72
19 gp1804 73
20 gp1802 77
21 gp1802 71
22 gp1802 61
23 gp1803 65
24 gp1804 67
25 gp1804 62
26 gp1804 61
27 gp1802 91
28 gp1801 93
29 gp1802 91
30 gp1804 92
31 gp1803 41
32 gp1802 41
33 gp1802 42

2）建表

-- Table backing the ranking examples. Its three columns line up with the
-- three space-separated fields of each row in the sample data file.
create table if not exists t_class (
    sid    int,
    sclass string,
    score  int
)
row format delimited
    fields terminated by ' ';

-- "local" reads the file from the client machine's filesystem rather than HDFS.
load data local inpath '/root/class.txt' into table t_class;

3) 使用

-- Number the rows of each class from the highest score to the lowest.
-- row_number() never produces ties: equal scores still get distinct,
-- consecutive numbers. Hive also accepts the equivalent
-- "distribute by ... sort by ..." spelling inside over().
select
    sclass,
    sid,
    score,
    row_number() over (partition by sclass order by score desc) as rank
from t_class;
gp1801  28      93      1
gp1801  2       84      2
gp1801  5       81      3
gp1801  8       81      4
gp1802  17      97      1
gp1802  29      91      2
gp1802  27      91      3
gp1802  16      91      4
gp1802  12      89      5
gp1802  13      89      6
gp1802  14      89      7
gp1802  1       84      8
gp1802  4       84      9
gp1802  3       84      10
gp1802  10      81      11
gp1802  6       81      12
gp1802  7       81      13
gp1802  9       81      14
gp1802  20      77      15
gp1802  18      72      16
gp1802  21      71      17
gp1802  22      61      18
gp1802  33      42      19
gp1802  32      41      20
gp1803  15      89      1
gp1803  11      81      2
gp1803  23      65      3
gp1803  31      41      4
gp1804  30      92      1
gp1804  19      73      2
gp1804  24      67      3
gp1804  25      62      4
gp1804  26      61      5

-- Within each class, order by score descending and keep rows numbered 2 to 4.
-- The row number is computed in a derived table because a window function
-- result cannot be filtered in the where clause of the same select.
select
    rank.sclass,
    rank.sid,
    rank.score,
    rank.r
from (
    select
        sclass,
        sid,
        score,
        row_number() over (partition by sclass order by score desc) as r
    from t_class
) rank
where rank.r between 2 and 4;

6. 字符串函数

--1. instr:获取第一次出现的substr的字符串在str中的位置索引（从1开始）
instr(str, substr) - Returns the index of the first occurance of substr in str

-- instr(str, substr): 1-based index of the first occurrence of substr in str.
select instr('lixi', 'l');

-- 2. length(str): length of the string.
select length('lixi');

-- 3. substr/substring(str, pos, len): take len characters starting at
-- position pos (here: 5 characters starting at position 4).
select substr('lixirocklee', 4, 5);

-- 4. concat(s1, s2, ...): join the strings with nothing in between.
-- Every statement ends with ';' so the examples can be run together as a script.
select concat('www', 'baidu', 'com');

-- 5. concat_ws(sep, s1, s2, ...): join the strings with sep between them.
select concat_ws(".", "www", "baidu", "com");

7. 特殊函数

-- 1. array(n0, n1, ...): builds an array from the given arguments.
select array(1, 2, 3, 4, 5);
[1,2,3,4,5]

-- 2. collect_set(x): aggregates the values of x into a collection
-- with duplicate elements removed.
select collect_set(age)
from teacher;
[0,1,2,3]

-- 3. collect_list(x): aggregates the values of x into a list,
-- keeping duplicate elements.
select collect_list(age)
from teacher;
[0,0,0,1,2,3]

-- 4. explode(array): turns an array into multiple rows, one per element.
select explode(array(1, 2, 3, 4, 5));
1
2
3
4
5

-- 5. cast(expr as type): converts the value of expr to the given data type.
-- Here the literal 1.0 is cast to int before being passed to length().
select length(cast(1.0 as int));

-- Word count: explode the array into one row per word, then count the
-- rows for each distinct word.
-- The grouped word is selected next to its count; with count(1) alone the
-- result is a list of numbers that cannot be attributed to any word.
select
    word.tmp,
    count(1)
from (
    select explode(array("iny", "qwe", "hive", "java", "单词")) tmp
) word
group by word.tmp;

阿济1907

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录