Hive函数(单行,聚合,炸裂,窗口)

Hive函数(单行,聚合,炸裂,窗口)

单行函数

1. 数学函数

    val a = 5
    val b = 2
    //加 +
    println(a + b) // 7
    //减 -
    println(a - b) // 3
    //乘 *
    println(a * b) // 10
    //除 /
    println(a / b) // 2
    //取余 %
    println(a % b) // 1
    //按位取与 &
    println(a & b) // 0
    //按位取或 |
    println(a | b) // 7
    //按位取异或 ^
    println(a ^ b) // 7
    //按位取反 ~
    println(~a) // -6

2. 日期函数

# 返回指定时间的时间戳
select unix_timestamp('2022-01-01 00:00:00') ; # 1640966400
# 将时间戳转换成时间
select from_unixtime(1640966400); # 2022-01-01 00:00:00
# 返回日期中的年
select year('2022-01-01 00:00:00'); # 2022
# 返回日期中的月
select month('2022-01-01 00:00:00'); # 1
# 返回日期中的日
select day('2022-01-01 00:00:00') ; # 1
# 计算日期差
select datediff('2022-02-01','2022-01-02') ; # 30
# 日期加上天数
select date_add('2022-01-01',2) ; # 2022-01-03
# 日期减去天数
select date_sub('2022-01-01',2) ; # 2021-12-30
# 将日期转换为指定日期格式
select date_format('2022-01-01 00:00:00','yyyy-MM-dd') ; # 2022-01-01

3. 字符串函数

# 转大写
select upper('abcabc') ; # ABCABC
# 转小写
select lower('ABCABC') ; # abcabc
# 获取字符串的长度
select length('abcabc') ; # 6
# 去除字符串两头空格
select trim(' abcabc ') ; # abcabc
# 截取字符串
select substring('abcabc',2,4) ; # bcab
# 字符串替换
select replace('abcabc','b','p') ; # apcapc
# 字符串切割
select split('abcabc','b') ; # [a, ca, c]
# 字符串拼接
select concat_ws('-','abc','abc') ; # abc-abc
# json字符串,获取值
select get_json_object('{"id1":1001,"id2":1002}','$.id1') ; #/ 1001

聚合函数

1.普通聚合函数

idnameage
1ly15
2yu13
3ly17
# 求行数, NULL不计算
select count(*) from t; # 3
# 求和
select sum(age) from t; # 45
# 求最大值
select max(age) from t; # 17
# 求最小值
select min(age) from t; # 13
# 求平均值
select avg(age) from t; # 15

2. 高级聚合

# collect_list 收集形成list集合,结果不去重
select collect_list(name) from t;
# ["ly","wu","ly"]

# collect_set 收集形成set集合,结果去重
select collect_set(name) from t;
# ["ly","wu"]

炸裂函数

1. explode

# 将字符串展开
SELECT 'lyf' as name,explode(split("a,d,c,f,g,j", ",")) as col;
+----+---+
|name|col|
+----+---+
|lyf |a  |
|lyf |d  |
|lyf |c  |
|lyf |f  |
|lyf |g  |
|lyf |j  |
+----+---+

2. json_tuple 取出json字符串中的属性值

select json_tuple('{"name":"lyf","sex":"女","age":"22"}','name','sex','age');
# lyf 女 22

3. lateral view 侧写

SELECT 'lyf' as name , col2
LATERAL VIEW explode(split("a,d,c,f", ",")) as col2 ;
+----+----+
|name|col2|
+----+----+
|lyf |a   |
|lyf |d   |
|lyf |c   |
|lyf |f   |
+----+----+
SELECT 'lyf' as name ,explode(split("a,b,c", ",")) as col1, col2
LATERAL VIEW explode(split("a,b,c", ",")) as col2 ;
+----+----+----+
|name|col1|col2|
+----+----+----+
|lyf |a   |a   |
|lyf |b   |a   |
|lyf |c   |a   |
|lyf |a   |b   |
|lyf |b   |b   |
|lyf |c   |b   |
|lyf |a   |c   |
|lyf |b   |c   |
|lyf |c   |c   |
+----+----+----+

窗口函数

基本语法:函数 + over( [partition by …] [order by …] [窗口子句] )

1. 聚合类

# sum,avg,count,max,min
sum(COL1) OVER(PARTITION BY COL1 ORDER BY COL2 )
avg(COL1) OVER(PARTITION BY COL1 ORDER BY COL2 )
count(COL1) OVER(PARTITION BY COL1 ORDER BY COL2 )

2. 排序类

#样例数据: 10 20 20 30
# 疏松排序
RANK ( ) OVER ( PARTITION BY COL1 ORDER BY COL2 ) 		--1 2 2 4
# 密集排序
dense_RANK ( ) OVER (PARTITION BY COL1 ORDER BY COL2 ) 	--1 2 2 3
# 递推排序
ROW_NUMBER() OVER (PARTITION BY COL1 ORDER BY COL2)		--1 2 3 4

3. 窗口类

# 上升
lead(type_id,1,0) over (PARTITION BY uuid,p_id ORDER BY times) as type_id_lead 
# 下降
lag(type_id,1,0) over (PARTITION BY uuid,p_id ORDER BY times) as type_id_lead  
# type_id: 需要变动的列, 1: 变动几行, 0: (最后一个/第一个)的默认值
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值