hive函数

最新推荐文章于 2024-08-16 17:56:46 发布

jamie956

最新推荐文章于 2024-08-16 17:56:46 发布

阅读量66

点赞数

文章标签： hadoop hdfs hive

本文链接：https://blog.csdn.net/qq_25817753/article/details/114857850

版权

#COALESCE
由左到右，返回第一个非空值

select coalesce(1,2,3);
select coalesce(null,2,3);
select coalesce(1,null,3);
select coalesce(null,null,3);
select coalesce(null,null,null);

#EXPLODE
一进多出 属于udtf函数，能将map, array 拆成多行
select explode(array('A','B','C'));
select explode(map('A',10,'B',20,'C',30));

drop table if exists t1;
create table t1(
    id int, 
    name string, 
    likes array<string>, 
    address map<string,string>
);
desc formatted t1;

insert into table t1 values
(1,'tim',array('A','B','C'),map('k1','10','k2','20','k3','30')),
(2,'tom',array('z','A','m'),map('a1','2','a2','3','a3','4'));

select * from t1;
select explode(likes) as likes from t1;
select explode(address) as (k,v) from t1;

#LATERAL VIEW 能将 udtf 结果转换成一个视图subView
select * from t1 
lateral view explode(likes) t2 as subLikes;

select * from t1 
lateral view explode(address) t2 as k,v;

select * from t1 
lateral view json_tuple('{"name":"tim","age":12}', 'name','age') t2 as name, age;

#NVL
如果第一个值为空，就去第二个值
select nvl('1', '2');
select nvl(null, '2');

#时间
select date_format('2019-06-24 12:12:12','yyyyMMdd');
select unix_timestamp();
select from_unixtime(1565858389,'yyyy-MM-dd HH:mm:ss');

#天数计算
select date_add('2019-06-29',5);
select date_add('2019-06-29',-5);
select datediff('2019-06-29','2019-06-24');
select date_sub('2019-06-29',5);

#CASE WHEN
select 
case 
when 'ios' = 'android' then 'android'
when 'ios' = 'ios' then 'iPhone'
else 'PC'
end;

select
case 'other'
when 'android' then 'android'
when 'ios' then 'iPhone'
else 'PC'
end;

drop table if exists t1;
create table t1(
id string,
name string,
code string
);
insert into table t1 values
('1', 'a', '01'),
('2', 'b', '02');
select * from t1;

select 
case id
when '1' then name
when '2' then code
end
from t1;

#CONCAT_WS
聚合函数，多字段合并 （行转列）
concat_ws(',', collect_set(name))
concat_ws(',', collect_list(name))

select concat_ws(',', 'hi', 'world','my','my');
select concat('a', ",", 'b');

#if
select if('hi' is not null, 1, 2);
select if(null is not null, 1, 2);

#截取字符串
select substring('2017-04-13',1,7);

#窗口函数 over() 通常搭配 row_number() ,min(),max(),sum()来使用
drop table if exists t1;
create table t1(
    id string,
    name string,
    num int
);
desc formatted t1;

insert into table t1 values 
(1,'a',1),
(2,'b',3),
(3,'a',5),
(4,'c',9),
(5,'b',3);

select * from t1;

-- over() 每一行的窗口都一样
SELECT id,name,num,
sum(num) over() sum1 
from t1;

id	name	num	sum1
5	b	3	21
4	c	9	21
3	a	5	21
2	b	3	21
1	a	1	21

-- over(order by id) 逐行累计计算
SELECT id,name,num,
sum(num) over(order by id) sum1 
from t1;

id	name	num	sum1
1	a	1	1
2	b	3	4
3	a	5	9
4	c	9	18
5	b	3	21

-- over(partition by name) 分区内计算
SELECT id,name,num,
sum(num) over(partition by name) sum1 
from t1;

id	name	num	sum1
3	a	5	6
1	a	1	6
5	b	3	6
2	b	3	6
4	c	9	9

-- over(partition by name order by id) 分区内逐行计算
SELECT id,name,num,
sum(num) over(partition by name order by id) sum1 
from t1;

id	name	num	sum1
1	a	1	1
3	a	5	6
2	b	3	3
5	b	3	6
4	c	9	9


-- row_number() 行号
SELECT id,name,num,
row_number() over() rownum from t1;

id	name	num	rownum
5	b	3	1
4	c	9	2
3	a	5	3
2	b	3	4
1	a	1	5

SELECT id,name,num,
row_number() over(partition by name) rn 
from t1;

id	name	num	rn
3	a	5	1
1	a	1	2
5	b	3	1
2	b	3	2
4	c	9	1

SELECT id,name,num,
row_number() over(partition by name order by id) rn 
from t1;

id	name	num	rn
1	a	1	1
3	a	5	2
2	b	3	1
5	b	3	2
4	c	9	1

#regexp_replace()
select regexp_replace("12\r3", "\r", "\\r");
select regexp_replace("12\r3", "\r", "");
select regexp_replace("123", "1", "a");
select regexp_replace('12\r3', '\n|\t|\r', '\\r');
select regexp_replace('12\r3', '\r', '<br/>');

jamie956

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
hive函数

#COALESCE由左到右，返回第一个非空值select coalesce(1,2,3);select coalesce(null,2,3);select coalesce(1,null,3);select coalesce(null,null,3);select coalesce(null,null,null);#EXPLODE一进多出属于udtf函数，能将map, array 拆成多行select explode(array('A','B','C'));select expl.
复制链接

扫一扫