hive 去重字符串_Hive常用函数傻瓜学习笔记附完整示例

最新推荐文章于 2024-01-05 08:00:00 发布

weixin_39778668

最新推荐文章于 2024-01-05 08:00:00 发布

阅读量115

点赞数

文章标签： hive 去重字符串

本文链接：https://blog.csdn.net/weixin_39778668/article/details/111740306

版权

创建表

drop table if exists mydatabase.test;

create table mydatabase.test

(id int, name string, timestring string, salary double, bonus double)

row format delimited

fields terminated by ‘\t‘

stored as textfile;

插入数据

方式1

vim test.txt

hadoop fs -mkdir test

hadoop fs -put test.txt /user/myname/test

load data inpath ‘/user/myname/test‘

overwrite into table mydatabase.test;

方式2

insert into mydatabase.test values

(1,‘ J ‘,‘2018-01-08 10:11:32‘,128.54,-45.23),

(2,‘ J ‘,‘2018-02-09 10:51:12‘,128.54,-78.25),

(3,‘ J ‘,‘2018-03-05 11:22:21‘,128.52,null),

(4,‘ J ‘,‘2018-04-08 15:40:51‘,256.23,345.23),

(5,‘ J ‘,‘2018-05-08 10:21:21‘,128.54,267.12),

(6,‘ J ‘,‘2018-06-08 10:00:50‘,256.27,-78.49),

(7,‘Rose‘,‘2018-01-08 10:11:32‘,512.65,-76.44),

(8,‘Rose‘,‘2018-02-09 10:51:12‘,512.54,-45.30),

(9,‘Rose‘,‘2018-03-05 11:22:21‘,512.13,-87.09),

(10,‘Rose‘,‘2018-04-08 15:40:51‘,512.34,19.12),

(11,‘Dickson‘,‘2018-01-08 10:21:21‘,256.87,null),

(12,‘Dickson‘,‘2018-02-08 10:00:50‘,256.52,null),

(13,‘Dickson‘,‘2018-04-08 11:00:00‘,256.12,3.69);

数学函数

四舍五入

select id, round(salary) from mydatabase.test;

四舍五入，小数保留

select id, round(salary, 1) from mydatabase.test;

向下，向上取整

select id, floor(salary), ceil(salary) from mydatabase.test;

随机数(0~1)

select id, salary*(1+rand()*0.1) from mydatabase.test;

指数，对数，取模

select id, pow(e(), salary), log(e(), salary), pmod(id, 3) from mydatabase.test;

绝对值，最大值，最小值

select id, abs(bonus), greatest(salary, bonus), least(salary, bonus) from mydatabase.test;

类型转换函数

select id, cast(salary as int) from mydatabase.test;

日期函数

当前时间

select id, name, unix_timestamp() from mydatabase.test;

时间戳转换为字符串

select id, name, from_unixtime(unix_timestamp(), ‘yyyy-MM-dd hh:mm:ss‘) from mydatabase.test;

字符串转换为时间戳

select id, name, unix_timestamp(timestring) from mydatabase.test;

时间子元素

select id, name, to_date(timestring), year(timestring), month(timestring), day(timestring), hour(timestring), minute(timestring), second(timestring) from mydatabase.test;

条件函数

IF条件

select id, if(bonus > 0, ‘yes‘, ‘no‘) from mydatabase.test;

NULL判断

select id, isnull(bonus) from mydatabase.test;

NULL条件，第二参数为默认值

select id, nvl(bonus, 0) from mydatabase.test;

CASE匹配条件

select id, name,

(case name

when ‘Jack‘ then ‘A‘

when ‘Rose‘ then ‘B‘

else ‘C‘

end)

from mydatabase.test;

CASE搜索条件

select id, salary, bonus,

(case

when salary > 500 then ‘A‘

when salary > 100 and bonus > 0 then ‘B‘

else ‘C‘

end)

from mydatabase.test;

聚合函数

去重

select distinct(name) from mydatabase.test;

计数

select count(*) from mydatabase.test;

条件计数

select count(bonus > 0) from mydatabase.test;

求和，求平均，最大，最小，方差

select name, sum(salary), avg(salary), min(salary), max(salary), variance(salary) from mydatabase.test group by name;

生成列表

select name, collect_list(salary) from mydatabase.test group by name;

字符串函数

长度

select name, length(name) from mydatabase.test;

查找

select name, locate(‘o‘, name) from mydatabase.test;

左填充，右填充

select name, lpad(name, 4, ‘_‘), rpad(name, 4, ‘_‘) from mydatabase.test;

去除左空格，去除右空格，去除左右空格

select name, ltrim(name), rtrim(name), trim(name) from mydatabase.test;

字符距离

select n1, n2, levenshtein(n1, n2) from

(select distinct(name) as n1 from mydatabase.test)db0

join

(select distinct(name) as n2 from mydatabase.test)db1

on n1 != n2;

分割

select name, split(timestring, ‘-‘) from mydatabase.test;

拼接

select name, concat(year(timestring), ‘|‘, month(timestring), ‘|‘, cast(salary as string)) from mydatabase.test;

拼接列表

select name,

concat_ws(‘|‘,

collect_list(cast(salary as string))

) from mydatabase.test group by name;

生成函数

EXPLODE

select id, part from mydatabase.test lateral view explode(split(timestring,‘ ‘)) t as part;

选择函数

select id, name from mydatabase.test where name in(‘Dickson‘, ‘Rose‘);

分组排序函数

ROW_NUMBER

select id, name, salary, row_number() over(partition by name order by salary desc) rank from mydatabase.test;

RANK

select id, name, salary, rank() over(partition by name order by salary desc) rank from mydatabase.test;

DENSE_RANK

select id, name, salary, dense_rank() over(partition by name order by salary desc) rank from mydatabase.test;

分组函数

GROUPING SETS

select month, day, sum(salary) from

(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db

group by month, day grouping sets(month, (month, day)) order by month asc, day asc;

CUBE

select month, day, sum(salary) from

(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db

group by month, day with cube order by month asc, day asc;

ROLLUP

select month, day, sum(salary) from

(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db

group by month, day with rollup order by month asc, day asc;

参考文献：

原文：https://www.cnblogs.com/jhc888007/p/11085012.html

weixin_39778668

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
hive 去重字符串_Hive常用函数傻瓜学习笔记附完整示例

创建表drop table if exists mydatabase.test;create table mydatabase.test(id int, name string, timestring string, salary double, bonus double)row format delimitedfields terminated by ‘\t‘stored as textfile...
复制链接

扫一扫