hive 去重 字符串_Hive常用函数 傻瓜学习笔记 附完整示例

创建表

drop table if exists mydatabase.test;

create table mydatabase.test

(id int, name string, timestring string, salary double, bonus double)

row format delimited

fields terminated by ‘\t‘

stored as textfile;

插入数据

方式1

vim test.txt

hadoop fs -mkdir test

hadoop fs -put test.txt /user/myname/test

load data inpath ‘/user/myname/test‘

overwrite into table mydatabase.test;

方式2

insert into mydatabase.test values

(1,‘ J ‘,‘2018-01-08 10:11:32‘,128.54,-45.23),

(2,‘ J ‘,‘2018-02-09 10:51:12‘,128.54,-78.25),

(3,‘ J ‘,‘2018-03-05 11:22:21‘,128.52,null),

(4,‘ J ‘,‘2018-04-08 15:40:51‘,256.23,345.23),

(5,‘ J ‘,‘2018-05-08 10:21:21‘,128.54,267.12),

(6,‘ J ‘,‘2018-06-08 10:00:50‘,256.27,-78.49),

(7,‘Rose‘,‘2018-01-08 10:11:32‘,512.65,-76.44),

(8,‘Rose‘,‘2018-02-09 10:51:12‘,512.54,-45.30),

(9,‘Rose‘,‘2018-03-05 11:22:21‘,512.13,-87.09),

(10,‘Rose‘,‘2018-04-08 15:40:51‘,512.34,19.12),

(11,‘Dickson‘,‘2018-01-08 10:21:21‘,256.87,null),

(12,‘Dickson‘,‘2018-02-08 10:00:50‘,256.52,null),

(13,‘Dickson‘,‘2018-04-08 11:00:00‘,256.12,3.69);

数学函数

四舍五入

select id, round(salary) from mydatabase.test;

四舍五入,小数保留

select id, round(salary, 1) from mydatabase.test;

向下,向上取整

select id, floor(salary), ceil(salary) from mydatabase.test;

随机数(0~1)

select id, salary*(1+rand()*0.1) from mydatabase.test;

指数,对数,取模

select id, pow(e(), salary), log(e(), salary), pmod(id, 3) from mydatabase.test;

绝对值,最大值,最小值

select id, abs(bonus), greatest(salary, bonus), least(salary, bonus) from mydatabase.test;

类型转换函数

select id, cast(salary as int) from mydatabase.test;

日期函数

当前时间

select id, name, unix_timestamp() from mydatabase.test;

时间戳转换为字符串

select id, name, from_unixtime(unix_timestamp(), ‘yyyy-MM-dd hh:mm:ss‘) from mydatabase.test;

字符串转换为时间戳

select id, name, unix_timestamp(timestring) from mydatabase.test;

时间子元素

select id, name, to_date(timestring), year(timestring), month(timestring), day(timestring), hour(timestring), minute(timestring), second(timestring) from mydatabase.test;

条件函数

IF条件

select id, if(bonus > 0, ‘yes‘, ‘no‘) from mydatabase.test;

NULL判断

select id, isnull(bonus) from mydatabase.test;

NULL条件,第二参数为默认值

select id, nvl(bonus, 0) from mydatabase.test;

CASE匹配条件

select id, name,

(case name

when ‘Jack‘ then ‘A‘

when ‘Rose‘ then ‘B‘

else ‘C‘

end)

from mydatabase.test;

CASE搜索条件

select id, salary, bonus,

(case

when salary > 500 then ‘A‘

when salary > 100 and bonus > 0 then ‘B‘

else ‘C‘

end)

from mydatabase.test;

聚合函数

去重

select distinct(name) from mydatabase.test;

计数

select count(*) from mydatabase.test;

条件计数

select count(bonus > 0) from mydatabase.test;

求和,求平均,最大,最小,方差

select name, sum(salary), avg(salary), min(salary), max(salary), variance(salary) from mydatabase.test group by name;

生成列表

select name, collect_list(salary) from mydatabase.test group by name;

字符串函数

长度

select name, length(name) from mydatabase.test;

查找

select name, locate(‘o‘, name) from mydatabase.test;

左填充,右填充

select name, lpad(name, 4, ‘_‘), rpad(name, 4, ‘_‘) from mydatabase.test;

去除左空格,去除右空格,去除左右空格

select name, ltrim(name), rtrim(name), trim(name) from mydatabase.test;

字符距离

select n1, n2, levenshtein(n1, n2) from

(select distinct(name) as n1 from mydatabase.test)db0

join

(select distinct(name) as n2 from mydatabase.test)db1

on n1 != n2;

分割

select name, split(timestring, ‘-‘) from mydatabase.test;

拼接

select name, concat(year(timestring), ‘|‘, month(timestring), ‘|‘, cast(salary as string)) from mydatabase.test;

拼接列表

select name,

concat_ws(‘|‘,

collect_list(cast(salary as string))

) from mydatabase.test group by name;

生成函数

EXPLODE

select id, part from mydatabase.test lateral view explode(split(timestring,‘ ‘)) t as part;

选择函数

IN

select id, name from mydatabase.test where name in(‘Dickson‘, ‘Rose‘);

分组排序函数

ROW_NUMBER

select id, name, salary, row_number() over(partition by name order by salary desc) rank from mydatabase.test;

RANK

select id, name, salary, rank() over(partition by name order by salary desc) rank from mydatabase.test;

DENSE_RANK

select id, name, salary, dense_rank() over(partition by name order by salary desc) rank from mydatabase.test;

分组函数

GROUPING SETS

select month, day, sum(salary) from

(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db

group by month, day grouping sets(month, (month, day)) order by month asc, day asc;

CUBE

select month, day, sum(salary) from

(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db

group by month, day with cube order by month asc, day asc;

ROLLUP

select month, day, sum(salary) from

(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db

group by month, day with rollup order by month asc, day asc;

参考文献:

原文:https://www.cnblogs.com/jhc888007/p/11085012.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值