hive常用函数

1.1、partitioned by (fiels string…)

CREATE TABLE dept_partition(
 deptno int,
 dname string,
 loc string
)
PARTITIONED BY (day string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOAD DATA LOCAL INPATH '/home/hadoop/data/dept.txt' OVERWRITE INTO TABLE dept_partition PARTITION (day=20201225)

1.2、上传文件文件至指定分区,然后MSCK进行分区修复-(慎用,全部分区进行重新扫描)

[hadoop@node01 data]$ hadoop fs -mkdir  /user/hive/warehouse/test.db/dept_partition/day=20201226
[hadoop@node01 data]$ hadoop fs -put dept.txt /user/hive/warehouse/test.db/dept_partition/day=20201226
MSCK REPAIR TABLE dept_partition;

1.3、删除、添加分区

ALTER TABLE dept_partition DROP PARTITION(day='20201226'),PARTITION(day='20201225');
[hadoop@node01 data]$ hadoop fs -mkdir  /user/hive/warehouse/test.db/dept_partition/day=20201226
[hadoop@node01 data]$ hadoop fs -put dept.txt /user/hive/warehouse/test.db/dept_partition/day=20201226
ALTER TABLE dept_partition ADD  IF NOT EXISTS PARTITION (day=20201226) 
insert into table dept_partition partition(day='20201226')
select * from dept;

1.4、多级分区

CREATE TABLE dept_partition_d_h(
  deptno int,
  dname string,
  loc string
)
PARTITIONED BY (day string,hour string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

LOAD DATA LOCAL INPATH '/home/hadoop/data/dept.txt' overwrite INTO table dept_partition_d_h partition(day='20201111',hour='06')

1.5、动态分区

CREATE TABLE emp_partition(
  `empno` int, 
  `ename` string, 
  `job` string, 
  `mgr` int, 
  `hiredate` string, 
  `sal` double, 
  `comm` double)
 partitioned by (deptno int)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

set hive.exec.dynamic.partition.mode=nonstrict;
insert into table emp_partition partition(deptno)
select empno,ename,job,mgr,hiredate,sal,comm,deptno from emp;
 

2.1、时间函数

0: jdbc:hive2://node01:10000> current_date   
2020-12-13
0: jdbc:hive2://node01:10000> current_timestamp; 
2020-12-13 10:41:03.687

2.2. 日期->时间戳

1.unix_timestamp() 获取当前时间戳

select unix_timestamp() 
1565858389

2.unix_timestamp(string timestame) 输入的时间戳格式必须为’yyyy-MM-dd HH:mm:ss’,如不符合则返回null

select unix_timestamp('2019-08-15 16:40:00') 
1565858400
select unix_timestamp('2019-08-15') 
null

3.unix_timestamp(string date,string pattern) 将指定时间字符串格式字符串转化成unix时间戳,如不符合则返回null

select unix_timestamp('2019-08-15','yyyy-MM-dd') 
1565798400

select unix_timestamp('2019-08-15 16:40:00','yyyy-MM-dd HH:mm:ss')
1565858400

select unix_timestamp('2019-08-15','yyyy-MM-dd HH:mm:ss') 
null

2.2. 时间戳->日期
1.from_unixtime(bigint unixtime,string format) 将时间戳秒数转化为UTC时间,并用字符串表示,可通过format规定的时间格式,指定输出的时间格式,其中unixtime 是10位的时间戳值,而13位的所谓毫秒的是不可以的。

select from_unixtime(1565858389,'yyyy-MM-dd HH:mm:ss')
2019-08-15 16:39:49

select from_unixtime(1565858389,'yyyy-MM-dd')
2019-08-15

2.如果unixtime为13位的,需要先转成10位

select from_unixtime(cast(1553184000488/1000 as int),'yyyy-MM-dd HH:mm:ss')
2019-03-22 00:00:00

select from_unixtime(cast(substr(1553184000488,1,10) as int),'yyyy-MM-dd HH:mm:ss') 
2019-03-22 00:00:00

3.获取当前时间

select from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss')  
 2019-08-15 17:18:55

3.1、to_date

select to_date('2020-02-22 12:12:12')
2020-02-22

3.2、year month day hour minute second

select year('2020-02-22 12:12:12')
 2020

3.3、weekofyear 、 dayofmonth

select weekofyear('2020-10-21')
 43

3.4、months_between

select monthbetween('2020-02-01','2020-12-01-09');   ---10.25806452

3.5、add_months,datediff

select add_months('2020-09-09',2);  --2020-11-09
select datediff('2020-09-09 10:10:10','2020-12-01 12:12:12')  ---83

date_add

select date_add('2020-11-15',5);   -- 2020-11-20

date_sub

select date_sub('2020-11-15',5);   --2020-11-10

last_day

 select last_day('2020-12-01')    -- 2020-12-31

round、ceil、floor

select round(1.4)   1.0
select round(1.5)   2.0
select ceil(1.01);  2
select floor(1.999) 1

upper/lower

select upper('abc');   ABC
select lower('AbCDE')  abcde

length

select length('你好abc'); 7
select length('abc'); 3

trim

select trim(" effwa   ")   effwa

lpad/rpad

select lpad("123",10,"*");   *******123
select lpad("123",10,"*");   123******* 

regexp_replace

select regexp_replace("abcdefag","a","+")           +bcdef+g

substr

 select substr('abcde',3)         cde
 select substring('abcde',3)      cde
 select substr('abcde',-1)        e
 select substr('abcde',3,2)       cd
 select substring('abcde',3,2)    cd
 select substring('abcde',-2,2)   de

concat

concat_ws
select concat("abc","-","哈");   abc-哈哈
 select concat_ws('+',"abc","1321","哈哈") abc+1321+哈哈

json_tuple

create table t_rate
as
select 
movie, rate, time, userid,
year(from_unixtime(cast(time as bigint))) as year,
month(from_unixtime(cast(time as bigint))) as month,
day(from_unixtime(cast(time as bigint))) as day,
hour(from_unixtime(cast(time as bigint))) as hour,
minute(from_unixtime(cast(time as bigint))) as minute,
from_unixtime(cast(time as bigint)) ts
from 
(
select json_tuple(json, "movie","rate","time","userid") as(movie, rate, time, userid) from rating_json 
) tmp;

parse_url_tuple

select parse_url_tuple("http://www.google.com/test/film?param1=value1&param2=value2" ,'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE',  'AUTHORITY', 'USERINFO')
+-----------------+-------------+------------------------------+-------+-------+-----------------------------------------+-----------------+-------+--+
|       c0        |     c1      |              c2              |  c3   |  c4   |                   c5                    |       c6        |  c7   |
+-----------------+-------------+------------------------------+-------+-------+-----------------------------------------+-----------------+-------+--+
| www.google.com  | /test/film  | param1=value1&param2=value2  | NULL  | http  | /test/film?param1=value1&param2=value2  | www.google.com  | NULL  |
+-----------------+-------------+------------------------------+-------+-------+-----------------------------------------+-----------------+-------+--+
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值