1.1、partitioned by (fiels string…)
CREATE TABLE dept_partition(
deptno int,
dname string,
loc string
)
PARTITIONED BY (day string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOAD DATA LOCAL INPATH '/home/hadoop/data/dept.txt' OVERWRITE INTO TABLE dept_partition PARTITION (day=20201225)
1.2、上传文件文件至指定分区,然后MSCK进行分区修复-(慎用,全部分区进行重新扫描)
[hadoop@node01 data]$ hadoop fs -mkdir /user/hive/warehouse/test.db/dept_partition/day=20201226
[hadoop@node01 data]$ hadoop fs -put dept.txt /user/hive/warehouse/test.db/dept_partition/day=20201226
MSCK REPAIR TABLE dept_partition;
1.3、删除、添加分区
ALTER TABLE dept_partition DROP PARTITION(day='20201226'),PARTITION(day='20201225');
[hadoop@node01 data]$ hadoop fs -mkdir /user/hive/warehouse/test.db/dept_partition/day=20201226
[hadoop@node01 data]$ hadoop fs -put dept.txt /user/hive/warehouse/test.db/dept_partition/day=20201226
ALTER TABLE dept_partition ADD IF NOT EXISTS PARTITION (day=20201226)
insert into table dept_partition partition(day='20201226')
select * from dept;
1.4、多级分区
CREATE TABLE dept_partition_d_h(
deptno int,
dname string,
loc string
)
PARTITIONED BY (day string,hour string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INPATH '/home/hadoop/data/dept.txt' overwrite INTO table dept_partition_d_h partition(day='20201111',hour='06')
1.5、动态分区
CREATE TABLE emp_partition(
`empno` int,
`ename` string,
`job` string,
`mgr` int,
`hiredate` string,
`sal` double,
`comm` double)
partitioned by (deptno int)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table emp_partition partition(deptno)
select empno,ename,job,mgr,hiredate,sal,comm,deptno from emp;
2.1、时间函数
0: jdbc:hive2://node01:10000> current_date
2020-12-13
0: jdbc:hive2://node01:10000> current_timestamp;
2020-12-13 10:41:03.687
2.2. 日期->时间戳
1.unix_timestamp() 获取当前时间戳
select unix_timestamp()
1565858389
2.unix_timestamp(string timestame) 输入的时间戳格式必须为’yyyy-MM-dd HH:mm:ss’,如不符合则返回null
select unix_timestamp('2019-08-15 16:40:00')
1565858400
select unix_timestamp('2019-08-15')
null
3.unix_timestamp(string date,string pattern) 将指定时间字符串格式字符串转化成unix时间戳,如不符合则返回null
select unix_timestamp('2019-08-15','yyyy-MM-dd')
1565798400
select unix_timestamp('2019-08-15 16:40:00','yyyy-MM-dd HH:mm:ss')
1565858400
select unix_timestamp('2019-08-15','yyyy-MM-dd HH:mm:ss')
null
2.2. 时间戳->日期
1.from_unixtime(bigint unixtime,string format) 将时间戳秒数转化为UTC时间,并用字符串表示,可通过format规定的时间格式,指定输出的时间格式,其中unixtime 是10位的时间戳值,而13位的所谓毫秒的是不可以的。
select from_unixtime(1565858389,'yyyy-MM-dd HH:mm:ss')
2019-08-15 16:39:49
select from_unixtime(1565858389,'yyyy-MM-dd')
2019-08-15
2.如果unixtime为13位的,需要先转成10位
select from_unixtime(cast(1553184000488/1000 as int),'yyyy-MM-dd HH:mm:ss')
2019-03-22 00:00:00
select from_unixtime(cast(substr(1553184000488,1,10) as int),'yyyy-MM-dd HH:mm:ss')
2019-03-22 00:00:00
3.获取当前时间
select from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss')
2019-08-15 17:18:55
3.1、to_date
select to_date('2020-02-22 12:12:12')
2020-02-22
3.2、year month day hour minute second
select year('2020-02-22 12:12:12')
2020
3.3、weekofyear 、 dayofmonth
select weekofyear('2020-10-21')
43
3.4、months_between
select monthbetween('2020-02-01','2020-12-01-09'); ---10.25806452
3.5、add_months,datediff
select add_months('2020-09-09',2); --2020-11-09
select datediff('2020-09-09 10:10:10','2020-12-01 12:12:12') ---83
date_add
select date_add('2020-11-15',5); -- 2020-11-20
date_sub
select date_sub('2020-11-15',5); --2020-11-10
last_day
select last_day('2020-12-01') -- 2020-12-31
round、ceil、floor
select round(1.4) 1.0
select round(1.5) 2.0
select ceil(1.01); 2
select floor(1.999) 1
upper/lower
select upper('abc'); ABC
select lower('AbCDE') abcde
length
select length('你好abc'); 7
select length('abc'); 3
trim
select trim(" effwa ") effwa
lpad/rpad
select lpad("123",10,"*"); *******123
select lpad("123",10,"*"); 123*******
regexp_replace
select regexp_replace("abcdefag","a","+") +bcdef+g
substr
select substr('abcde',3) cde
select substring('abcde',3) cde
select substr('abcde',-1) e
select substr('abcde',3,2) cd
select substring('abcde',3,2) cd
select substring('abcde',-2,2) de
concat
concat_ws
select concat("abc","-","哈"); abc-哈哈
select concat_ws('+',"abc","1321","哈哈") abc+1321+哈哈
json_tuple
create table t_rate
as
select
movie, rate, time, userid,
year(from_unixtime(cast(time as bigint))) as year,
month(from_unixtime(cast(time as bigint))) as month,
day(from_unixtime(cast(time as bigint))) as day,
hour(from_unixtime(cast(time as bigint))) as hour,
minute(from_unixtime(cast(time as bigint))) as minute,
from_unixtime(cast(time as bigint)) ts
from
(
select json_tuple(json, "movie","rate","time","userid") as(movie, rate, time, userid) from rating_json
) tmp;
parse_url_tuple
select parse_url_tuple("http://www.google.com/test/film?param1=value1¶m2=value2" ,'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO')
+-----------------+-------------+------------------------------+-------+-------+-----------------------------------------+-----------------+-------+--+
| c0 | c1 | c2 | c3 | c4 | c5 | c6 | c7 |
+-----------------+-------------+------------------------------+-------+-------+-----------------------------------------+-----------------+-------+--+
| www.google.com | /test/film | param1=value1¶m2=value2 | NULL | http | /test/film?param1=value1¶m2=value2 | www.google.com | NULL |
+-----------------+-------------+------------------------------+-------+-------+-----------------------------------------+-----------------+-------+--+