Hive-DDL语法
1 database
1.1 create database
CREATE database if NOT exists ods
comment 'ods'
location 'hdfs://path/to/dic' --用于外部表
-- managedLocation '' 用于内部表
with dbproperties("propertyname" = "propertyvalue",...)
1.2 alter database
ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...); -- (Note: SCHEMA added in Hive 0.14.0)
ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role; -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)
ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path; -- (Note: Hive 2.2.1, 2.4.0 and later)
ALTER (DATABASE|SCHEMA) database_name SET MANAGEDLOCATION hdfs_path; -- (Note: Hive 4.0.0 and later)
1.3 drop database
DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE];
1.4 use database
USE ods;
2 table
2.1 create table
--从0开始创建表
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name(
col_name data_type [COMMENT col_comment], ...
)[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later,一般用于Hbase的存储,外部存储)]
[LOCATION hdfs_path] --指定已有的表HDFS的位置进行HDFS的数据关联
[TBLPROPERTIES (property_name=property_value, ...)] --设置表的属性如:("orc.compress = snappy")
--从现有的表进行创建表|表结构
CREATE TABLE tablename Like select * from othertable ;
CREATE TABLE tablename AS select * from othertable;
--ROW FORMAT的具体指定
ROW FORMAT
DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
[NULL DEFINED AS char]
--或者
SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
2.1.1 partitioned table
CREATE TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
STORED AS SEQUENCEFILE;
2.1.2 external table
CREATE EXTERNAL TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User',
country STRING COMMENT 'country of origination')
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054'
STORED AS TEXTFILE
LOCATION '<hdfs_location>';
2.1.3 bucketed table(clustered table)
CREATE TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
STORED AS SEQUENCEFILE;
2.2 drop table
PURGE
除了在SQL中指定以外,还可以通过TBLPROPERTIES(auto.purge=true)
来进行配置,在DROP掉Table或者Partition的时候,被删除的数据不会进入到.Trash/Current
目录,而是直接被物理删除,无法被检索。
DROP TABLE [IF EXISTS] table_name [PURGE];
2.3 alter table
假如这里有一个table a
ALTER TABLE a RENAME TO a1;
ALTER TABLE a SET TBLPROPERTIES("ORC.COMPRESS = SNAPPY");
ALTER TABLE a SET SERDE 'CLASS OF SERDE'
ALTER TABLE a SET SERDEPROPERTIES("A"="B");
ALTER TABLE a CLUSTERED BY (COL1,COL2...) [SORTED BY COL1,..] INTO 32 BUCKETS;
3 partition
3.1 添加分区
分区可以被添加,删除,重命名,移动,通过alter关键字
Alter table a add [IF NOT EXISTS]
PARTITION (dt = '20200510') LOCATION ''
......
PARTITION (dt = '20200511') LOCATION '';
3.2 删除分区
alter table a drop [IF EXISTS] PARTITION(dt = '20200513')
3.3 重命名分区
ALTER TABLE a RENAME PARTITION(dt = '20200511') TO PARTITION (dt = '20200512');
3.4 dynamic partition(动态分区)
Hive支持动态分区,默认情况下hive.exec.dynamic.partition=true
,具体动态分区用在Insert操作,dynamic partition的相关配置如下。
配置属性 | 默认值 | 官网描述 |
---|---|---|
Hive.exec.dynamic.partition | true | Needs to be set to true to enable dynamic partition inserts |
hive.exec.dynamic.partition.mode | strict | In strict mode, the user must specify at least one static partition in case the user accidentally overwrites all partitions, in nonstrict mode all partitions are allowed to be dynamic |
hive.exec.max.dynamic.partitions.pernode | 100 | Maximum number of dynamic partitions allowed to be created in each mapper/reducer node |
hive.exec.max.dynamic.partitions | 1000 | Maximum number of dynamic partitions allowed to be created in total |
hive.exec.max.created.files | 100000 | Maximum number of HDFS files created by all mappers/reducers in a MapReduce job |
hive.error.on.empty.partition | false | Whether to throw an exception if dynamic partition insert generates empty results |
dynamic的Insert操作案例
FROM TABLE a
INSERT OVERWRITE TABLE a1 PARTITION(dt = '20200512',country)
SELECT a.id,a.name...,'china' as country;
--此处dt='20200512'属于静态分区字段,由select语句的最后一个字段country的值填充另一个分区键country;
3.5 move partition
exchange with
关键字可以将分区从table1移动到table2.
ALTER TABLE table2 EXCHANGE PARTITION(dt = '20200512',dt ='20200513',...) WITH table1;
3.6 show partitions
SHOW PARTITIONS TABLE a;
3.7 recover partitions(msck repair)
-- 1
alter table a recover partitions;
-- 2
msck repair table a; --一般用于external table
4 其它的Alter
ALTER TABLE a PARTITION(dt = '20200505') SET FILEFORMAT ORC;
ALTER TABLE a PARTITION(dt = '20200505') SET LOCATION '';
5 column
hive.support.quoted.identifiers = none
去掉关键词的使用报错警告。
5.1 add/replace
ALTER TABLE a [partition()] add/replace columns (id int comment '',name string comment '',...)
ALTER TABLE a[partition()] change column id new_id int comment '' [FIRST/AFTER colname];
官网示意。
CREATE TABLE test_change (a int, b int, c int);
// First change column a's name to a1.
ALTER TABLE test_change CHANGE a a1 INT;
// Next change column a1's name to a2, its data type to string, and put it after column b.
ALTER TABLE test_change CHANGE a1 a2 STRING AFTER b;
// The new table's structure is: b int, a2 string, c int.
// Then change column c's name to c1, and put it as the first column.
ALTER TABLE test_change CHANGE c c1 INT FIRST;
// The new table's structure is: c1 int, b int, a2 string.
// Add a comment to column a1
ALTER TABLE test_change CHANGE a1 a1 INT COMMENT 'this is column a1';