http://blog.csdn.net/u013980127/article/details/52604882
与传统SQL一致,并且概念简单的,本文没有详细说明或完全没有说明。另外,本文也没有按照官文一字不漏地翻译,只是提炼本人认为需要的。
1 DDL
1.1 Create/Drop/Alter/Use Database
1.1.1 Create Database
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
注:DATABASE与SCHEMA用途相同
1.1.2 Drop Database
DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE];
注:默认RESTRICT,使用CASCADE可删除含表的数据库。
1.1.3 Alter Database
ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...); -- (Note: SCHEMA added in Hive 0.14.0)
ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role; -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)
1.1.4 Use Database
USE database_name;
USE DEFAULT;
-- 获取当前数据库
SELECT current_database()
1.2 Create/Drop/Truncate Table
1.2.1 Create Table
1.2.1.1 语法
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later)
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)]
ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
[STORED AS DIRECTORIES]
[
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later)
]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later)
[AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type -- (Note: Available in Hive 0.7.0 and later)
primitive_type
: TINYINT
| SMALLINT
| INT
| BIGINT
| BOOLEAN
| FLOAT
| DOUBLE
| DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
| STRING
| BINARY -- (Note: Available in Hive 0.8.0 and later)
| TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
| DECIMAL -- (Note: Available in Hive 0.11.0 and later)
| DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
| DATE -- (Note: Available in Hive 0.12.0 and later)
| VARCHAR -- (Note: Available in Hive 0.12.0 and later)
| CHAR -- (Note: Available in Hive 0.13.0 and later)
array_type
: ARRAY
map_type
: MAP
1.2.1.2 Row Format, Storage Format, and SerDe
可以使用自定义的SerDe或者Hive自带的SerDe。如果没有指定ROW FORMAT或指定了ROW FORMAT DELIMITED时,会使用自带的SerDe。
‘hive.default.fileformat’设置默认存储格式。
参考
[Hive中的InputFormat、OutputFormat与SerDe]
(https://www.coder4.com/archives/4031)
1.2.1.3 Partitioned Table
通过PARTITIONED BY创建。可以指定一个或多个分区列,用CLUSTERED BY columns分桶,通过SORT BY排序桶中的数据。
-- 示例
id int,
date date,
name varchar
create table table_name (
id int,
dtDontQuery string,
name string
)
partitioned by (date string)
1.2.1.4 External Tables
LOCATION指定数据所在位置。删除外部表时,表中数据不会被删除。
CREATE EXTERNAL TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User',
country STRING COMMENT 'country of origination')
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054'
STORED AS TEXTFILE
LOCATION '<hdfs_location>';
1.2.1.5 Create Table As Select (CTAS)
限制:
1. 目标表不能是分区表;
2. 目标表不能是外部表;
3. 目标表不能是list bucketing表。
示例
CREATE TABLE new_key_value_store
ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
STORED AS RCFile
AS
SELECT (key % 1024) new_key, concat(key, value) key_value_pair
FROM key_value_store
SORT BY new_key, key_value_pair;
1.2.1.6 Create Table Like
复制存在的表的表定义,创建新表(不含数据)。
CREATE TABLE empty_key_value_store
LIKE key_value_store;
1.2.1.7 Bucketed Sorted Tables
对于每一个表(table)或者分区, Hive可以进一步组织成桶,也就是说桶是更为细粒度的数据范围划分。使用CLUSTERED BY 子句来指定划分桶所用的列和要划分的桶的个数。
CREATE TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
STORED AS SEQUENCEFILE;
参考:
[Hive 基础之:分区、桶、Sort Merge Bucket Join]
(http://blog.csdn.net/wisgood/article/details/17186107)
1.2.1.8 Skewed Tables
该特性为了优化表中一列或几列有数据倾斜的值。
CREATE TABLE list_bucket_single (key STRING, value STRING)
SKEWED BY (key) ON (1,5,6) [STORED AS DIRECTORIES];
CREATE TABLE list_bucket_multiple (col1 STRING, col2 int, col3 STRING)
SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78)) [STORED AS DIRECTORIES];
参考:
[HIVE 数据倾斜调优总结]
(http://www.cnblogs.com/end/archive/2012/06/19/2554582.html)
1.2.1.9 Temporary Tables
1. 不支持分区;
2. 不支持索引。
示例
CREATE TEMPORARY TABLE temp_table (key STRING, value STRING);
1.2.2 Drop Table
DROP TABLE [IF EXISTS] table_name [PURGE];
注:指定PURGE后,数据不会放到回收箱,会直接删除。
1.2.3 Truncate Table
TRUNCATE TABLE table_name [PARTITION partition_spec];
partition_spec:
: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
注:指定PARTITION时,只删除PARTITION的数据,否则删除所有数据。
1.3 Alter Table/Partition/Column
1.3.1 Alter Table
-- 重命名表名
ALTER TABLE table_name RENAME TO new_table_name;
-- 修改表属性
ALTER TABLE table_name SET TBLPROPERTIES table_properties;
table_properties:
: (property_name = property_value, property_name = property_value, ... )
-- 修改表注释
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);
-- 增加SerDe属性
ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES serde_properties];
ALTER TABLE table_name [PARTITION partition_spec] SET SERDEPROPERTIES serde_properties;
serde_properties:
: (property_name = property_value, property_name = property_value, ... )
示例
ALTER TABLE table_name SET SERDEPROPERTIES ('field.delim' = ',');
-- 修改存储属性
ALTER TABLE table_name CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name, ...)]
INTO num_buckets BUCKETS;
-- Alter Table Skewed
ALTER TABLE table_name SKEWED BY (col_name1, col_name2, ...)
ON ([(col_name1_value, col_name2_value, ...) [, (col_name1_value, col_name2_value), ...]
[STORED AS DIRECTORIES];
-- Alter Table Not Skewed
ALTER TABLE table_name NOT SKEWED;
-- Alter Table Not Stored as Directories
ALTER TABLE table_name NOT STORED AS DIRECTORIES;
-- Alter Table Set Skewed Location
ALTER TABLE table_name SET SKEWED LOCATION (col_name1="location1" [, col_name2="location2", ...] );
1.3.2 Alter Partition
增加分区
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec
[LOCATION 'location1'] partition_spec [LOCATION 'location2'] ...;
partition_spec:
: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
示例
ALTER TABLE page_view ADD PARTITION (dt='2008-08-08', country='us') location '/path/to/us/part080808'
PARTITION (dt='2008-08-09', country='us') location '/path/to/us/part080809';
动态分区
重命名分区
ALTER TABLE table_name PARTITION partition_spec RENAME TO PARTITION partition_spec;
交换分区
分区可以在表之间交换
ALTER TABLE table_name_1 EXCHANGE PARTITION (partition_spec) WITH TABLE table_name_2;
-- multiple partitions
ALTER TABLE table_name_1 EXCHANGE PARTITION (partition_spec, partition_spec2, ...) WITH TABLE table_name_2;
恢复分区 (MSCK REPAIR TABLE)
通过HDFS命令增加的分区,不会在Hive的metastore中有记录,可以通过下面命令,自动修复。
MSCK REPAIR TABLE table_name;
删除分区
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec[, PARTITION partition_spec, ...]
[IGNORE PROTECTION] [PURGE]; -- (Note: PURGE available in Hive 1.2.0 and later, IGNORE PROTECTION not available 2.0.0 and later)
(Un)Archive Partition
ALTER TABLE table_name ARCHIVE PARTITION partition_spec;
ALTER TABLE table_name UNARCHIVE PARTITION partition_spec;
1.3.3 Alter Either Table or Partition
修改表或分区的文件格式
ALTER TABLE table_name [PARTITION partition_spec] SET FILEFORMAT file_format;
修改表或分区的存储位置
ALTER TABLE table_name [PARTITION partition_spec] SET LOCATION "new location";
Alter Table/Partition Touch
ALTER TABLE table_name TOUCH [PARTITION partition_spec];
Alter Table/Partition Protections
ALTER TABLE table_name [PARTITION partition_spec] ENABLE|DISABLE NO_DROP [CASCADE];
ALTER TABLE table_name [PARTITION partition_spec] ENABLE|DISABLE OFFLINE;
Alter Table/Partition Compact
ALTER TABLE table_name [PARTITION (partition_key = 'partition_value' [, ...])]
COMPACT 'compaction_type'
[WITH OVERWRITE TBLPROPERTIES ("property"="value" [, ...])];
Alter Table/Partition Concatenate
如果表或分区包含很多小的RCF文件或ORC文件,下面命令会合并小文件成大文件。
ALTER TABLE table_name [PARTITION (partition_key = 'partition_value' [, ...])] CONCATENATE;
1.3.4 Alter Column
修改列名、类型、位置与注释
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
-- 默认RESTRICT,CASCADE除了修改表的metadata,也会修改所有分区的metadata。
增加或替换
ALTER TABLE table_name
[PARTITION partition_spec] -- (Note: Hive 0.14.0 and later)
ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
[CASCADE|RESTRICT] -- (Note: Hive 0.15.0 and later)
-- REPLACE会移除所有存在的列,增加新的列。
Partial Partition Specification
-- 以下操作
SET hive.exec.dynamic.partition = true;
ALTER TABLE foo PARTITION (ds='2008-04-08', hr) CHANGE COLUMN dec_column_name dec_column_name DECIMAL(38,18);
-- 等价于
ALTER TABLE foo PARTITION (ds='2008-04-08', hr=11) CHANGE COLUMN dec_column_name dec_column_name DECIMAL(38,18);
ALTER TABLE foo PARTITION (ds='2008-04-08', hr=12) CHANGE COLUMN dec_column_name dec_column_name DECIMAL(38,18);
...
支持的操作:Change column、Add column、Replace column、File Format、Serde Properties
1.4 Create/Drop/Alter View
1.4.1 Create View
语法:
CREATE VIEW [IF NOT EXISTS] [db_name.]view_name [(column_name [COMMENT column_comment], ...) ]
[COMMENT view_comment]
[TBLPROPERTIES (property_name = property_value, ...)]
AS SELECT ...;
示例:
CREATE VIEW onion_referrers(url COMMENT 'URL of Referring page')
COMMENT 'Referrers to The Onion website'
AS
SELECT DISTINCT referrer_url
FROM page_view
WHERE page_url='http://www.theonion.com';
1.4.2 Drop View
DROP VIEW [IF EXISTS] [db_name.]view_name;
1.4.3 Alter View Properties
ALTER VIEW [db_name.]view_name SET TBLPROPERTIES table_properties;
table_properties:
: (property_name = property_value, property_name = property_value, ...)
1.4.4 Alter View As Select
ALTER VIEW [db_name.]view_name AS select_statement;
1.5 Create/Drop/Alter Index
1.5.1 Create Index
CREATE INDEX index_name
ON TABLE base_table_name (col_name, ...)
AS index_type
[WITH DEFERRED REBUILD]
[IDXPROPERTIES (property_name=property_value, ...)]
[IN TABLE index_table_name]
[
[ ROW FORMAT ...] STORED AS ...
| STORED BY ...
]
[LOCATION hdfs_path]
[TBLPROPERTIES (...)]
[COMMENT "index comment"];
参考:
CREATE INDEX
1.5.2 Drop Index
DROP INDEX [IF EXISTS] index_name ON table_name;
1.5.3 Alter Index
ALTER INDEX index_name ON table_name [PARTITION partition_spec] REBUILD;