1 创建表
1.1 语法
#1.
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name # 表名
[(col_name data_type [COMMENT col_comment], ...)] # 列名
[COMMENT table_comment] #表批注
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] # 分区
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] #分桶
[
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]
] # 指定行和文件的格式化
[LOCATION hdfs_path] # 指定表存放的路径
[TBLPROPERTIES (property_name=property_value, ...)] # 指定表属性
[AS select_statement] # 在建表的时候,以当前的查询语句的结果作为表数据
#2.
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path]; # 创建一张新表,以原有的表或者视图作为数据
1.2 相关属性
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type -- (Note: Available in Hive 0.7.0 and later)
primitive_type
: TINYINT
| SMALLINT
| INT
| BIGINT
| BOOLEAN
| FLOAT
| DOUBLE
| DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
| STRING
| BINARY -- (Note: Available in Hive 0.8.0 and later)
| TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
| DECIMAL -- (Note: Available in Hive 0.11.0 and later)
| DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
| DATE -- (Note: Available in Hive 0.12.0 and later)
| VARCHAR -- (Note: Available in Hive 0.12.0 and later)
| CHAR -- (Note: Available in Hive 0.13.0 and later)
array_type
: ARRAY < data_type >
map_type
: MAP < primitive_type, data_type >
struct_type
: STRUCT < col_name : data_type [COMMENT col_comment], ...>
union_type
: UNIONTYPE < data_type, data_type, ... > -- (Note: Available in Hive 0.7.0 and later)
row_format
: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
[NULL DEFINED AS char] -- (Note: Available in Hive 0.13 and later)
| SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
file_format:
: SEQUENCEFILE
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
| RCFILE -- (Note: Available in Hive 0.6.0 and later)
| ORC -- (Note: Available in Hive 0.11.0 and later)
| PARQUET -- (Note: Available in Hive 0.13.0 and later)
| AVRO -- (Note: Available in Hive 0.14.0 and later)
| JSONFILE -- (Note: Available in Hive 4.0.0 and later)
| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
column_constraint_specification:
: [ PRIMARY KEY|UNIQUE|NOT NULL|DEFAULT [default_value]|CHECK [check_expression] ENABLE|DISABLE NOVALIDATE RELY/NORELY ]
default_value:
: [ LITERAL|CURRENT_USER()|CURRENT_DATE()|CURRENT_TIMESTAMP()|NULL ]
constraint_specification:
: [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE RELY/NORELY ]
[, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE RELY/NORELY ]
[, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE
[, CONSTRAINT constraint_name UNIQUE (col_name, ...) DISABLE NOVALIDATE RELY/NORELY ]
[, CONSTRAINT constraint_name CHECK [check_expression] ENABLE|DISABLE NOVALIDATE RELY/NORELY ]
1.3 实例
# 创建内部表
CREATE TABLE IF NOT EXISTS `users`.`info`(
`id` int COMMENT 'user id',
`name` string COMMENT 'user name',
`age` int COMMENT 'user age'
) COMMENT 'user info'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
# 数据加载
LOAD DATA LOCAL INPATH '' INTO TABLE table_name;
# 创建外部表
CREATE EXTERNAL TABLE IF NOT EXISTS `users`.`info_1`(
`id` int COMMENT 'user id',
`name` string COMMENT 'user name',
`age` int COMMENT 'user age'
) COMMENT 'user info'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LOCATION 'hdfs_path';
# 数据加载
LOAD DATA LOCAL INPATH '/home/user.txt' INTO TABLE `users`.`info_1`;
# 创建临时表
CREATE TEMPORARY TABLE IF NOT EXISTS `users`.`info_2`(
`id` int COMMENT 'user id',
`name` string COMMENT 'user name',
`age` int COMMENT 'user age'
) COMMENT 'user info'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
# 数据加载
LOAD DATA LOCAL INPATH '/home/user.txt' INTO TABLE `users`.`info_2`;
总结:
- 当我们删除了内部表时,元数据和数据都会被删除
- 当我们删除了外部表时,元数据会删除,数据不会被删除
- 临时表没有元数据,而且数据也没有存放到HDFS中,他的数据都在内存里面
- 当Hive关闭后,临时表会消失
2 修改表
2.1 语法
1. 重命名表
ALTER TABLE table_name RENAME TO new_table_name;
2. Alter Table Properties
ALTER TABLE table_name SET TBLPROPERTIES table_properties;
table_properties:
(property_name = property_value, property_name = property_value, ... )
2.1 修改表的批注信息
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);
3 修改列
3.1 语法
# 1. 修改列信息
ALTER TABLE table_name CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE];
# 2. 添加或者删除列字段
ALTER TABLE table_name ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
[CASCADE]
3.2 实例
# 1. 修改列名
ALTER TABLE `user_1`.`t_user` CHANGE id uid int;
# 2. 修改字段类型
ALTER TABLE `user_1`.`t_user` CHANGE uid uid string;
# 2. 向表中添加字段
ALTER TABLE `user_1`.`t_user` ADD COLUMNS (birth string);
# 3. 删除列字段(birth)
ALTER TABLE `user_1`.`t_user` REPLACE COLUMNS (uid string,name string);
4 添加数据
4.1 语法
# 1. load
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
# 2. insert
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]]
select_statement1 FROM from_statement;
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
4.2 实例
# 1. load
LOAD DATA LOCAL INPATH '/home/user.txt' INTO TABLE `hive`.`t_1`;
LOAD DATA INPATH '/user.txt' INTO TABLE `hive`.`t_1`;
# 2. insert
CREATE TABLE `hive`.`t_user4` LIKE `hive`.`t_user3`;
INSERT OVERWRITE TABLE `hive`.`t_user4` IF NOT EXISTS
SELECT * FROM `hive`.`t_user3`;
- local是从本地文件系统中复制数据到表目录下
- 不加local是从HDFS文件系统中剪切数据到表目录下
- load方式加载数据只能用于表是textfile格式的情况
5 多数据插入
5.1 语法
FROM from_statement
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2]
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...;
5.2 实例
CREATE TABLE `hive`.`t_user5` LIKE `hive`.`t_user3`;
CREATE TABLE `hive`.`t_user6` LIKE `hive`.`t_user3`;
FROM `hive`.`t_user3`
INSERT OVERWRITE TABLE `hive`.`t_user5` IF NOT EXISTS SELECT *
INSERT OVERWRITE TABLE `hive`.`t_user6` IF NOT EXISTS SELECT *
;
6 Write Data
6.1 语法
INSERT OVERWRITE [LOCAL] DIRECTORY directory1
[ROW FORMAT row_format] [STORED AS file_format]
SELECT ... FROM ...
6.2 实例
INSERT OVERWRITE DIRECTORY '/write'
ROW FORMAT DELIMITED
SELECT * FROM `hive`.`t_user5`;