Hive
DDL
官网:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
创建表
DELIMITED
DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
[NULL DEFINED AS char]
-- Managed (internal) table backed by delimited text files.
-- Expected row layout: id,name,hobby1-hobby2,city:street-city:street
CREATE TABLE psn (
    id      INT,
    name    STRING,
    hobby   ARRAY<STRING>,          -- list items separated by '-'
    address MAP<STRING, STRING>     -- entries separated by '-', key:value by ':'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':';
SERDE
SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
-- Apache access-log table parsed with RegexSerDe.
-- Log line format: host identity user [time] "request" status size
-- FIX: the last two columns were misnamed referer/agent, but the 6th and
-- 7th regex groups capture the HTTP status code and the response size
-- (e.g. `304 -` in the sample logs); they are renamed accordingly.
CREATE TABLE logtbl (
    host     STRING,   -- client IP / hostname            (group 1)
    identity STRING,   -- identd identity, usually '-'    (group 2)
    t_user   STRING,   -- authenticated user, usually '-' (group 3)
    time     STRING,   -- timestamp inside [...]          (group 4)
    request  STRING,   -- request line inside quotes      (group 5)
    status   STRING,   -- HTTP status code or '-'         (group 6)
    size     STRING)   -- response bytes or '-'           (group 7)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) \\[(.*)\\] \"(.*)\" (-|[0-9]*) (-|[0-9]*)"
)
STORED AS TEXTFILE;
192.168.57.4 - - [29/Feb/2016:18:14:35 +0800] "GET /bg-upper.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:35 +0800] "GET /bg-nav.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:35 +0800] "GET /asf-logo.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:35 +0800] "GET /bg-button.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:35 +0800] "GET /bg-middle.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET / HTTP/1.1" 200 11217
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET / HTTP/1.1" 200 11217
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /tomcat.css HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /tomcat.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /asf-logo.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /bg-middle.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /bg-button.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /bg-nav.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /bg-upper.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET / HTTP/1.1" 200 11217
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /tomcat.css HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /tomcat.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET / HTTP/1.1" 200 11217
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /tomcat.css HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /tomcat.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /bg-button.png HTTP/1.1" 304 -
192.168.57.4 - - [29/Feb/2016:18:14:36 +0800] "GET /bg-upper.png HTTP/1.1" 304 -
-- Load the sample access-log file from the local filesystem into logtbl;
-- each line is parsed by the RegexSerDe configured on the table.
LOAD DATA LOCAL INPATH '/home/hdfs/logtbl' INTO TABLE logtbl;
创建分区表
PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)
分区列 col_name 不能与表中已有的普通字段名重复
-- Single-partition managed table: `age` is a partition column, so it is
-- NOT stored in the data files — it is supplied at load time and becomes
-- a directory level (age=<value>) under the table path.
CREATE TABLE psn2 (
    id      INT,
    name    STRING,
    hobby   ARRAY<STRING>,
    address MAP<STRING, STRING>
)
PARTITIONED BY (age INT)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':';
创建多分区表
-- Multi-partition managed table: partitions nest as age=<v>/sex=<v>
-- directories; both partition values must be given on every load.
CREATE TABLE psn4 (
    id      INT,
    name    STRING,
    hobby   ARRAY<STRING>,
    address MAP<STRING, STRING>
)
PARTITIONED BY (age INT, sex STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':';
修改分区
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec
partition_spec:
: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
-- Register a new (empty) partition; both partition columns must be given.
ALTER TABLE psn4 ADD PARTITION (age=30, sex='girl');
删除分区
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec[, PARTITION partition_spec, ...]
-- Drop the partition; for a managed table its HDFS data is removed too.
ALTER TABLE psn4 DROP PARTITION (age=30, sex='girl');
创建外部表
创建路径 /usr,并上传文件
# Create the HDFS directory and upload the data file INTO it.
# -p makes the command idempotent if /usr already exists.
hdfs dfs -mkdir -p /usr
# FIX: the original `hdfs dfs -put data1` omitted the destination path,
# which uploads into the user's HDFS home directory instead of /usr.
hdfs dfs -put data1 /usr
外部表是根据 HDFS 上已有的文件创建的表,Hive 只记录表结构并映射到现有数据,不接管数据本身
-- External table mapped onto pre-existing files under /usr in HDFS.
-- Hive only records the schema; it does not take ownership of the data.
CREATE EXTERNAL TABLE psn3 (
    id      INT,
    name    STRING,
    hobby   ARRAY<STRING>,
    address MAP<STRING, STRING>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
LOCATION '/usr';
创建外部分区表
-- External partitioned table over /usr. Existing age=<v>/sex=<v>
-- directories are NOT auto-registered; run MSCK REPAIR afterwards.
CREATE EXTERNAL TABLE psn6 (
    id      INT,
    name    STRING,
    hobby   ARRAY<STRING>,
    address MAP<STRING, STRING>
)
PARTITIONED BY (age INT, sex STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
LOCATION '/usr';
恢复分区
创建外部分区表后,HDFS 上已有的分区目录不会自动注册到元数据库,需要执行修复命令恢复分区元数据信息
MSCK [REPAIR] TABLE table_name [ADD/DROP/SYNC PARTITIONS];
-- Scan the table's HDFS location and register any partition directories
-- that are missing from the metastore.
MSCK REPAIR TABLE psn6;
DML
官网:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML
导入数据
1,小明1,lol-book-movie,beijing:shangxuetang-shanghai:pudong
2,小明2,lol-book-movie,beijing:shangxuetang-shanghai:pudong
3,小明3,lol-book-movie,beijing:shangxuetang-shanghai:pudong
4,小明4,lol-book-movie,beijing:shangxuetang-shanghai:pudong
5,小明5,lol-movie,beijing:shangxuetang-shanghai:pudong
6,小明6,lol-book-movie,beijing:shangxuetang-shanghai:pudong
7,小明7,lol-book,beijing:shangxuetang-shanghai:pudong
8,小明8,lol-book,beijing:shangxuetang-shanghai:pudong
9,小明9,lol-book-movie,beijing:shangxuetang-shanghai:pudong
hive 执行
-- Load the comma/dash/colon-delimited sample file into the psn table.
LOAD DATA LOCAL INPATH '/home/hdfs/data1' INTO TABLE psn;
导入数据 分区(partition)
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
partcol1、partcol2 必须是建表时在 PARTITIONED BY 中声明过的分区列
-- Load into a specific partition; all rows in the file get age=10.
LOAD DATA LOCAL INPATH '/home/hdfs/data1' INTO TABLE psn2 partition (age=10);
导入数据 多分区(partition)
要录入所有分区内容
-- Multi-partition load: a value for EVERY partition column is required.
LOAD DATA LOCAL INPATH '/home/hdfs/data1' INTO TABLE psn4 partition (age=10, sex='man');
查询
-- Show all rows; arrays/maps are rendered as JSON-like values (see below).
select * from psn;
OK
1 小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
2 小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
3 小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
4 小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
5 小明5 ["lol","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
6 小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
7 小明7 ["lol","book"] {"beijing":"shangxuetang","shanghai":"pudong"}
8 小明8 ["lol","book"] {"beijing":"shangxuetang","shanghai":"pudong"}
9 小明9 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
Time taken: 0.28 seconds, Fetched: 9 row(s)
删除表
-- psn is a managed table: DROP also deletes its HDFS data.
drop table psn;
-- psn3 is an external table: DROP removes only metadata; files stay.
drop table psn3;
- 内部表删除,会 连同 hdfs 文件一起删除;
- 外部表删除,不会删除 hdfs 文件。