DDL
浏览所有数据库
hive> SHOW TABLES;
# 浏览所有以s结尾的表
hive> SHOW TABLES '.*s';
浏览数据库下面的所有表
# 切换数据库
hive> use default;
OK
Time taken: 0.022 seconds
# 浏览数据库下的所有表
hive> show tables;
创建表
hive> CREATE TABLE pokes (foo INT, bar STRING);
创建带有分区的表
hive> CREATE TABLE invites (foo INT, bar STRING) PARTITIONED BY (ds STRING);
创建指定分隔符的表:
CREATE TABLE u_data (
userid INT,
movieid INT,
rating INT,
unixtime STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
删除表
hive> DROP TABLE pokes;
修改表:
# 修改表名
hive> ALTER TABLE events RENAME TO 3koobecaf;
# 增加一个字段
hive> ALTER TABLE pokes ADD COLUMNS (new_col INT);
# 增加带有说明的字段
hive> ALTER TABLE invites ADD COLUMNS (new_col2 INT COMMENT 'a comment');
# 替换一个字段 注意:The table must use a native SerDe
hive> ALTER TABLE invites REPLACE COLUMNS (foo INT, bar STRING, baz INT COMMENT 'baz replaces new_col2');
# 替换字段,只保留一个字段
hive> ALTER TABLE invites REPLACE COLUMNS (foo INT COMMENT 'only keep the first column');
DML
hive DML语法: LanguageManual DML;
load本地文件到hive表
hive> LOAD DATA LOCAL INPATH './examples/files/kv1.txt' OVERWRITE INTO TABLE pokes;
hive> LOAD DATA LOCAL INPATH './examples/files/kv2.txt' OVERWRITE INTO TABLE invites PARTITION (ds='2008-08-15');
load hdfs上的文件或目录到hive表:
hive> LOAD DATA INPATH '/user/myname/kv2.txt' OVERWRITE INTO TABLE invites PARTITION (ds='2008-08-15');
查询数据到hdfs上
hive> INSERT OVERWRITE DIRECTORY '/tmp/hdfs_out' SELECT a.* FROM invites a WHERE a.ds='2008-08-15';
查询数据到本地目录
hive> INSERT OVERWRITE LOCAL DIRECTORY '/tmp/local_out' SELECT a.* FROM pokes a;
group by
hive> FROM invites a INSERT OVERWRITE TABLE events SELECT a.bar, count(*) WHERE a.foo > 0 GROUP BY a.bar;
hive> INSERT OVERWRITE TABLE events SELECT a.bar, count(*) FROM invites a WHERE a.foo > 0 GROUP BY a.bar;
join两张表
hive> FROM pokes t1 JOIN invites t2 ON (t1.bar = t2.bar) INSERT OVERWRITE TABLE events SELECT t1.bar, t1.foo, t2.foo;