数据库
创建数据库
hive> CREATE DATABASE financials;
hive> CREATE DATABASE IF NOT EXISTS financials;
显示现在有的数据库
hive> SHOW DATABASES;
default
financials
hive> CREATE DATABASE human_resources;
hive> SHOW DATABASES;
default
financials
human_resources
条件查询数据库
hive> SHOW DATABASES LIKE 'h.*';
human_resources
hive> ...
创建指定存放文件位置 数据库
hive> CREATE DATABASE financials
> LOCATION '/my/preferred/directory';
创建数据库时 添加注释信息
hive> CREATE DATABASE financials
> COMMENT 'Holds all financial tables';
hive> DESCRIBE DATABASE financials;
financials Holds all financial tables
hdfs://master-server/user/hive/warehouse/financials.db
使用数据库
hive> USE financials;
删除数据库
hive> DROP DATABASE IF EXISTS financials;
当数据库存在表时,先要删除表 再能删除数据库
当数据库中存在表时无法直接删除,会提示数据库非空,存在表,这时可以使用CASCADE关键字
hive> DROP DATABASE IF EXISTS financials CASCADE;
数据表
创建表
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY(col_name[ASC|DESC], ...)] INTO num_buckets BUCKETS]
[SKEWED BY (col_name, col_name, ...) ON ([(col_value, col_value, ...), ...|col_value, col_value, ...]) [STORED AS DIRECTORIES]
[ [ROW FORMAT row_format] [STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] ]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)]
[AS select_statement]
例:
CREATE TABLE IF NOT EXISTS mydb.employees (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
deductions MAP<STRING, FLOAT>
COMMENT 'Keys are deductions names, values are percentages
address STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
COMMENT 'Home address')
COMMENT 'Description of the table'
TBLPROPERTIES ('creator'='me', 'created_at'='2012-01-02 10:00:00', ...)
LOCATION '/user/hive/warehouse/mydb.db/employees';
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
例:
CREATE TABLE IF NOT EXISTS mydb.employees2
LIKE mydb.employees;
显示某个数据库中的表
hive> USE mydb;
hive> SHOW TABLES;
employees
table1
table2
显示指定筛选条件 表名
hive> USE mydb;
hive> SHOW TABLES 'empl.*';
employees
显示表扩展信息
hive> DESCRIBE EXTENDED mydb.employees;
name string Employee name
salary float Employee salary
subordinates array<string> Names of subordinates
deductions map<string,float> Keys are deductions names, values are percentages
address struct<street:string,city:string,state:string,zip:int> Home address
Detailed Table Information Table(tableName:employees, dbName:mydb, owner:me,
...
location:hdfs://master-server/user/hive/warehouse/mydb.db/employees,
parameters:{creator=me, created_at='2012-01-02 10:00:00',
last_modified_user=me, last_modified_time=1337544510,
comment:Description of the table, ...}, ...)
指定显示某个字段的信息
hive> DESCRIBE mydb.employees.salary;
salary float Employee salary
外部表,删除表不删除数据
CREATE EXTERNAL TABLE IF NOT EXISTS stocks (
exchange STRING,
symbol STRING,
ymd STRING,
price_open FLOAT,
price_high FLOAT,
price_low FLOAT,
price_close FLOAT,
volume INT,
price_adj_close FLOAT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/data/stocks';
复制表结构仓库外部表
CREATE EXTERNAL TABLE IF NOT EXISTS mydb.employees3
LIKE mydb.employees
LOCATION '/path/to/data';
分区表
CREATE TABLE employees (
name STRING,
salary FLOAT,
subordinates ARRAY<STRING>,
deductions MAP<STRING, FLOAT>,
address STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
)
PARTITIONED BY (country STRING, state STRING);
HIVE 可以将分区表设置成“严格”模式,禁止分区表的查询没有一个WHERE子句
hive> set hive.mapred.mode=strict;
hive> SELECT e.name, e.salary FROM employees e LIMIT 100;
FAILED: Error in semantic analysis: No partition predicate found for
Alias "e" Table "employees"
hive> set hive.mapred.mode=nonstrict;
hive> SELECT e.name, e.salary FROM employees e LIMIT 100;
查看现有分区
hive> SHOW PARTITIONS employees;
...
Country=CA/state=AB
country=CA/state=BC
...
country=US/state=AL
country=US/state=AK
查看分区详细 分区键
hive> SHOW PARTITIONS employees PARTITION(country='US');
country=US/state=AL
country=US/state=AK
...
hive> SHOW PARTITIONS employees PARTITION(country='US', state='AK');
country=US/state=AK
通过 DESC 显示分区键
hive> DESCRIBE EXTENDED employees;
name string,
salary float,
...
address struct<...>,
country string,
state string
Detailed Table Information...
partitionKeys:[FieldSchema(name:country, type:string, comment:null),
FieldSchema(name:state, type:string, comment:null)],
...
查看建表语句
hive> show create table score;
OK
CREATE TABLE `score`(
`num` string,
`name` string,
`score` float)
PARTITIONED BY (
`year` string,
`month` string,
`day` string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://hdpcls1/user/hive/warehouse/score'
TBLPROPERTIES (
'transient_lastDdlTime'='1489117674')
Time taken: 0.35 seconds, Fetched: 19 row(s)
从表读入数据到表中
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'OR')
SELECT * FROM staged_employees se
WHERE se.cnty = 'US' AND se.st = 'OR';
从文件读入 分区表
从本地
LOAD DATA LOCAL INPATH '${env:HOME}/california-employees'
INTO TABLE employees
PARTITION (country = 'US', state = 'CA');
从hdfs
LOAD DATA INPATH '/california-employees'
INTO TABLE employees
PARTITION (country = 'US', state = 'CA');
为外部表增加指定分区
ALTER TABLE log_messages ADD PARTITION(year = 2012, month = 1, day = 2)
LOCATION 'hdfs://master_server/data/log_messages/2012/01/02';
导出数据
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/ca_employees'
SELECT name, salary, address
FROM employees
WHERE se.state = 'CA';
桶表
CREATE TABLE par_table(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(date STRING, pos STRING)
CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
ROW FORMAT DELIMITED ‘\t’
FIELDS TERMINATED BY '\n'
STORED AS SEQUENCEFILE;
删除表
DROP TABLE IF EXISTS employees;
修改表结构
ALTER TABLE modifies table metadata only. The data for the table is
untouched. It’s up to you to ensure that any modifications are consistent
with the actual data.
修改表名
ALTER TABLE log_messages RENAME TO logmsgs;
增加,修改,删除 表分区
ALTER TABLE log_messages ADD IF NOT EXISTS
PARTITION (year = 2011, month = 1, day = 1) LOCATION '/logs/2011/01/01'
PARTITION (year = 2011, month = 1, day = 2) LOCATION '/logs/2011/01/02'
PARTITION (year = 2011, month = 1, day = 3) LOCATION '/logs/2011/01/03'
ALTER TABLE log_messages PARTITION(year = 2011, month = 12, day = 2)
SET LOCATION 's3n://ourbucket/logs/2011/01/02';
ALTER TABLE log_messages DROP IF EXISTS PARTITION(year = 2011, month = 12, day = 2);
修改列
ALTER TABLE log_messages
CHANGE COLUMN hms hours_minutes_seconds INT
COMMENT 'The hours, minutes, and seconds part of the timestamp'
AFTER severity;
这个命令可以允许改变列名、数据类型、注释、列位置或者它们的任意组合
增加列
ALTER TABLE log_messages ADD COLUMNS (
app_name STRING COMMENT 'Application name',
session_id LONG COMMENT 'The current session id');
字段位置在所有列后面(partition列前)
删除 替换列
ALTER TABLE log_messages REPLACE COLUMNS (
hours_mins_secs INT COMMENT 'hour, minute, seconds from timestamp',
severity STRING COMMENT 'The message severity'
message STRING COMMENT 'The rest of the message');