DDL
-
创建数据库
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name [COMMENT database_comment] [LOCATION hdfs_path] [WITH DBPROPERTIES (property_name=property_value, ...)];
Hive存储在HDFS上的路径:
可以通过hive-site.xml中hive.metastore.warehouse.dir属性来配置,默认路径为/user/hive/warehouse。
创建一个数据库test1:
-- Create a database named test1 (errors out if it already exists).
create database test1;
创建一个数据库,不存在就忽略:
-- IF NOT EXISTS makes this a no-op instead of an error when test2 already exists.
create database if not exists test2;
创建一个数据库,并指定文件位置为hadoop根目录下/test/location:
-- LOCATION stores this database's data under /test/location on HDFS
-- instead of the default warehouse directory.
create database if not exists test3 location '/test/location';
创建一个数据库,并增加额外属性:
-- DBPROPERTIES attaches custom key/value metadata to the database.
create database test4 with dbproperties('creator'='hlsijx');
-
显示当前处于哪个数据库
-- Make the Hive CLI prompt display the name of the current database.
set hive.cli.print.current.db=true;
-
查看数据库信息
DESCRIBE DATABASE [EXTENDED] db_name;
查询数据库信息:
-- Show basic metadata for database test1.
desc database test1;
查询数据库扩展信息:
-- EXTENDED additionally shows the DBPROPERTIES set at creation time
-- (here: the 'creator' property of test4).
desc database extended test4;
-
建表
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name  -- (Note: TEMPORARY available in Hive 0.14.0 and later)
  [(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
  [COMMENT table_comment]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [SKEWED BY (col_name, col_name, ...)  -- (Note: Available in Hive 0.10.0 and later)
     ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
     [STORED AS DIRECTORIES]]
  [
   [ROW FORMAT row_format]
   [STORED AS file_format]
   | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  -- (Note: Available in Hive 0.6.0 and later)
  ]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]  -- (Note: Available in Hive 0.6.0 and later)
  [AS select_statement];  -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
创建员工表emp
-- Employee table; rows are read from a tab-delimited text file.
CREATE TABLE emp(
    empno    int,     -- employee number
    ename    string,  -- employee name
    job      string,
    mgr      int,     -- presumably the manager's empno — confirm against the data file
    hiredate string,
    sal      double,
    comm     double,
    deptno   int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
DML
-
导入数据
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
LOCAL:本地系统,如果没有local那么就是指的HDFS的路径
OVERWRITE:是否覆盖已有数据,如果没有该关键字则为数据追加。
示例:加载本地文件 /home/hadoop/data/emp.txt 到 emp 表
-- LOCAL: read the file from the local filesystem; OVERWRITE: replace emp's existing data.
LOAD DATA LOCAL INPATH '/home/hadoop/data/emp.txt' OVERWRITE INTO TABLE emp;
或者加载 hdfs 文件到 emp 表
-- Without LOCAL the path is on HDFS. Note: the source file is moved (not copied)
-- into the table's warehouse directory, so it disappears from its original path.
LOAD DATA INPATH 'hdfs://ip:8020/data/emp.txt' OVERWRITE INTO TABLE emp;
注意:当该语句执行后,原 hdfs 路径下的文件会被移动(而不是复制)到表的 warehouse 目录下,原位置的文件因此消失;若要再执行一遍,需要重新上传文件到 hdfs
- 导出数据
写入到另一张表:
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
导出到目录:
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 [ROW FORMAT row_format] [STORED AS file_format] select_statement FROM from_statement;
导出 emp 表数据到本地 /data/hive 目录下
-- Export selected emp columns to the local directory /data/hive/ as
-- tab-separated text. OVERWRITE replaces any existing contents of that directory.
INSERT OVERWRITE LOCAL DIRECTORY '/data/hive/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT empno, ename, sal, deptno
FROM emp;