Data Manipulation Language:数据操纵语言
数据导入
1、load模式
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION(partcol1=val1,partcol2=val2 ...)]
- load data:表示加载数据
- local:表示从本地加载数据到 hive 表;否则从 HDFS 加载数据到hive 表
- inpath:表示加载数据的路径
- overwrite:表示覆盖表中已有数据,否则表示继续添加数据
- into table tablename:表示要加载数据到哪个表
- partition:表示上传到指定分区
hive (bigdata)> create table student(id int,name string)
> row format delimited fields terminated by '\t';
OK
Time taken: 1.404 seconds
hive (bigdata)> load data local inpath '/opt/test/student' into table student;
Loading data to table bigdata.student
OK
Time taken: 2.4 seconds
2、insert模式
INSERT OVERWRITE TABLE tablename1 [PARTITIO
N (partcol1=val1, partcol2=val2 ...) [IF NO
T EXISTS]] select_statement1 FROM from_stat
ement;
INSERT INTO TABLE tablename2 [PARTITION (pa
rtcol1=val1, partcol2=val2 ...)] select_sta
tement1 FROM from_statement;
hive (bigdata)> select * from student;
OK
student.id student.name
1 zhangsan
2 lisi
Time taken: 4.057 seconds, Fetched: 2 row(s)
#直接添加数据
hive (bigdata)> insert into table student values('3','wangwu');
hive (bigdata)> select * from student;
OK
student.id student.name
3 wangwu
1 zhangsan
2 lisi
Time taken: 0.326 seconds, Fetched: 3 row(s)
#从其他表加载数据
hive (bigdata)> create table student2(id int,name string)
> row format delimited fields terminated by '\t';
hive (bigdata)> insert into table student2 values('4','chenliu');
hive (bigdata)> insert into table student select * from student2;
hive (bigdata)> select * from student;
OK
student.id student.name
3 wangwu
4 chenliu
1 zhangsan
2 lisi
Time taken: 0.311 seconds, Fetched: 4 row(s)
#覆盖掉原来表中数据
hive (bigdata)> insert overwrite table student select * from student2;
#将一个表的数据添加到其它多个表中
#以下三种结果相同,都不覆盖原表数据
hive (bigdata)> from student2
> insert overwrite table teacher partition(day=20180812) select *
> insert into table teacher partition(day=20180813) select *;
hive (bigdata)> from student3
> insert into table teacher partition(day=20180812) select *
> insert overwrite table teacher partition(day=20180813) select *;
hive (bigdata)> from student2
> insert into table aaa partition(day=20180812) select *
> insert into table aaa partition(day=20180813) select *;
#覆盖原表数据
hive (bigdata)> from student3
> insert overwrite table aaa partition(day='20180812') select *
> insert overwrite table aaa partition(day='20180813') select *;
3、as select模式
hive (bigdata)> create table student4 as select * from teacher;
4、location模式
#本地上传到hdfs
hive (bigdata)> dfs -put /opt/test/student /student;
#从hdfs导入
hive (bigdata)> create table student5(id int,name string)
> row format delimited fields terminated by '\t'
> location '/student';
5.import模式
#导入的必须是经过export导出的数据
hive (wangfang)> import table teacher from '/yyy';
数据导出
1、insert模式
#导出到本地
hive (bigdata)> insert overwrite local directory '/opt/test/export'
> select * from teacher;
[root@master export]# more 000000_0
4chenliu20180812
5wangfang20180812
1zhangsan20180812
2lisi20180812
4chenliu20180813
5wangfang20180813
1zhangsan20180813
2lisi20180813
#导出到本地,添加格式
hive (bigdata)> insert overwrite local directory '/opt/test/export'
> row format delimited fields terminated by '\t'
> select * from teacher;
[root@master export]# more 000000_0
4 chenliu 20180812
5 wangfang 20180812
1 zhangsan 20180812
2 lisi 20180812
4 chenliu 20180813
5 wangfang 20180813
1 zhangsan 20180813
2 lisi 20180813
#导出到HDFS
hive (bigdata)> insert overwrite directory '/xxx'
> row format delimited fields terminated by '\t'
> select * from teacher;
2、export模式
#导出路径目录需不存在,导出是自行创建
hive (bigdata)> export table teacher to '/yyy';
3、dfs -get模式
hive (bigdata)> dfs -get /hive/warehouse/bigdata.db/student/000000_0 /opt/test/export/aaa.txt
4、hive -e 模式
[root@master export]# hive -e 'select * from student;' > /opt/test/export/bbb.txt;