数据操作
数据导入
load data
load data [local] inpath '数据的 path' [overwrite] into table
table_name [partition (partcol1=val1,…)];
案例
将如下成绩表导入hive
姓名 科目 成绩
张三,数学,100
张三,语文,99
张三,英语,90
李四,数学,80
李四,语文,85
李四,英语,95
-- 建表语句
create table test01 (
name string comment '姓名',
course string comment '科目',
score float comment '成绩'
)
row format delimited
fields terminated by ','
lines terminated by '\n';
-- load data 本地文件
load data local inpath '/opt/module/apache-hive-3.1.2-bin/study/test01.txt'
into table test01;
-- load data hdfs文件
load data inpath '/test01.txt' into table test01;
注意:
对于管理表
- load 本地文件时,相当于文件copy至hive目录
- load hdfs文件时,相当于将文件mv到hive目录
对于外部表load数据与管理表表现一致,唯一区别在于外部表删除不会删除数据文件
Insert 插入
基本插入
# 覆盖插入
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]]
select_statement1 FROM from_statement;
# 添加插入
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)]
select_statement1
案例
insert into table test01 select * from test02;
insert overwrite table test01 select * from test02;
多表(多分区)插入
FROM from_statement
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2]
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...;
FROM from_statement
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2]
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...;
案例
from test01
insert into table test02 select name,course,score
insert overwrite table test03 select name,course,score;
from test03
insert into table test04 partition (name='zhangsan')
select course,score where name = 'zhangsan'
insert into table test04 partition (name='lisi')
select course,score where name = 'lisi'
Create Table… as Select …
create table if not exists test05
as select name, course,score from test03;
Create External Table … Location …
创建外部表,指定localtion
create [external] table test06 (
name string comment '姓名',
course string comment '科目',
score float comment '成绩'
)
row format delimited
fields terminated by ','
lines terminated by '\n'
location '/study/hive/test06';
Import(建表+数据)
将export数据导入
# 导出表至hdfs
export table study.test06 to '/study/hive/export/test06';
导入表
import table study.test07 from '/study/hive/export/test06';
数据导出
Insert 导出
## 将查询的结果导出到本地
insert overwrite local directory
'/opt/module/hive/data/export/student'
select * from student;
## 将查询的结果格式化导出到本地
hive(default)>insert overwrite local directory
'/opt/module/hive/data/export/student1'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select * from student;
## 将查询的结果导出到 HDFS 上(没有 local)
insert overwrite directory '/user/atguigu/student2'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select * from student;
Hive Shell 命令导出
基本语法:(hive -f/-e 执行语句或者脚本 > file)
bin/hive -e 'select * from default.student;' > /opt/module/hive/data/export/student4.txt;
Export 导出到 HDFS 上
配合import使用
export table default.student
to '/user/hive/warehouse/export/student';
export 和 import 主要用于两个 Hadoop 平台集群之间 Hive 表迁移。