Hive DML常用操作及示例

1、文件载入Hive表

语法:

LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]

示例:

# 创建分区表
create table tb_load_data(
id      int,
name    string,
hobby   array<string>,
add     map<String,string>
)
partitioned by(part_tag1 string,part_tag2 int)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n'
;

# 加载数据,数据加载后扔是目录下的源文件
load data local inpath '/usr/local/hive-2.1.1/data_dir/complex_data_type.txt' overwrite into table tb_load_data partition (part_tag1 = 'first',part_tag2=100);

# 数据文件信息
[hadoop@node224 ~]$ hdfs dfs -cat /user/hive/warehouse/db01.db/tb_load_data/part_tag1=first/part_tag2=100/complex_data_type.txt
1,xiaoming,book-TV-code,beijing:chaoyang-shagnhai:pudong
2,lilei,book-code,nanjing:jiangning-taiwan:taibei
3,lihua,music-book,heilongjiang:haerbin

2、通过查询向Hive表中插入数据

语法:

# 基础语法
NSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
 
# Hive extension (multiple inserts):
FROM from_statement
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2]
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...;
FROM from_statement
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2]
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...;
 
# Hive extension (dynamic partition inserts):
INSERT OVERWRITE TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
INSERT INTO TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;

动态分区插入相关的配置参数

6756b36068cc3ab8bba47ba9e0096fa27f2.jpg

示例:

# 创建基础插入测试表
create table tb_insert_data(
id      int,
name    string,
hobby   array<string>,
add     map<String,string>
)
partitioned by(part_tag3 int,part_tag4 string)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n'
;

# insert into 模式加载追加
# 静态
insert into table tb_insert_data partition(part_tag3=200,part_tag4='second')  select id,name,hobby,add from tb_load_data;

# 动态
set hive.exec.dynamic.partition.mode=nonstrict;  # 是会话级别的设置
insert into table tb_insert_data partition(part_tag3,part_tag4)  select id,name,hobby,add,part_tag2,part_tag1 from tb_load_data;
select id,name,hobby,add,part_tag2,part_tag1 from tb_load_data

# insert overwrite 模式覆盖相同的数据,而不是下一批的数据覆盖整个表的数据
# 静态
insert overwrite table tb_insert_data partition(part_tag3=200,part_tag4='second')  select id,name,hobby,add from tb_load_data where name='lilei';
insert overwrite table tb_insert_data partition(part_tag3=300,part_tag4='second')  select id,name,hobby,add from tb_load_data;

# 动态
insert overwrite table tb_insert_data partition(part_tag3,part_tag4)  select id,name,hobby,add,part_tag2,part_tag1 from tb_load_data;
select * from  tb_insert_data;

#hive扩展一次多表写入
# 创建一次多表插入测试表

create table tb_insert_multi_01(
id      int,
name    string,
hobby   array<string>,
add     map<String,string>
)
partitioned by(part_tag7 int,part_tag8 string)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n'
;

create table tb_insert_multi_02(
id      int,
name    string,
hobby   array<string>,
add     map<String,string>
)
partitioned by(part_tag5 string,part_tag6 int)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n'
;

create table tb_insert_multi_03(
id      int,
name    string,
hobby   array<string>,
add     map<String,string>
)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n'
;

# 一次多表 overwrite\into\分区\非分区
from tb_load_data
insert overwrite table tb_insert_multi_01 partition(part_tag7=400,part_tag8='multi_01') select id,name,hobby,add
insert into table tb_insert_multi_02 partition(part_tag5,part_tag6) select id,name,hobby,add,part_tag1,part_tag2
insert into tb_insert_multi_03 select id,name,hobby,add
;

# 加载完成后测试载入数据情况
select * from tb_insert_multi_01;
select * from tb_insert_multi_02;
select * from tb_insert_multi_03;

3、查询结果写入文件系统(本地\hdfs)

语法:

INSERT OVERWRITE [LOCAL] DIRECTORY directory1
  [ROW FORMAT row_format] [STORED AS file_format] (Note: Only available starting with Hive 0.11.0)
  SELECT ... FROM ...
 
# Hive extension (multiple inserts):
FROM from_statement
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 select_statement1
[INSERT OVERWRITE [LOCAL] DIRECTORY directory2 select_statement2] ...
 
  
# row_format
  : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
        [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
        [NULL DEFINED AS char] (Note: Only available starting with Hive 0.13)

示例:

# 写入本地文件系统
insert overwrite local directory '/usr/local/hive-2.1.1/data_dir/tb_insert_multi_02'
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n'
stored as textfile
select * from  tb_insert_multi_02
;

# 写入HDFS目录
insert overwrite  directory '/tmp/hive/tb_insert_multi_02'
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n'
stored as textfile
select * from  tb_insert_multi_02
;

# 扩展语句一次分别写入本地和HDFS
FROM tb_insert_multi_02
INSERT OVERWRITE LOCAL DIRECTORY '/usr/local/hive-2.1.1/data_dir/tb_insert_multi_02_local' select id,name,hobby,add
INSERT OVERWRITE DIRECTORY '/tmp/hive/tb_insert_multi_02_hdfs' select *
;

4、将指定的值插入Hive表

不能插入复合数据类型,如集合,数组等

语法

INSERT INTO TABLE tablename [PARTITION (partcol1[=val1], partcol2[=val2] ...)] VALUES values_row [, values_row ...]
  
Where values_row is:
( value [, value ...] )
where a value is either null or any valid SQL literal

示例

# 建表
CREATE TABLE students (name VARCHAR(64), age INT, gpa DECIMAL(3, 2));
# 插入数据
INSERT INTO TABLE students VALUES ('fred flintstone', 35, 1.28), ('barney rubble', 32, 2.32);
 
# 复杂数据类型插入是提示错误
0: jdbc:hive2://node225:10000/db01> insert into table tb_insert_multi_02 partition(part_tag5,part_tag6) values (4,huafeng,'["music","book"]','{"heilongjiang":"haerbin"}','third',500);
FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_TABLE_OR_COL not supported in insert/values
Error: Error while compiling statement: FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_TABLE_OR_COL not supported in insert/values (state=42000,code=10293)

5、更新(update)合并(merge)删除(delete)

Update\Merge\Delete  can only be performed on tables that support ACID,此处遗留一个点,Hive的事务开启。。

语法

# update
UPDATE tablename SET column = value [, column = value ...] [WHERE expression]

# delete
DELETE FROM tablename [WHERE expression]

# merge
MERGE INTO <target table> AS T USING <source expression/table> AS S
ON <boolean expression1>
WHEN MATCHED [AND <boolean expression2>] THEN UPDATE SET <set clause list>
WHEN MATCHED [AND <boolean expression3>] THEN DELETE
WHEN NOT MATCHED [AND <boolean expression4>] THEN INSERT VALUES<value list>

Hive LanguageManual DML

转载于:https://my.oschina.net/peakfang/blog/2243706

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值