Hudi SQL语法

CREATE TABLE

CREATE TABLE命令通过指定带有表属性的字段列表来创建Hudi Table。
格式:

CREATE TABLE [ IF NOT EXISTS] [database_name.]table_name
[ (columnTypeList)]
USING hudi
[ COMMENT table_comment ]
[ LOCATION location_path ]
[ OPTIONS (options_list) ]

创建非分区表
创建一个cow内部表

create table if not exists hudi_table0 (
id int,
name string,
price double
)  using hudi
options (type = 'cow',
primaryKey = 'id'
);

创建一个mor外部表

create table if not exists hudi_table1 (
id int,
name string,
price double,
ts bigint
)  using hudi
location '/tmp/hudi/hudi_table1'
options (
type = 'mor',
primaryKey = 'id,name',
preCombineField = 'ts'
);

创建分区表

create table if not exists hudi_table_p0 (
id bigint,
name string,
ts bigint,
dt string,
hh string
)  using hudi
location '/tmp/hudi/hudi_table_p0'
options (
type = 'cow',
primaryKey = 'id',
preCombineField = 'ts'
)
partitioned by (dt, hh);

以SQL方式创建一个hudi表的外表,与spark-shell or deltastreamer方式创建的hudi外表相同

create table h_p1
using hudi
options (
primaryKey = 'id',
preCombineField = 'ts'
)
partitioned by (dt)
location '/path/to/hudi';
创建表指定表属性
create table if not exists h3(
id bigint,
name string,
price double
) using hudi
options (
primaryKey = 'id',
type = 'mor',
hoodie.cleaner.fileversions.retained = '20',
hoodie.keep.max.commits = '20'
);

备注:Hudi当前不支持使用char、varchar、tinyint、smallint类型,建议使用string或int类型。

CREATE TABLE AS SELECT

CREATE TABLE As SELECT命令通过指定带有表属性的字段列表来创建Hudi Table。
格式:

CREATE TABLE [ IF NOT EXISTS] [database_name.]table_name
USING hudi
[ COMMENT table_comment ]
[ LOCATION location_path ]
[ OPTIONS (options_list) ]
[ AS query_statement ]

创建分区表

create table h2 using hudi
options (type = 'cow', primaryKey = 'id')
partitioned by (dt)
as
select 1 as id, 'a1' as name, 10 as price, 1000 as dt;

创建非分区表

create table h3 using hudi
as
select 1 as id, 'a1' as name, 10 as price;

从parquet表加载数据到hudi表
创建parquet表

create table parquet_mngd using parquet options(path=’hdfs:///tmp/parquet_dataset/*.parquet’);

CTAS创建hudi表

create table hudi_tbl using hudi location 'hdfs:///tmp/hudi/hudi_tbl/' options (
type = 'cow',
primaryKey = 'id',
preCombineField = 'ts'
)
partitioned by (datestr) as select * from parquet_mngd;

备注:为了更好的加载数据性能,CTAS使用bulk insert作为写入方式。

DROP TABLE

格式:

DROP TABLE [IF EXISTS] [db_name.]table_name;

删除表

DROP TABLE IF EXISTS hudidb.h1;

SHOW TABLE

展示当前库中所有表

SHOW TABLES IN hudidb;

ALTER RENAME TABLE

重命名表

ALTER TABLE oldTableName RENAME TO newTableName
alter table h0 rename to h0_1;

ALTER ADD COLUMNS

添加新列

ALTER TABLE tableIdentifier ADD COLUMNS(colAndType (,colAndType)*)
alter table h0_1 add columns(ext0 string);

TRUNCATE TABLE

清空表数据

TRUNCATE TABLE tableIdentifier
truncate table h0_1;

INSERT INTO

Hudi对于设置了主键的表支持三种Insert模式,默认为upsert。

1、trict模式,Insert 语句将保留 COW 表的主键唯一性约束,不允许重复记录。如果在插入过程中已经存在记录,则会为 COW 表抛出异常,对于MOR表,该模式与upsert模式行为一致。
2、non-strict模式,对主键表采用insert处理。
3、upsert模式,对于主键表的重复值进行更新操作。

insert into h0 select 1, 'a1', 20;

-- insert static partition
insert into h_p0 partition(dt = '2021-01-02') select 1, 'a1';

-- insert dynamic partition
insert into h_p0 select 1, 'a1', dt;

-- insert dynamic partition
insert into h_p1 select 1 as id, 'a1', '2021-01-03' as dt, '19' as hh;

-- insert overwrite table
insert overwrite table h0 select 1, 'a1', 20;

-- insert overwrite table with static partition
insert overwrite h_p0 partition(dt = '2021-01-02') select 1, 'a1';

-- insert overwrite table with dynamic partition
insert overwrite table h_p1 select 2 as id, 'a2', '2021-01-03' as dt, '19' as hh;

MERGE INTO

通过MERGE INTO命令,根据一张表或子查询的连接条件对另外一张表进行查询,连接条件匹配上的进行UPDATE或DELETE,无法匹配的执行INSERT。这个语法仅需要一次全表扫描就完成了全部同步工作,执行效率要高于INSERT+UPDATE。

格式:

MERGE INTO tableIdentifier AS target_alias

USING (sub_query | tableIdentifier) AS source_alias

ON <merge_condition>

[ WHEN MATCHED [ AND <condition> ] THEN <matched_action> ]

[ WHEN MATCHED [ AND <condition> ] THEN <matched_action> ]

[ WHEN NOT MATCHED [ AND <condition> ] THEN <not_matched_action> ]


<merge_condition> =A equal bool condition

<matched_action> =

DELETE |

UPDATE SET * |

UPDATE SET column1 = expression1 [, column2 = expression2 ...]

<not_matched_action> =

INSERT * |

INSERT (column1 [, column2 ...]) VALUES (value1 [, value2 ...])

1.merge-on condition当前只支持主键列。
2.当前仅支持对COW表进行部分字段的更新。

merge into h0 using s0
on h0.id = s0.id
when matched then update set h0.id = s0.id,price = s0.price * 2

3.当前仅支持对COW表进行更新时,目标表的字段出现在更新表达式的右值。

merge into h0 using s0 on h0.id = s0.id
when matched then update set id = s0.id,
name = h0.name,
price = s0.price + h0.price

4.如果针对部分字段进行更新,更新值必须包含主键列:Hudi表使用id和start_dt作为符合主键。

merge into hudi_test3 D using (select id,start_dt from tmp_tb2 where col2 in ('D', 'U')) N
on (N.id = D.id and N.start_dt = D.start_dt)
when matched then update set
D.id = N.id,
D.start_dt = N.start_dt,
D.end_dt = '2022-04-28';
merge into h0 as target
using (
select id, name, price, flag from s
) source
on target.id = source.id
when matched then update set *
when not matched then insert *;

merge into h0
using (
select id, name, price, flag from s
) source
on h0.id = source.id
when matched and flag != 'delete' then update set id = source.id, name = source.name, price = source.price * 2
when matched and flag = 'delete' then delete
when not matched then insert (id,name,price) values(source.id, source.name, source.price);

merge into t0 as target
using s0 source
on target.id = source.id
when matched then update set *
when not matched then insert *;

UPDATE

格式

update h0 set price = price + 20 where id = 1;
update h0 set price = price *2, name = 'a2' where id = 2;

DELETE

格式

DELETE from tableIdentifier [ WHERE boolExpression]
delete from h0 where column1 = 'country';

delete from h0 where column1 IN ('country1', 'country2');

delete from h0 where column1 IN (select column11 from sourceTable2);

delete from h0 where column1 IN (select column11 from sourceTable2 where column1 = 'USA');

delete from h0;

COMPACTION

缩( compaction)用于在 MergeOnRead表将基于行的log日志文件转化为parquet列式数据文件,用于加快记录的查找。

格式

schedule compaction  on h1;
show compaction on h1;
run compaction on h1 at 20210915170758;

schedule compaction  on '/tmp/hudi/h1';
run compaction on '/tmp/hudi/h1';

SAVE_POINT

管理Hudi表的savepoint。

格式
创建savepoint:

call create_savepoints('[table_name]', '[commit_Time]', '[user]', '[comments]');

查看所有存在的savepoint

call show_savepoints(table =>'[table_name]');

回滚savepoint:

call rollback_savepoints('[table_name]', '[commit_Time]');
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

王旭亮_

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值