-- Register the Hudi catalog used by the rest of this script.
-- 'mode' = 'hms' selects the Hive Metastore backed catalog implementation.
-- 'hive.conf.dir' points at the Hive client configuration directory of the
-- CDH parcel, and 'catalog.path' is the warehouse root for external tables.
CREATE CATALOG hudi_catalog WITH (
    'type' = 'hudi',
    'mode' = 'hms',
    'catalog.path' = '/warehouse/tablespace/external/hive/',
    'hive.conf.dir' = '/opt/cloudera/parcels/CDH-7.1.7-1.cdh7.1.7.p0.15945976/lib/hive/conf'
);
-- Switch the session to the Hudi catalog and its working database.
-- IF NOT EXISTS keeps the script re-runnable. Without it a second run
-- aborts here because the database is already registered in the metastore.
USE CATALOG hudi_catalog;
CREATE DATABASE IF NOT EXISTS hudi_test;
USE hudi_test;
-- Hudi table partitioned by data_dt and synced to the Hive Metastore.
--
-- Changes compared with the earlier definition:
--   * IF NOT EXISTS makes the statement safe to re-run.
--   * The entries 'ROW FORMAT SERDE', 'STORED AS INPUTFORMAT', 'OUTPUTFORMAT'
--     and 'LOCATION' were removed. They are Hive DDL clauses, not Hudi or
--     Flink connector option keys, and the storage location is already
--     given by 'path'.
--
-- Option notes:
--   * The record key is the primary key column id.
--   * Partition directories are written Hive-style (data_dt=<value>), and the
--     hive_sync partition extractor is the matching Hive-style extractor.
--   * hive_sync.* registers the table as hudi_test.hudi_test_0620_1 through
--     the metastore at thrift://node20:9083.
--
-- NOTE(review): cost is DOUBLE, which is an approximate type. If it holds a
-- monetary amount, consider DECIMAL(p, s) -- confirm with the data owner.
CREATE TABLE IF NOT EXISTS hudi_test_0620_1 (
    id STRING PRIMARY KEY NOT ENFORCED,
    cost DOUBLE COMMENT '费用',
    `data_dt` STRING
)
PARTITIONED BY (data_dt)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs:///warehouse/tablespace/external/hive/hudi_test.db/hudi_test_0620_1',
    'hoodie.datasource.write.keygenerator.class' = 'org.apache.hudi.keygen.ComplexAvroKeyGenerator',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.hive_style_partitioning' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://node20:9083',
    'hive_sync.conf.dir' = '/opt/cloudera/parcels/CDH/lib/hive/conf',
    'hive_sync.db' = 'hudi_test',
    'hive_sync.table' = 'hudi_test_0620_1',
    'hive_sync.partition_fields' = 'data_dt',
    'hive_sync.partition_extractor_class' = 'org.apache.hudi.hive.HiveStylePartitionValueExtractor'
);
-- Seed four sample rows, one per data_dt partition.
-- The explicit column list ties each value to its column by name, so the
-- statement keeps working if the table's column order ever changes.
INSERT INTO hudi_test.hudi_test_0620_1 (id, cost, data_dt)
VALUES
    ('hudi', 10100.01, '2022-10-31'),
    ('spark', 10100.02, '2023-05-26'),
    ('flink', 10100.06, '2023-05-27'),
    ('hive', 10100.00, '2023-05-28');
-- Read back a single partition to verify the write.
-- Columns are listed explicitly instead of using * so the result shape
-- stays stable if the table schema grows.
SELECT
    id,
    cost,
    `data_dt`
FROM hudi_test_0620_1
WHERE `data_dt` = '2023-05-28';