Flink SQL 从 Kafka 读数据写入 Hudi 时,数据写不进 Hudi。以下是测试用的代码:
-- Kafka source table: consumes JSON records from topic dwd_dm_day_hudi_01,
-- starting from the earliest offset.
CREATE TABLE IF NOT EXISTS dwd_dm_day_kafka (
    `day_id`         int,
    `month_id`       int,
    `qn_id`          int,
    `year_id`        int,
    `phycial_delete` int  -- NOTE(review): likely a typo for "physical_delete"; kept as-is since it must match the JSON field name
) WITH (
    'connector'                    = 'kafka',
    'topic'                        = 'dwd_dm_day_hudi_01',
    'properties.bootstrap.servers' = 'master1.betacdh.com:9092,work1.betacdh.com:9092,work2.betacdh.com:9092',
    'scan.startup.mode'            = 'earliest-offset',
    'format'                       = 'json'
);
-- Hudi sink table (MERGE_ON_READ) with Hive metastore sync.
-- IMPORTANT: the Hudi streaming writer only commits data on Flink checkpoints,
-- so checkpointing MUST be enabled (SET execution.checkpointing.interval=...)
-- for a continuous INSERT ... SELECT job to flush anything into Hudi.
CREATE TABLE dwd_dm_day_hudi (
    `day_id`         int,
    `month_id`       int,
    `qn_id`          int,
    `year_id`        int,
    `phycial_delete` int,
    PRIMARY KEY (`day_id`) NOT ENFORCED
) COMMENT '订单表'
WITH (
    'connector'  = 'hudi',
    'path'       = 'hdfs://work1.betacdh.com:8020/test_sdc/dwd_dm_day_hudi',
    'table.type' = 'MERGE_ON_READ',
    -- BUG FIX: the option key is 'write.operation', not 'write.option'.
    -- The misspelled key was silently ignored, leaving the default 'upsert'.
    'write.operation' = 'insert',
    'hive_sync.enable'         = 'true',  -- NOTE(review): newer Hudi releases spell this 'hive_sync.enabled' — verify against your Hudi version
    'hive_sync.mode'           = 'hms',
    'hive_sync.metastore.uris' = 'thrift://work1.betacdh.com:9083',
    'hive_sync.table'          = 'dwd_dm_day_hudi',
    'hive_sync.db'             = 'test_sdc',
    'write.tasks'              = '1',
    'compaction.tasks'         = '1',
    'compaction.async.enable'  = 'true',  -- NOTE(review): newer releases use 'compaction.async.enabled' — verify
    -- compact after every delta commit (aggressive; fine for testing)
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits'    = '1'
);
-- Continuous Kafka -> Hudi pipeline. Explicit column list guards against
-- silent breakage if either schema changes. NOTE: this streaming insert only
-- commits to Hudi on checkpoints — enable checkpointing or nothing is written.
insert into dwd_dm_day_hudi (day_id, month_id, qn_id, year_id, phycial_delete)
select
    day_id,
    month_id,
    qn_id,
    year_id,
    phycial_delete
from dwd_dm_day_kafka;
以上情况下,数据并不能写进 Hudi。
但是直接用 INSERT INTO ... VALUES 插入常量值是可以写入的:
-- Hudi sink table (MERGE_ON_READ) with Hive metastore sync.
-- A bounded INSERT ... VALUES works without checkpointing because the job
-- finishes and commits on completion; a continuous streaming insert does not.
CREATE TABLE dwd_dm_day_hudi (
    `day_id`         int,
    `month_id`       int,
    `qn_id`          int,
    `year_id`        int,
    `phycial_delete` int,
    PRIMARY KEY (`day_id`) NOT ENFORCED
) COMMENT '订单表'
WITH (
    'connector'  = 'hudi',
    'path'       = 'hdfs://work1.betacdh.com:8020/test_sdc/dwd_dm_day_hudi',
    'table.type' = 'MERGE_ON_READ',
    -- BUG FIX: the option key is 'write.operation', not 'write.option'.
    -- The misspelled key was silently ignored, leaving the default 'upsert'.
    'write.operation' = 'insert',
    'hive_sync.enable'         = 'true',  -- NOTE(review): newer Hudi releases spell this 'hive_sync.enabled' — verify against your Hudi version
    'hive_sync.mode'           = 'hms',
    'hive_sync.metastore.uris' = 'thrift://work1.betacdh.com:9083',
    'hive_sync.table'          = 'dwd_dm_day_hudi',
    'hive_sync.db'             = 'test_sdc',
    'write.tasks'              = '1',
    'compaction.tasks'         = '1',
    'compaction.async.enable'  = 'true',  -- NOTE(review): newer releases use 'compaction.async.enabled' — verify
    -- compact after every delta commit (aggressive; fine for testing)
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits'    = '1'
);
-- Bounded insert of one literal row; explicit column list prevents silent
-- column misalignment if the table schema ever changes.
insert into dwd_dm_day_hudi (day_id, month_id, qn_id, year_id, phycial_delete)
values (3333, 3333, 3333, 3333, 3333);
直接用 INSERT INTO ... VALUES 插入常量值,是可以将数据写入 Hudi 的。
这个问题困扰了我一天。今早无意中发现:开启 checkpoint 后,流式任务的数据就能成功写入 Hudi。代码示例如下:
-- Enable checkpointing (20s interval). The Hudi streaming writer only commits
-- on checkpoints, so without this the continuous Kafka->Hudi insert never
-- flushes any data — this was the root cause of the original problem.
set execution.checkpointing.interval=20000;
-- Directory where checkpoint files are stored
set state.checkpoints.dir=hdfs:///flink/flink-checkpoints;
-- Retain checkpoints after job cancellation; default NO_EXTERNALIZED_CHECKPOINTS,
-- allowed values: NO_EXTERNALIZED_CHECKPOINTS, DELETE_ON_CANCELLATION, RETAIN_ON_CANCELLATION
set execution.checkpointing.externalized-checkpoint-retention=RETAIN_ON_CANCELLATION;
-- Checkpointing mode; default EXACTLY_ONCE, allowed values: EXACTLY_ONCE, AT_LEAST_ONCE.
-- EXACTLY_ONCE requires a sink that supports transactions.
set execution.checkpointing.mode=EXACTLY_ONCE;
-- Checkpoint timeout in milliseconds; default is 10 minutes
set execution.checkpointing.timeout=600000;
-- Number of completed checkpoints to retain; default 1
set state.checkpoints.num-retained=3;
-- Kafka source table: consumes JSON records from topic dwd_dm_day_hudi_01,
-- starting from the earliest offset.
CREATE TABLE IF NOT EXISTS dwd_dm_day_kafka (
    `day_id`         int,
    `month_id`       int,
    `qn_id`          int,
    `year_id`        int,
    `phycial_delete` int  -- NOTE(review): likely a typo for "physical_delete"; kept as-is since it must match the JSON field name
) WITH (
    'connector'                    = 'kafka',
    'topic'                        = 'dwd_dm_day_hudi_01',
    'properties.bootstrap.servers' = 'master1.betacdh.com:9092,work1.betacdh.com:9092,work2.betacdh.com:9092',
    'scan.startup.mode'            = 'earliest-offset',
    'format'                       = 'json'
);
-- Hudi sink table (MERGE_ON_READ) with Hive metastore sync.
-- With checkpointing enabled above, the streaming writer commits on each
-- checkpoint, which is why this variant successfully lands data in Hudi.
CREATE TABLE dwd_dm_day_hudi (
    `day_id`         int,
    `month_id`       int,
    `qn_id`          int,
    `year_id`        int,
    `phycial_delete` int,
    PRIMARY KEY (`day_id`) NOT ENFORCED
) COMMENT '订单表'
WITH (
    'connector'  = 'hudi',
    'path'       = 'hdfs://work1.betacdh.com:8020/test_sdc/dwd_dm_day_hudi',
    'table.type' = 'MERGE_ON_READ',
    -- BUG FIX: the option key is 'write.operation', not 'write.option'.
    -- The misspelled key was silently ignored, leaving the default 'upsert'.
    'write.operation' = 'insert',
    'hive_sync.enable'         = 'true',  -- NOTE(review): newer Hudi releases spell this 'hive_sync.enabled' — verify against your Hudi version
    'hive_sync.mode'           = 'hms',
    'hive_sync.metastore.uris' = 'thrift://work1.betacdh.com:9083',
    'hive_sync.table'          = 'dwd_dm_day_hudi',
    'hive_sync.db'             = 'test_sdc',
    'write.tasks'              = '1',
    'compaction.tasks'         = '1',
    'compaction.async.enable'  = 'true',  -- NOTE(review): newer releases use 'compaction.async.enabled' — verify
    -- compact after every delta commit (aggressive; fine for testing)
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits'    = '1'
);
-- Continuous Kafka -> Hudi pipeline; commits happen on the checkpoints
-- configured above. Explicit column list guards against schema drift.
insert into dwd_dm_day_hudi (day_id, month_id, qn_id, year_id, phycial_delete)
select
    day_id,
    month_id,
    qn_id,
    year_id,
    phycial_delete
from dwd_dm_day_kafka;