1,创建一个kafka的表
%flink.ssql
DROP TABLE IF EXISTS logtail; -- (re)create the Kafka source table
-- Kafka CDC source: reads canal-json change events for the assure_orders table.
CREATE TABLE logtail (
    order_state_tag INT
    -- NOTE(review): the original note elided the remaining columns with
    -- "......................", which is not valid SQL. Fill in the full
    -- schema of the assure_orders payload here before running.
) WITH (
    'connector' = 'kafka',
    'topic' = 'ods.rds_core.plateform_stable.assure_orders',
    'properties.bootstrap.servers' = 'dev-ct6-dc-worker01:9092,dev-ct6-dc-worker02:9092,dev-ct6-dc-worker03:9092',
    'properties.group.id' = 'testGroup2',
    'format' = 'canal-json',
    'scan.startup.mode' = 'earliest-offset'
);
2,创建hudi表
%flink.ssql
DROP TABLE IF EXISTS hudi_order_ods_test;
-- Hudi MERGE_ON_READ sink/source table on HDFS; streaming read is enabled so a
-- separate job can continuously consume commits from this table.
CREATE TABLE hudi_order_ods_test (
    order_number VARCHAR,
    order_key    VARCHAR,
    ts           TIMESTAMP(3),
    PRIMARY KEY (order_number) NOT ENFORCED
) WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test',
    'table.type' = 'MERGE_ON_READ',
    'read.streaming.enabled' = 'true',
    'write.tasks' = '1',
    'compaction.tasks' = '1',
    'read.streaming.check-interval' = '30',
    'write.insert.drop.duplicates' = 'true'
);
3,同时读写
%flink.ssql(type=update)
-- Continuously write the Kafka change stream into the Hudi table.
-- create_date is epoch milliseconds, hence the / 1000 before FROM_UNIXTIME.
INSERT INTO hudi_order_ods_test
SELECT
    order_number,
    order_key,
    TO_TIMESTAMP(FROM_UNIXTIME(create_date / 1000, 'yyyy-MM-dd HH:mm:ss')) AS ts
FROM logtail;
%flink.ssql(type=update)
-- Ad-hoc streaming read-back to verify the concurrent write; SELECT * is
-- acceptable here because this is interactive verification, not production.
SELECT * FROM hudi_order_ods_test;
4,关闭读任务,再次启动读任务
报错:重新启动读任务后作业报错,具体错误信息见下方第 5 步的日志(StreamReadMonitoringFunction 读取旧 log 文件的 write status 失败)。
5,查看日志
2021-05-07 15:01:15,806 INFO org.apache.hudi.common.table.view.HoodieTableFileSystemView [] - Adding file-groups for partition :, #FileGroups=2
2021-05-07 15:01:15,806 INFO org.apache.hudi.common.table.view.AbstractTableFileSystemView [] - addFilesToView: NumFiles=14, NumFileGroups=2, FileGroupsCreationTime=1, StoreTimeTaken=0
2021-05-07 15:01:15,806 INFO org.apache.hudi.common.table.view.AbstractTableFileSystemView [] - Time to load partition () =5
2021-05-07 15:01:15,806 INFO org.apache.hudi.common.table.view.AbstractTableFileSystemView [] - Pending Compaction instant for (FileSlice {fileGroupId=HoodieFileGroupId{partitionPath='', fileId='61a65cf3-ae91-4e77-af19-26d9c1cea230'}, baseCommitTime=20210507150056, baseFile='HoodieBaseFile{fullPath=hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test/61a65cf3-ae91-4e77-af19-26d9c1cea230_0-1-0_20210507150056.parquet, fileLen=439367, BootstrapBaseFile=null}', logFiles='[HoodieLogFile{pathStr='hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test/.61a65cf3-ae91-4e77-af19-26d9c1cea230_20210507150056.log.1_0-1-0', fileLen=1885}]'}) is :Optional.empty
2021-05-07 15:01:15,807 INFO org.apache.flink.runtime.taskmanager.Task [] - split_reader -> SinkConversionToTuple2 (2/4)#239 (0478013d0ec2a918eada606c0c21421b) switched from CANCELING to CANCELED.
2021-05-07 15:01:15,807 INFO org.apache.flink.runtime.taskmanager.Task [] - Freeing task resources for split_reader -> SinkConversionToTuple2 (2/4)#239 (0478013d0ec2a918eada606c0c21421b).
2021-05-07 15:01:15,807 INFO org.apache.flink.runtime.taskexecutor.TaskExecutor [] - Un-registering task and sending final execution state CANCELED to JobManager for task split_reader -> SinkConversionToTuple2 (1/4)#239 acf040250b4fa87d6ae4b8b2ac06df5d.
2021-05-07 15:01:15,808 INFO org.apache.flink.runtime.taskexecutor.TaskExecutor [] - Un-registering task and sending final execution state CANCELED to JobManager for task split_reader -> SinkConversionToTuple2 (3/4)#239 626f50defe9f75cb4523619fd09c72f7.
2021-05-07 15:01:15,808 INFO org.apache.flink.runtime.taskexecutor.TaskExecutor [] - Un-registering task and sending final execution state CANCELED to JobManager for task split_reader -> SinkConversionToTuple2 (2/4)#239 0478013d0ec2a918eada606c0c21421b.
2021-05-07 15:01:15,813 INFO org.apache.hudi.table.MarkerFiles [] - Creating Marker Path=hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test/.hoodie/.temp/20210507150106/61a65cf3-ae91-4e77-af19-26d9c1cea230_0-1-0_20210507150056.parquet.marker.APPEND
2021-05-07 15:01:15,820 INFO org.apache.hudi.common.table.log.HoodieLogFormat$WriterBuilder [] - Building HoodieLogFormat Writer
2021-05-07 15:01:15,820 INFO org.apache.hudi.common.table.log.HoodieLogFormat$WriterBuilder [] - HoodieLogFile on path hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test/.61a65cf3-ae91-4e77-af19-26d9c1cea230_20210507150056.log.1_0-1-0
2021-05-07 15:01:15,822 INFO org.apache.hudi.common.table.log.HoodieLogFormatWriter [] - HoodieLogFile{pathStr='hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test/.61a65cf3-ae91-4e77-af19-26d9c1cea230_20210507150056.log.1_0-1-0', fileLen=1885} exists. Appending to existing file
2021-05-07 15:01:15,865 INFO org.apache.hudi.io.HoodieAppendHandle [] - AppendHandle for partitionPath filePath .61a65cf3-ae91-4e77-af19-26d9c1cea230_20210507150056.log.1_0-1-0, took 85 ms.
2021-05-07 15:01:15,880 INFO org.apache.hudi.common.fs.FSUtils [] - Hadoop Configuration: fs.defaultFS: [hdfs://bi-524:8020], Config:[Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, /etc/hadoop/conf.cloudera.hdfs/core-site.xml, /etc/hadoop/conf.cloudera.hdfs/hdfs-site.xml], FileSystem: [DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_1220740860_94, ugi=OpsUser (auth:SIMPLE)]]]
2021-05-07 15:01:15,881 ERROR org.apache.hudi.source.StreamReadMonitoringFunction [] - Get write status of path: hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test/.61a65cf3-ae91-4e77-af19-26d9c1cea230_20210507145735.log.1_0-1-0 error
2021-05-07 15:01:15,882 INFO org.apache.flink.runtime.taskmanager.Task [] - Source: streaming_source (1/1)#239 (21419e6d2a499d0d6702212854e2d83d) switched from CANCELING to CANCELED.
2021-05-07 15:01:15,882 INFO org.apache.flink.runtime.taskmanager.Task [] - Freeing task resources for Source: streaming_source (1/1)#239 (21419e6d2a499d0d6702212854e2d83d).
2021-05-07 15:01:15,882 INFO org.apache.flink.runtime.taskexecutor.TaskExecutor [] - Un-registering task and sending final execution state CANCELED to JobManager for task Source: streaming_source (1/1)#239 21419e6d2a499d0d6702212854e2d83d.
2021-05-07 15:01:15,891 INFO org.apache.hudi.common.table.HoodieTableMetaClient [] - Loading HoodieTableMetaClient from hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test
2021-05-07 15:01:15,892 INFO org.apache.hudi.io.FlinkAppendHandle [] - Closing the file 61a65cf3-ae91-4e77-af19-26d9c1cea230 as we are done with all the records 0
2021-05-07 15:01:15,892 INFO org.apache.hudi.common.table.HoodieTableMetaClient [] - Loading HoodieTableMetaClient from hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test
2021-05-07 15:01:15,893 INFO org.apache.hudi.common.fs.FSUtils [] - Hadoop Configuration: fs.defaultFS: [hdfs://bi-524:8020], Config:[Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, /etc/hadoop/conf.cloudera.hdfs/core-site.xml, /etc/hadoop/conf.cloudera.hdfs/hdfs-site.xml], FileSystem: [DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_1220740860_94, ugi=OpsUser (auth:SIMPLE)]]]
2021-05-07 15:01:15,894 INFO org.apache.hudi.client.HoodieFlinkWriteClient [] - Cleaner has been spawned already. Waiting for it to finish
2021-05-07 15:01:15,894 INFO org.apache.hudi.client.AsyncCleanerService [] - Waiting for async cleaner to finish
2021-05-07 15:01:15,894 INFO org.apache.hudi.client.HoodieFlinkWriteClient [] - Cleaner has finished
2021-05-07 15:01:15,894 INFO org.apache.hudi.sink.CleanFunction [] - Executor executes action [wait for cleaning finish] success!
2021-05-07 15:01:15,894 INFO org.apache.hudi.common.table.HoodieTableConfig [] - Loading table properties from hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test/.hoodie/hoodie.properties
2021-05-07 15:01:15,896 INFO org.apache.hudi.common.table.HoodieTableMetaClient [] - Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=PARQUET) from hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test
2021-05-07 15:01:15,896 INFO org.apache.hudi.common.table.HoodieTableMetaClient [] - Loading Active commit timeline for hdfs://bi-524:8020/tmp/hudi/hudi_order_ods_test
2021-05-07 15:01:15,899 INFO org.apache.hudi.common.table.timeline.HoodieActiveTimeline [] - Loaded instants [[20210507144916__rollback__COMPLETED], [20210507145145__clean__COMPLETED], [20210507145235__clean__COMPLETED], [20210507145325__clean__COMPLETED], [20210507145415__clean__COMPLETED], [20210507145505__clean__COMPLETED], [20210507145555__clean__COMPLETED], [20210507145645