Flink CDC 2.3.0: reading MySQL and writing to Hudi 0.11.1

First edit MySQL's my.cnf, add the settings below, and then restart MySQL (service mysqld restart). The MySQL account used by the CDC connector also needs the SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE and REPLICATION CLIENT privileges.

# enable binlog
log_bin=mysql_bin
binlog-format=Row
server-id=1
expire_logs_days = 7
max_binlog_size = 500M
# database to capture
binlog-do-db=cdctest
# databases to ignore
binlog-ignore-db=mysql
The Maven dependencies are as follows:

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <flink.version>1.14.5</flink.version>
        <java.version>1.8</java.version>
        <slf4j.version>1.7.30</slf4j.version>
        <scala.binary.version>2.12</scala.binary.version>
    </properties>
    <dependencies>
        <!-- logging dependencies -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.2.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hudi</groupId>
            <artifactId>hudi-flink1.14-bundle_${scala.binary.version}</artifactId>
            <version>0.11.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>2.3.0</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.27</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>3.1.2</version>
        </dependency>
    </dependencies>

The Flink code is as follows:

import java.text.SimpleDateFormat
import java.util.Date

import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api._
import org.apache.flink.table.api.bridge.scala._

object FlinkCDC {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
    // FlinkUtils is the author's own helper (Kerberos login and checkpoint setup); it is not shown here
    FlinkUtils.initKerberos()
    FlinkUtils.initCheckPoint(env)
    env.setParallelism(1)

    val timestamp = 1676472746000L
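    // Hypothetical alternative (not from the article): take the startup timestamp from the
    // program arguments instead of hard-coding it, e.g. bin/flink run ... 1676472746000
    // val timestamp: Long = if (args.nonEmpty) args(0).toLong else System.currentTimeMillis()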
    // declare the MySQL CDC source table with Flink SQL and read it as a changelog stream
    tableEnv.executeSql("" +
      " CREATE TABLE user_info " +
      "( id int PRIMARY KEY NOT ENFORCED ," +
      "  name STRING," +
      "  sex STRING ) WITH (  " +
      "  'connector' = 'mysql-cdc'," +
      "  'hostname' = '127.0.0.1'," +
      "  'port' = '3306'," +
      "  'username' = 'root'," +
      "  'password' = '123456'," +
      "  'server-time-zone' = 'Asia/Shanghai'," +
      "  'database-name' = 'zht'," +
      "  'table-name' = 'myuserinfo'," +
      "   'scan.startup.mode' = 'timestamp'," +
      "  'scan.startup.timestamp-millis'  = '"+timestamp+"'," +   //timestamp可以作为参数传入
      "    'scan.snapshot.fetch.size' ='1024'," +
      "  'debezium.mysql.include.schema.changes'='true'," +
      "  'debezium.snapshot.locking.mode' = 'none') ")

    // Alternative startup modes:
    //   'scan.startup.mode' = 'initial',
    //   'scan.startup.mode' = 'earliest-offset',
    //   'scan.startup.mode' = 'timestamp' together with 'scan.startup.timestamp-millis' = '1676471846000',

    // use the current time as the precombine value and the current year as the partition value
    val date = new Date()
    val ts: Long = date.getTime
    val partitionpath_format: SimpleDateFormat = new SimpleDateFormat("yyyy")
    val years: String = partitionpath_format.format(date)

    val tstable: Table = tableEnv.sqlQuery("select * from user_info").addColumns(lit(ts), lit(years))
    tableEnv.toChangelogStream(tstable).print()



    tableEnv.executeSql(
      """
        |  CREATE TABLE hudi_user_info(
        |   id      int PRIMARY KEY NOT ENFORCED,
        |   name    String,
        |   sex     String,
        |   ts       bigint,
        |   years     String)
        |  PARTITIONED BY (years)
        |  WITH (
        |  'connector' = 'hudi',
        |  'path' = 'hdfs://xxxx:8020/datas/hudi_datas/hive/ods/test/hudi_user_info',
        |  'table.type' = 'COPY_ON_WRITE',
        |  'hoodie.datasource.write.recordkey.field' = 'id,ts',
        |  'hoodie.datasource.write.partitionpath.field'='years',
        |  'hoodie.datasource.write.hive_style_partitioning' = 'true',
        |  'hoodie.datasource.write.partitionpath.urlencode' = 'true',
        |  'hoodie.datasource.write.operation' ='upsert',
        |  'write.precombine' = 'true',
        |  'write.precombine.field' ='ts',
        |  'hoodie.index.type'='BLOOM',
        |  'hive_sync.enable' = 'true',
        |  'hive_sync.mode' = 'hms',
        |  'hive_sync.use_jdbc' = 'false',
        |  'hive_sync.metastore.uris' = 'thrift://xxxx:9083',
        |  'hive_sync.db' = 'hudi_ods',
        |  'changelog.enabled'= 'true',
        |  'hive_sync.table' = 'hudi_user_info',
        |  'hive_sync.assume_date_partitioning' = 'true',
        |  'hive_sync.partition_fields' = 'years',
        |  'hive_sync.support_timestamp'= 'true',
        |  'write.keygenerator.class' = 'org.apache.hudi.keygen.ComplexAvroKeyGenerator',
        |  'hive_sync.partition_extractor_class' = 'org.apache.hudi.hive.HiveStylePartitionValueExtractor')
        |""".stripMargin)

    tableEnv.executeSql("insert into  hudi_user_info  select * from "+tstable+"")
    env.execute()
  }
}
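
The FlinkUtils helper called at the top of main is the author's own utility and is not shown in the article. Below is a minimal, hypothetical sketch of what it could look like, assuming only standard Flink checkpointing APIs; the interval, timeout, and checkpoint path are placeholders, not values from the article. Note that the Hudi Flink writer only commits data on checkpoints, so checkpointing has to be enabled for records to become visible in the Hudi table.

import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

// Hypothetical sketch of the FlinkUtils helper used above (not the article's actual code)
object FlinkUtils {
  def initCheckPoint(env: StreamExecutionEnvironment): Unit = {
    // Hudi commits on checkpoints, so checkpointing must be enabled; the values below are assumptions
    env.enableCheckpointing(60 * 1000L, CheckpointingMode.EXACTLY_ONCE)
    env.getCheckpointConfig.setCheckpointTimeout(10 * 60 * 1000L)
    env.getCheckpointConfig.setMinPauseBetweenCheckpoints(30 * 1000L)
    // where checkpoints are stored; the path is a placeholder
    env.getCheckpointConfig.setCheckpointStorage("hdfs://xxxx:8020/flink/checkpoints")
  }

  def initKerberos(): Unit = {
    // Kerberos login (keytab / principal) is environment specific and omitted here
  }
}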
        tableEnv.executeSql(" CREATE TABLE  `pg_policy_declaration` (  " +
                "     `id` STRING  " +
                "     ,`declaration_start_date` DATE  " +
                "     ,`declaration_end_date` DATE  " +
                "     ,PRIMARY KEY ( `id` ) NOT ENFORCED  " +
                ") WITH (" +
                " 'connector' = 'postgres-cdc'," +
                " 'hostname' = '127.0.0.1'," +
                " 'port' = '5432'," +
                " 'username' = 'postgres'," +
                " 'password' = '123456'," +
                " 'database-name' = 'cdctest'," +
                " 'schema-name' = 'public'," +
                " 'decoding.plugin.name'='pgoutput', " +
                " 'table-name' = 'policy_declaration'," +
                " 'schema-name' = 'public'," +
                " 'debezium.snapshot.mode' = 'initial'," +
                " 'debezium.slot.retry.delay.ms' = '10000'," +
                " 'debezium.slot.drop.on.stop' = 'false', " +
                " 'debezium.slot.name' = 'policy_declaration_test' )");
