Flink CDC: Writing MySQL Data into Kafka

1. Import the dependencies

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.atguigu</groupId>
    <artifactId>atguigu-flink-cdc</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!--        <flink-version>1.14.6</flink-version>-->
        <java.version>1.8</java.version>
        <flink-version>1.13.6</flink-version>
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
        <scala.version>2.12</scala.version>
        <hadoop.version>3.1.3</hadoop.version>

    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.version}</artifactId>
            <version>${flink-version}</version>

        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.version}</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.49</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-table-planner -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.version}</artifactId>
            <version>${flink-version}</version>
        </dependency>


        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>2.0.2</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.version}</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
            <scope>compile</scope>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.version}</artifactId>
            <version>${flink-version}</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.version}</artifactId>
            <version>${flink-version}</version>
            <scope>provided</scope>
        </dependency>



    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>


</project>
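With the maven-assembly-plugin configured above, running mvn clean package should produce target/atguigu-flink-cdc-1.0-SNAPSHOT-jar-with-dependencies.jar, a fat jar bundling these dependencies (except the provided-scope flink-runtime-web) that can be submitted to the cluster with flink run.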

2. Main program


import com.ververica.cdc.connectors.mysql.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.DebeziumSourceFunction;
import com.ververica.cdc.debezium.StringDebeziumDeserializationSchema;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

import java.util.Properties;

public class FlinkCDC {

    public static void main(String[] args) throws Exception {

        String ckAndGroupIdAndJobName = "test2";
        //1. Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        //2. Flink CDC stores the binlog read position as state in checkpoints. To resume from where a previous run left off, the job must be restarted from a checkpoint or savepoint.
        //2.1 Enable checkpointing: one checkpoint every 5 seconds
        env.enableCheckpointing(5000L);
        //2.2 Set the checkpoint consistency semantics
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        //2.3 Retain the last checkpoint when the job is cancelled
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        //2.4 Restart strategy: on failure, restart the job from the latest checkpoint
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000L));


        //2.5 Set the state backend
        env.setStateBackend(new HashMapStateBackend());
        //2.6 Set the checkpoint storage path
        env.getCheckpointConfig().setCheckpointStorage("hdfs://hadoop103:8020/ck/" + ckAndGroupIdAndJobName);
//        env.getCheckpointConfig().setCheckpointStorage("hdfs://mycluster:8020/ck/" + ckAndGroupIdAndJobName);

        //2.7 Set the user name used to access HDFS
        System.setProperty("HADOOP_USER_NAME", "sarah");
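        // To actually resume from a retained checkpoint after a cancel or failure, the job would be resubmitted
        // with the checkpoint path, e.g. (the chk-<n> directory shown is illustrative):
        //   flink run -s hdfs://hadoop103:8020/ck/test2/chk-42 -c FlinkCDC atguigu-flink-cdc-1.0-SNAPSHOT-jar-with-dependencies.jar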


        //3. Create the Flink MySQL CDC source
        //Startup options:
        //initial (default): performs an initial snapshot of the monitored tables on first startup, then continues reading the latest binlog.
        //latest-offset: never performs a snapshot; reads from the end of the binlog, i.e. captures only the changes made after the connector started.
        //timestamp: never performs a snapshot; reads the binlog starting from the specified timestamp. The consumer traverses the binlog from the beginning and skips change events whose timestamp is smaller than the specified one.
        //specific-offset: never performs a snapshot; reads the binlog directly from the specified offset.
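        //For example, to skip the snapshot and capture only new changes, the builder below could use
        //.startupOptions(StartupOptions.latest()) instead of StartupOptions.initial(); other modes may not be
        //supported by every flink-connector-mysql-cdc release, so verify against the version in use (2.0.2 here).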

        DebeziumSourceFunction<String> sourceFunction = MySqlSource.<String>builder()
//                .hostname("hadoop102")
//                .port(3306)
//                .username("root")
//                .password("%3!4xaHXml")
//                .databaseList("student")

                .hostname("wiltechs-sjfood3-replica-i.mysql.rds.aliyuncs.com")
                .port(3306)
                .username("meta_ro")
                .password("hlmabcmY6xWF")
                .databaseList("db_core")
               // .tableList("db_core.tb_readonly_invoice_data") //可选配置项,如果不指定该参数,则会读取上一个配置下的所有表的数据,注意:指定的时候需要使用"db.table"的方式
             // .tableList("db_core.cc_location") //可选配置项,如果不指定该参数,则会读取上一个配置下的所有表的数据,注意:指定的时候需要使用"db.table"的方式


                .startupOptions(StartupOptions.initial())
                .deserializer(new CustomerDeserialization())
//                .deserializer(new StringDebeziumDeserializationSchema())
                .build();

        DataStreamSource<String> streamSource = env.addSource(sourceFunction);

        //4. Print the data and write it to Kafka
        streamSource.addSink(getKafkaProducer("172.16.11.144:9092")).name(ckAndGroupIdAndJobName).uid(ckAndGroupIdAndJobName + "uid2");


        streamSource.print();
        //5. Start the job
        env.execute("FlinkCDC");

    }


    //Kafka producer: writes the JSON change records to the db_core_test topic on the given brokers
    public static FlinkKafkaProducer<String> getKafkaProducer(String brokers) {
        return new FlinkKafkaProducer<String>(brokers, "db_core_test", new SimpleStringSchema());
    }
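    //Note: this FlinkKafkaProducer constructor provides at-least-once delivery. For end-to-end exactly-once,
    //the constructor taking a KafkaSerializationSchema, producer Properties and FlinkKafkaProducer.Semantic.EXACTLY_ONCE
    //would be used instead (an option only, not what this job does; Kafka transaction timeouts must then be tuned).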

}
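To sanity-check that the change records actually reach Kafka, a small companion job like the sketch below can read the topic back and print it. This is only an illustrative sketch: the broker address, consumer group and topic name mirror the values used above, and FlinkKafkaConsumer comes from the flink-connector-kafka dependency already declared in the POM.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class KafkaCheckJob {

    public static void main(String[] args) throws Exception {
        //1. Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        //2. Consumer properties: broker address and consumer group (same values as the producer job above)
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "172.16.11.144:9092");
        props.setProperty("group.id", "test2");

        //3. Read the JSON change records written by the CDC job, starting from the beginning of the topic
        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("db_core_test", new SimpleStringSchema(), props);
        consumer.setStartFromEarliest();

        //4. Print every record to stdout
        env.addSource(consumer).print();

        env.execute("KafkaCheckJob");
    }
}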

3. Custom deserialization schema

import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

import java.util.List;
import java.util.Map;

public class CustomerDeserialization implements DebeziumDeserializationSchema<String> {

    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {

        //1. Create the JSON object that will hold the final record
        JSONObject result = new JSONObject();

        //2. Get the database name & table name from the topic ("mysql_binlog_source.db.table")
        String topic = sourceRecord.topic();
        String[] fields = topic.split("\\.");

        String database = fields[1];
        String tableName = fields[2];

        Struct value = (Struct) sourceRecord.value();

        //3. Get the "before" data
        Struct before = value.getStruct("before");

        Map<String, ?> sourceOffset = sourceRecord.sourceOffset();
        Long ts_sec = ((Number) sourceOffset.get("ts_sec")).longValue();

        JSONObject beforeJson = new JSONObject();
        if (before != null) {
            Schema beforeSchema = before.schema();
            List<Field> beforeFields = beforeSchema.fields();
            for (Field field : beforeFields) {
                Object beforeValue = before.get(field);
                beforeJson.put(field.name(), beforeValue);
            }
        }

        //4. Get the "after" data
        Struct after = value.getStruct("after");
        JSONObject afterJson = new JSONObject();
        if (after != null) {
            Schema afterSchema = after.schema();
            List<Field> afterFields = afterSchema.fields();
            for (Field field : afterFields) {
                Object afterValue = after.get(field);
                afterJson.put(field.name(), afterValue);
            }
        }

        //5. Get the operation type (CREATE / UPDATE / DELETE) and map it to the single-letter codes used by Debezium's "op" field
        Envelope.Operation operation = Envelope.operationFor(sourceRecord);
        String type = operation.toString().toLowerCase();
        if ("insert".equals(type)) {
            type = "c";
        }
        if ("update".equals(type)) {
            type = "u";
        }
        if ("delete".equals(type)) {
            type = "d";
        }
        if ("create".equals(type)) {
            type = "c";
        }

        //6. Put the fields into the JSON object
//        result.put("source", source);
        result.put("database", database);
        result.put("table", tableName);
        result.put("before", beforeJson);
        result.put("after", afterJson);
        result.put("op", type);
        result.put("ts", ts_sec);

        //7. Emit the record
        collector.collect(result.toJSONString());
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}
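With this deserializer, every change event reaches Kafka as a flat JSON string containing the database name, table name, the before/after row images, a Debezium-style operation code and the binlog timestamp in seconds. For a hypothetical UPDATE of a row (id, name) in db_core.cc_location, a record would look roughly like this (column names and values are invented for illustration, and fastjson does not guarantee key order):

{"database":"db_core","table":"cc_location","before":{"id":1,"name":"old"},"after":{"id":1,"name":"new"},"op":"u","ts":1650000000}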


I wrote this once before but forgot to save it, and the code somehow disappeared. After half a day of debugging it all over again, it's been another day of being driven to tears by my own carelessness.
