Flink CDC同步数据

一、版本信息

  • Flink:1.16.1

二、同步MySQL数据到MySQL

准备工作

  • 将flink-sql-connector-mysql-cdc放在flink的lib目录下,注意cdc版本。本人使用的是flink-sql-connector-mysql-cdc-2.3.0.jar

Flink SQL实现

  • 准备SQL文件
-- Set the job name. NOTE: '##' is not a valid SQL comment and the SQL client
-- requires the quoted form SET 'key' = 'value' since Flink 1.13.
SET 'pipeline.name' = 'data_center_shine_job';

-- Create the catalog database if absent (idempotent).
CREATE DATABASE IF NOT EXISTS `default_catalog`.`wsmg`;

-- Source table backed by the MySQL CDC connector (reads the binlog).
CREATE TABLE IF NOT EXISTS `default_catalog`.`wsmg`.`data_center_shine_src` (
  `id` INT NOT NULL,
  `bus_name` STRING NOT NULL,
  `mapper_class_name` STRING NOT NULL,
  `entity_class_name` STRING NOT NULL,
  -- Flink does not own the data, so the key is declared NOT ENFORCED.
  PRIMARY KEY (`id`) NOT ENFORCED
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = 'xxx',
  'port' = '3306',
  'username' = 'root',
  'password' = '*****',
  'database-name' = 'wsmg',
  'table-name' = 'data_center_shine'
);

-- Sink table backed by the Flink JDBC connector.
CREATE TABLE IF NOT EXISTS `default_catalog`.`wsmg`.`data_center_shine_sink` (
  `id` INT NOT NULL,
  `bus_name` STRING NOT NULL,
  `mapper_class_name` STRING NOT NULL,
  `entity_class_name` STRING NOT NULL,
  -- PK enables upsert writes into MySQL; NOT ENFORCED is mandatory in Flink.
  PRIMARY KEY (`id`) NOT ENFORCED
) WITH (
  'connector' = 'jdbc',
  -- The JDBC connector's option key is 'url' ('jdbc-url' is rejected).
  'url' = 'jdbc:mysql://xxx:3306/wsmg',
  'driver' = 'com.mysql.cj.jdbc.Driver',
  'username' = 'root',
  'password' = '*****',
  'table-name' = 'data_center_shine'
);

-- Replicate FROM the CDC source INTO the JDBC sink. (The original statement
-- had the direction reversed; a mysql-cdc table is read-only and cannot be
-- the INSERT target.)
INSERT INTO `default_catalog`.`wsmg`.`data_center_shine_sink` SELECT * FROM `default_catalog`.`wsmg`.`data_center_shine_src`;

  • 启动任务
## 启动任务
./flink-1.16.1/bin/sql-client.sh -f data_center_shine.sql
  • 多个SQL合并为一个任务执行
 
-- Bundle several INSERT statements into a single Flink job: everything
-- between BEGIN STATEMENT SET and END is planned and submitted together,
-- sharing one pipeline (and one savepoint) instead of one job per INSERT.
BEGIN STATEMENT SET;
INSERT INTO xxx SELECT * FROM xxx;
INSERT INTO xxx SELECT * FROM xxx;
INSERT INTO xxx SELECT * FROM xxx;
END;
  • 从savepoint启动任务
-- Resume the job from an existing savepoint; the SQL client requires the
-- quoted SET 'key' = 'value' form since Flink 1.13.
SET 'execution.savepoint.path' = 'xxx';

Java代码实现

  • pom文件如下
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.wys</groupId>
    <artifactId>flink</artifactId>
    <version>1.0.0</version>
    <packaging>jar</packaging>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Keep flink.version in sync with the cluster (1.16.1) and
             flink-cdc.version with the connector jar placed in flink/lib. -->
        <flink.version>1.16.1</flink.version>
        <flink-cdc.version>2.3.0</flink-cdc.version>
        <slf4j.version>1.7.30</slf4j.version>
    </properties>

    <dependencies>
        <!-- Core Flink APIs: DataStream, local/remote job submission. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Table/SQL API plus the planner and runtime needed to run SQL
             jobs from a plain Java main(). -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-loader</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-runtime</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Connectors: JDBC sink and the MySQL CDC source. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-base</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- mysql-cdc fat jar -->
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-sql-connector-mysql-cdc</artifactId>
            <version>${flink-cdc.version}</version>
        </dependency>

        <!-- flink webui -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Logging dependencies; 'provided' because the Flink distribution
             ships its own logging jars at runtime. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-to-slf4j</artifactId>
            <version>2.14.0</version>
            <scope>provided</scope>
        </dependency>

   
    </dependencies>

</project>

  • Java代码如下-sql方式
package com.wys.flink;


import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

/**
 * Continuously replicates the MySQL table {@code wsmg.data_center_shine}
 * into another MySQL instance with Flink SQL: a mysql-cdc source table and a
 * JDBC sink table, bridged by one INSERT INTO ... SELECT job.
 *
 * <p>Note: {@code TableEnvironment.executeSql} accepts exactly one statement
 * and rejects a trailing {@code ;}, so none of the SQL strings below carry a
 * statement terminator (the original code did, which fails to parse).
 */
public class FlinkMySQLToMySQL {

    public static void main(String[] args) throws Exception {
        // Local execution environment with the web UI bound to port 8081.
        // (Use StreamExecutionEnvironment.getExecutionEnvironment() when
        // submitting to a cluster instead.)
        Configuration conf = new Configuration();
        conf.setString(RestOptions.BIND_PORT, "8081");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
        // CDC replication is unbounded, so streaming mode is required.
        EnvironmentSettings settings = EnvironmentSettings
                .newInstance()
                .inStreamingMode()
                .build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
        // Register the CDC source table.
        tableEnv.executeSql(sourceDDL());
        // Register the JDBC sink table.
        tableEnv.executeSql(sinkDDL());
        // Job name shown in the Flink web UI; must be set before the INSERT.
        tableEnv.getConfig().set("pipeline.name", "data_center_shine_java_job");
        // Kick off the continuous sync from source to sink.
        tableEnv.executeSql("insert into data_center_shine_sink select * from data_center_shine_src");
    }

    /**
     * DDL for the CDC source table reading the MySQL binlog of
     * {@code wsmg.data_center_shine}.
     */
    public static String sourceDDL() {
        return "CREATE TABLE data_center_shine_src (\n" +
                "  `id` INT NOT NULL,\n" +
                "  `bus_name` STRING NOT NULL,\n" +
                "  `mapper_class_name` STRING NOT NULL,\n" +
                "  `entity_class_name` STRING NOT NULL,\n" +
                "   PRIMARY KEY(`id`)\n" +
                "   NOT ENFORCED\n" +
                ")  WITH (\n" +
                "  'table-name' = 'data_center_shine',\n" +
                "  'connector' = 'mysql-cdc',\n" +
                "  'hostname' = 'xxxx',\n" +
                "  'port' = '3306',\n" +
                "  'username' = 'root',\n" +
                "  'password' = '*****',\n" +
                "  'database-name' = 'wsmg'\n" +
                ")";
    }

    /**
     * DDL for the JDBC sink table; the primary key makes the connector write
     * in upsert mode.
     */
    public static String sinkDDL() {
        return "CREATE TABLE data_center_shine_sink (\n" +
                "  `id` INT NOT NULL,\n" +
                "  `bus_name` STRING NOT NULL,\n" +
                "  `mapper_class_name` STRING NOT NULL,\n" +
                "  `entity_class_name` STRING NOT NULL,\n" +
                "   PRIMARY KEY(`id`)\n" +
                "   NOT ENFORCED\n" +
                ")  WITH (\n" +
                "   'connector' = 'jdbc',\n" +
                "   'url' = 'jdbc:mysql://xxxx/wsmg',\n" +
                "   'driver' = 'com.mysql.cj.jdbc.Driver',\n" +
                "   'username' = 'root',\n" +
                "   'password' = '*****',\n" +
                "   'table-name' = 'data_center_shine'\n" +
                ")";
    }

}

  • 远程服务器执行任务,本地flink版本需和远程服务器一致
   // 远程执行环境
   StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("ip", 8081);

三、MySQL同步数据到StarRocks

准备工作

  • 将flink-sql-connector-mysql-cdc和flink-connector-starrocks的jar包放在flink的lib目录下,注意jar版本需要和flink配套。本人使用的是flink-sql-connector-mysql-cdc-2.3.0.jar和flink-connector-starrocks-1.2.9_flink-1.16.jar

实现

  • Flink SQL和Java的实现和 MySQL数据同步到MySQL的实现基本一致
  • StarRocks DDL示例
-- Sink table backed by the StarRocks connector (writes via Stream Load).
CREATE TABLE IF NOT EXISTS `default_catalog`.`wsmg`.`data_center_shine_sink` (
  `id` INT NOT NULL,
  `bus_name` STRING NOT NULL,
  `mapper_class_name` STRING NOT NULL,
  `entity_class_name` STRING NOT NULL,
  PRIMARY KEY (`id`) NOT ENFORCED
) WITH (
  'connector' = 'starrocks',
  -- FE addresses: MySQL-protocol port (9030) and HTTP Stream Load port (8030).
  'jdbc-url' = 'jdbc:mysql://xxx:9030',
  'load-url' = 'xxx:8030',
  'database-name' = 'wsmg',
  'table-name' = 'data_center_shine',
  'username' = 'root',
  'password' = '',
  -- Load tuning: retry failed loads, flush every 15 s, ship rows as a JSON array.
  'sink.max-retries' = '10',
  'sink.buffer-flush.interval-ms' = '15000',
  'sink.properties.format' = 'json',
  'sink.properties.strip_outer_array' = 'true'
);

  • Java 实现pom文件还需添加如下依赖
 <!--flink-connector-starrocks-->
 <!-- The version suffix must match the Flink version in use (here 1.16). -->
 <dependency>
      <groupId>com.starrocks</groupId>
      <artifactId>flink-connector-starrocks</artifactId>
      <version>1.2.9_flink-1.16</version>
  </dependency>
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值