一、版本信息
- Flink:1.16.1
二、同步MySQL数据到MySQL
准备工作
- 将flink-sql-connector-mysql-cdc放在flink的lib目录下,注意cdc版本。本人使用的是flink-sql-connector-mysql-cdc-2.3.0.jar
Flink SQL实现
- 准备SQL文件
## 设置job名称
SET 'pipeline.name' = 'data_center_shine_job';
CREATE DATABASE IF NOT EXISTS `default_catalog`.`wsmg`;
CREATE TABLE IF NOT EXISTS `default_catalog`.`wsmg`.`data_center_shine_src` (
`id` INT NOT NULL,
`bus_name` STRING NOT NULL,
`mapper_class_name` STRING NOT NULL,
`entity_class_name` STRING NOT NULL,
PRIMARY KEY(`id`)
NOT ENFORCED
) with (
'hostname' = 'xxx',
'port' = '3306',
'username' = 'root',
'password' = '*****',
'database-name' = 'wsmg',
'table-name' = 'data_center_shine',
'connector' = 'mysql-cdc'
);
CREATE TABLE IF NOT EXISTS `default_catalog`.`wsmg`.`data_center_shine_sink` (
`id` INT NOT NULL,
`bus_name` STRING NOT NULL,
`mapper_class_name` STRING NOT NULL,
`entity_class_name` STRING NOT NULL,
PRIMARY KEY(`id`)
NOT ENFORCED
) with (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://xxx:3306/wsmg',
'driver' = 'com.mysql.cj.jdbc.Driver',
'username' = 'root',
'password' = '*****',
'table-name' = 'data_center_shine'
);
INSERT INTO `default_catalog`.`wsmg`.`data_center_shine_sink` SELECT * FROM `default_catalog`.`wsmg`.`data_center_shine_src`;
- 启动任务
## 启动任务
./flink-1.16.1/bin/sql-client.sh -f data_center_shine.sql
- 多个SQL合并为一个任务执行
BEGIN STATEMENT SET;
INSERT INTO xxx SELECT * FROM xxx;
INSERT INTO xxx SELECT * FROM xxx;
INSERT INTO xxx SELECT * FROM xxx;
END;
- 从savepoint启动任务
SET 'execution.savepoint.path' = 'xxx';
Java代码实现
- pom文件如下
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.wys</groupId>
<artifactId>flink</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<flink.version>1.16.1</flink.version>
<flink-cdc.version>2.3.0</flink-cdc.version>
<slf4j.version>1.7.30</slf4j.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-loader</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-runtime</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-base</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- mysql-cdc fat jar -->
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-sql-connector-mysql-cdc</artifactId>
<version>${flink-cdc.version}</version>
</dependency>
<!-- flink webui -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web</artifactId>
<version>${flink.version}</version>
</dependency>
<!--日志相关的依赖 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-to-slf4j</artifactId>
<version>2.14.0</version>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
- Java代码如下-sql方式
package com.wys.flink;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
/**
 * Continuously syncs the {@code wsmg.data_center_shine} table from a MySQL
 * source to a MySQL sink using Flink's MySQL-CDC source connector and the
 * JDBC sink connector, running on a local environment with the web UI enabled.
 */
public class FlinkMySQLToMySQL {

    /**
     * Builds a local streaming environment (web UI on port 8081), registers the
     * CDC source table and the JDBC sink table, then submits the continuous
     * INSERT job and blocks until it terminates.
     *
     * @param args unused
     * @throws Exception if job submission or execution fails
     */
    public static void main(String[] args) throws Exception {
        // For cluster deployment use StreamExecutionEnvironment.getExecutionEnvironment() instead.
        Configuration conf = new Configuration();
        // Bind the local web UI to port 8081.
        conf.setString(RestOptions.BIND_PORT, "8081");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
        EnvironmentSettings settings = EnvironmentSettings
                .newInstance()
                .inStreamingMode()
                .build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
        // Register the CDC source table data_center_shine_src.
        tableEnv.executeSql(sourceDDL());
        // Register the JDBC sink table data_center_shine_sink.
        tableEnv.executeSql(sinkDDL());
        // Set the job name before submitting the INSERT job.
        tableEnv.getConfig().set("pipeline.name", "data_center_shine_java_job");
        // executeSql() submits the INSERT asynchronously; await() keeps the JVM alive so
        // the local mini-cluster is not torn down when main() returns. Note: executeSql()
        // takes a single statement WITHOUT a trailing ';' (the parser rejects terminators).
        tableEnv.executeSql("insert into data_center_shine_sink select * from data_center_shine_src")
                .await();
    }

    /**
     * DDL for the MySQL-CDC source table. No trailing ';' — Flink's
     * {@code executeSql} accepts exactly one statement without a terminator.
     *
     * @return CREATE TABLE statement for {@code data_center_shine_src}
     */
    public static String sourceDDL() {
        return "CREATE TABLE data_center_shine_src (\n" +
                "  `id` INT NOT NULL,\n" +
                "  `bus_name` STRING NOT NULL,\n" +
                "  `mapper_class_name` STRING NOT NULL,\n" +
                "  `entity_class_name` STRING NOT NULL,\n" +
                "  PRIMARY KEY(`id`) NOT ENFORCED\n" +
                ") WITH (\n" +
                "  'connector' = 'mysql-cdc',\n" +
                "  'hostname' = 'xxxx',\n" +
                "  'port' = '3306',\n" +
                "  'username' = 'root',\n" +
                "  'password' = '*****',\n" +
                "  'database-name' = 'wsmg',\n" +
                "  'table-name' = 'data_center_shine'\n" +
                ")";
    }

    /**
     * DDL for the JDBC sink table (option key is {@code url}, not
     * {@code jdbc-url}). No trailing ';' for the same reason as the source DDL.
     *
     * @return CREATE TABLE statement for {@code data_center_shine_sink}
     */
    public static String sinkDDL() {
        return "CREATE TABLE data_center_shine_sink (\n" +
                "  `id` INT NOT NULL,\n" +
                "  `bus_name` STRING NOT NULL,\n" +
                "  `mapper_class_name` STRING NOT NULL,\n" +
                "  `entity_class_name` STRING NOT NULL,\n" +
                "  PRIMARY KEY(`id`) NOT ENFORCED\n" +
                ") WITH (\n" +
                "  'connector' = 'jdbc',\n" +
                "  'url' = 'jdbc:mysql://xxxx/wsmg',\n" +
                "  'driver' = 'com.mysql.cj.jdbc.Driver',\n" +
                "  'username' = 'root',\n" +
                "  'password' = '*****',\n" +
                "  'table-name' = 'data_center_shine'\n" +
                ")";
    }
}
- 远程服务器执行任务,本地flink版本需和远程服务器一致
// 远程执行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("ip", 8081);
三、MySQL同步数据到StarRocks
准备工作
- 将flink-sql-connector-mysql-cdc、flink-connector-starrocks-1.2.9_flink-1.16.jar放在flink的lib目录下,注意jar版本需要和flink配套。本人使用的是flink-sql-connector-mysql-cdc-2.3.0.jar,flink-connector-starrocks-1.2.9_flink-1.16.jar
实现
- Flink SQL和Java的实现和 MySQL数据同步到MySQL的实现基本一致
- StarRocks DDL示例
CREATE TABLE IF NOT EXISTS `default_catalog`.`wsmg`.`data_center_shine_sink` (
`id` INT NOT NULL,
`bus_name` STRING NOT NULL,
`mapper_class_name` STRING NOT NULL,
`entity_class_name` STRING NOT NULL,
PRIMARY KEY(`id`)
NOT ENFORCED
) with (
'database-name' = 'wsmg',
'password' = '',
'sink.max-retries' = '10',
'sink.properties.format' = 'json',
'jdbc-url' = 'jdbc:mysql://xxx:9030',
'load-url' = 'xxx:8030',
'sink.buffer-flush.interval-ms' = '15000',
'username' = 'root',
'sink.properties.strip_outer_array' = 'true',
'connector' = 'starrocks',
'table-name' = 'data_center_shine'
);
- Java 实现pom文件还需添加如下依赖
<!--flink-connector-starrocks-->
<dependency>
<groupId>com.starrocks</groupId>
<artifactId>flink-connector-starrocks</artifactId>
<version>1.2.9_flink-1.16</version>
</dependency>