数据表及依赖准备
创建MySQL测试数据表
需要修改MySQL的配置文件(Windows为安装目录下的my.ini,Linux为/etc/my.cnf),添加下面"开启binlog"一节中的配置项以开启binlog日志,添加之后重启MySQL服务。
CREATE TABLE `test1` (
`id` int NOT NULL,
`name` varchar(255) COLLATE utf8mb4_general_ci DEFAULT NULL,
`description` varchar(255) COLLATE utf8mb4_general_ci DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
开启binlog
server_id=1
log_bin=mysql-bin
binlog_format=ROW
expire_logs_days=30
# 注意:MySQL 8.0 中 expire_logs_days 已被废弃,建议改用 binlog_expire_logs_seconds=2592000
数据库测试语句
-- 插入测试
INSERT INTO test1 (id, name, description) VALUES (1, 'John', 'A software engineer');
INSERT INTO test1 (id, name, description) VALUES (2, 'Alice', 'A data scientist');
INSERT INTO test1 (id, name, description) VALUES (3, 'Bob', 'An analyst');
-- 删除测试
DELETE FROM test1 WHERE id = 1; -- 删除 id 为 1 的行
DELETE FROM test1 WHERE id = 2; -- 删除 id 为 2 的行
导入CDC maven依赖
这里的MySQL 我用的8.0的,如果用的5.7的需要去下面的仓库地址自行更换
maven中央仓库:https://mvnrepository.com/artifact/mysql/mysql-connector-java/5.1.36
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.27</version>
</dependency>
数据变更标识
- +I:新增数据。
- -U:修改前的数据。一条数据的修改会产生 -U 和 +U 两条标识数据,其中 -U 表示修改前的旧值。
- +U:修改之后的数据。
- -D:删除的数据。
Java版CDC
DataStreamAPI
主机名,端口,数据库名等自行修改
package com.esni.cdc;
/**
* Date: 2023/8/23 10:04
* Author: yilinwei
* Description: MySQL DataStreamAPI CDC github测试案例: https://github.com/ververica/flink-cdc-connectors
*
*/
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
/**
 * Reads MySQL binlog change events via the Flink CDC DataStream API and
 * prints each event to stdout as a JSON string.
 *
 * Adjust host, port, database, table and credentials for your environment.
 */
public class MysqlDataStreamCdc {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // A single task keeps the printed change events in order.
        env.setParallelism(1);
        // Checkpointing is required so the CDC source can commit binlog offsets.
        env.enableCheckpointing(3000);

        // Build the CDC source for one database/table pair.
        MySqlSource<String> source = MySqlSource.<String>builder()
                .hostname("localhost")       // MySQL host
                .port(3307)                  // MySQL port
                .databaseList("tyy_test")    // database to capture
                .tableList("tyy_test.test1") // table to capture, as db.table
                .username("root")            // user
                .password("root")            // password
                // Deserialize Debezium change records into JSON strings.
                .deserializer(new JsonDebeziumDeserializationSchema())
                .build();

        // Attach the source (no event-time watermarks needed) and print every event.
        env.fromSource(source, WatermarkStrategy.noWatermarks(), "CDC Source")
                .print();

        env.execute("mysql cdc");
    }
}
测试
- 插入删除数据
- 打印结果,出现回撤流,发生数据变更
TableAPI的CDC代码
主机名,端口,数据库名等自行修改
package com.esni.cdc;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
/**
* Date: 2023/8/23 10:04
* Author: yilinwei
* Description: MySQL FlinkTableAPI CDC github测试案例: https://github.com/ververica/flink-cdc-connectors
*/
public class MysqlTableCdc {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// Register a CDC-backed table; the WITH clause carries the connector options.
/**
 * Flink only supports primary-key constraints in "NOT ENFORCED" mode:
 *   PRIMARY KEY (id) NOT ENFORCED
 * If no primary key is declared in Flink, the following options must be enabled instead:
 *   " 'scan.incremental.snapshot.enabled'='true',\n" +
 *   " 'scan.incremental.snapshot.chunk.key-column'='id', \n" +
 */
tableEnv
.executeSql(
"CREATE TABLE mysql_binlog (\n" +
" id INT NOT NULL,\n" +
" name STRING,\n" +
" description STRING, \n" +
" PRIMARY KEY (id) NOT ENFORCED \n " + // primary key; without it, enable the two options commented out below
") WITH (\n" +
" 'connector' = 'mysql-cdc',\n" +
" 'hostname' = 'localhost',\n" + // MySQL host
" 'port' = '3307',\n" + // MySQL port
" 'username' = 'root',\n" + // user
" 'password' = 'root',\n" + // password
" 'database-name' = 'tyy_test',\n" + // database
" 'table-name' = 'test1'\n" + // table
// If no primary key is declared, uncomment these two options
// (NOTE: the 'table-name' line above then also needs a trailing comma):
// " 'scan.incremental.snapshot.enabled'='true',\n" + // enable incremental snapshot mode: capture changes in chunks instead of one full snapshot
// " 'scan.incremental.snapshot.chunk.key-column'='id', \n" + // key column used to split snapshot chunks
");");
// Query the CDC table and print the changelog (retract) stream.
tableEnv
.sqlQuery("SELECT id, UPPER(name), description FROM mysql_binlog;")
.execute()
.print();
}
}
测试
- 插入删除数据
- 打印结果,出现回撤流,发生数据变更
Scala版CDC
DataStreamAPI
主机名,端口,数据库名等自行修改
package org.esni.cdc
import com.ververica.cdc.connectors.mysql.source.MySqlSource
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema
import org.apache.flink.api.common.eventtime.WatermarkStrategy
import org.apache.flink.api.common.typeinfo.Types
import org.apache.flink.streaming.api.scala._
/**
* Date: 2023/8/23 10:57
* Author: yilinwei
* Description: scala DataStreamAPI cdc
* CDC github测试案例 : https://github.com/ververica/flink-cdc-connectors
*/
/**
 * Streams MySQL binlog change events with the Flink CDC DataStream API
 * (Scala) and prints each event as a JSON string.
 *
 * Adjust host, port, database, table and credentials for your environment.
 */
object MysqlDataStreamCdc {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // One parallel task keeps the printed events in order.
    env.setParallelism(1)
    // Checkpointing is required so the CDC source can commit binlog offsets.
    env.enableCheckpointing(3000)

    // Build the CDC source for one database/table pair.
    val source = MySqlSource
      .builder[String]
      .hostname("localhost")       // MySQL host
      .port(3307)                  // MySQL port
      .databaseList("tyy_test")    // database to capture
      .tableList("tyy_test.test1") // table to capture, as db.table
      .username("root")            // user
      .password("root")            // password
      // Deserialize Debezium change records into JSON strings.
      .deserializer(new JsonDebeziumDeserializationSchema())
      .build()

    // Attach the source (no event-time watermarks needed) and print every event.
    env
      .fromSource(source, WatermarkStrategy.noWatermarks(), "MySQL Source")
      .print()

    env.execute("MySQL CDC Job")
  }
}
测试
- 插入删除数据
- 打印结果,出现回撤流,发生数据变更
TableAPI
package org.esni.cdc
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
/**
* Date: 2023/8/23 10:57
* Author: yilinwei
* Description: scala Table cdc
* CDC github测试案例 : https://github.com/ververica/flink-cdc-connectors
*/
object MysqlTableCdc {
def main(args: Array[String]): Unit = {
val env: StreamExecutionEnvironment = StreamExecutionEnvironment
.getExecutionEnvironment
env.setParallelism(1)
val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
// Register a CDC-backed table; the WITH clause carries the connector options.
/**
 * Flink only supports primary-key constraints in "NOT ENFORCED" mode:
 *   PRIMARY KEY (id) NOT ENFORCED
 * If no primary key is declared in Flink, the following options must be enabled instead:
 *   " 'scan.incremental.snapshot.enabled'='true',\n" +
 *   " 'scan.incremental.snapshot.chunk.key-column'='id', \n" +
 */
tableEnv.executeSql("CREATE TABLE mysql_binlog (\n" + " id INT NOT NULL,\n" + " name STRING,\n" + " description STRING, \n" + " PRIMARY KEY (id) NOT ENFORCED \n " + // primary key; without it, enable the two options commented out below
") WITH (\n" + " 'connector' = 'mysql-cdc',\n" + " 'hostname' = 'localhost',\n" + // MySQL host
" 'port' = '3307',\n" + // MySQL port
" 'username' = 'root',\n" + // user
" 'password' = 'root',\n" + // password
" 'database-name' = 'tyy_test',\n" + // database
" 'table-name' = 'test1'\n" + ");" // table
// If no primary key is declared, uncomment these two options
// (NOTE: the 'table-name' line above then also needs a trailing comma):
// " 'scan.incremental.snapshot.enabled'='true',\n" + // enable incremental snapshot mode: capture changes in chunks instead of one full snapshot
// " 'scan.incremental.snapshot.chunk.key-column'='id', \n" + // key column used to split snapshot chunks
)
// Query the CDC table and print the changelog (retract) stream.
tableEnv.sqlQuery("SELECT id, UPPER(name), description FROM mysql_binlog;").execute.print()
}
}
测试
- 插入删除数据
- 打印结果,出现回撤流,发生数据变更