引言
Flink CDC(Flink Change Data Capture)即 Flink 的变更数据捕获技术,是一种基于数据库日志的CDC技术,它实现了一个全增量一体化的数据集成框架。与Flink计算框架相结合,Flink CDC能够高效地实现海量数据的实时集成。其核心功能在于实时监视数据库或数据流中的数据变动,并将这些变动抽取出来,以便进行进一步的处理和分析。借助Flink CDC,用户可以轻松地构建实时数据管道,实时响应和处理数据变动,为实时分析、实时报表和实时决策等场景提供有力支持。
一、MySQL开启Binlog
MySQL 开启 Binlog 功能,需要在配置文件中修改 [mysqld] 段的相关参数(Linux 中为 /etc/my.cnf 文件,Windows 中为 my.ini 文件),修改后需要重启 MySQL 服务才能生效:
[mysqld]
# Server ID of this MySQL instance
server-id=1
# Use the row-level binlog format
binlog-format=Row
# Base name (prefix) of the binlog files
log-bin=mysql-bin
# Database whose changes are written to the binary log
binlog_do_db=testjpa
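开启 Binlog 后,还需要为 Flink CDC 配置相应的权限,使其能够正常连接到 MySQL 数据库,即为 Flink CDC 连接数据库所使用的用户授予 SELECT、RELOAD、SHOW DATABASES、REPLICATION SLAVE、REPLICATION CLIENT 权限。重启 MySQL 后,可以通过以下命令确认 Binlog 已经开启: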
mysql> SHOW VARIABLES LIKE 'log_bin';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| log_bin | ON |
+---------------+-------+
二、代码示例
创建Spring Boot项目添加依赖
<dependencies>
    <!-- Flink core and DataStream API -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>1.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.12</artifactId>
        <version>1.14.0</version>
    </dependency>
    <!-- Required to execute a job from inside the application (local/mini cluster) -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.12</artifactId>
        <version>1.14.0</version>
    </dependency>
    <!-- Table API / SQL: provides StreamTableEnvironment and the SQL planner -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_2.12</artifactId>
        <version>1.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner_2.12</artifactId>
        <version>1.14.0</version>
    </dependency>
    <!--
        MySQL CDC connector. The 2.x releases are published under the
        com.ververica groupId (not org.apache.flink), and the 2.2.x line is
        the one built against Flink 1.14.
    -->
    <dependency>
        <groupId>com.ververica</groupId>
        <artifactId>flink-connector-mysql-cdc</artifactId>
        <version>2.2.1</version>
    </dependency>
    <!-- Spring Boot dependencies (version managed by the Spring Boot parent/BOM) -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter</artifactId>
    </dependency>
</dependencies>
配置Flink和MySQL CDC
# Custom Flink settings. These are plain application properties: they only
# take effect if the application code reads them explicitly (e.g. via @Value
# or @ConfigurationProperties).
flink:
  checkpoint:
    # Checkpoint interval in milliseconds
    interval: 10000
  # NOTE(review): assumed to be flink.parallelism (sibling of checkpoint) - confirm
  parallelism: 1
spring:
  datasource:
    url: jdbc:mysql://localhost:3306/your_database
    username: your_username
    password: your_password
创建服务类实现实时跟踪
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.springframework.stereotype.Service;
/**
 * Starts a Flink job that captures row-level changes from a MySQL table through
 * the {@code mysql-cdc} connector and prints every change event to stdout.
 */
@Service
public class FlinkCdcService {

    /** Checkpoint interval in milliseconds. */
    private static final long CHECKPOINT_INTERVAL_MS = 10_000L;

    /**
     * Registers the CDC source table, wires a print sink onto it and runs the job.
     *
     * <p>The CDC source is unbounded, so this method blocks for as long as the
     * job is running.
     *
     * @throws IllegalStateException if the Flink job fails; the original failure
     *     is preserved as the cause
     */
    public void startDataStreaming() {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // The MySQL CDC source relies on checkpoints to persist its binlog offset
        // and to move from the snapshot phase to the binlog phase.
        env.enableCheckpointing(CHECKPOINT_INTERVAL_MS);
        final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Register the MySQL table as a Flink CDC source table.
        String name = "inventory";
        tableEnv.executeSql("CREATE TABLE " + name + " (" +
                " id INT," +
                " name STRING," +
                " description STRING," +
                " weight DECIMAL(10, 3)," +
                // mysql-cdc 2.x requires a primary key when incremental snapshot
                // reading is enabled (the default).
                // NOTE(review): assumes `id` is the primary key of the MySQL table - confirm.
                " PRIMARY KEY (id) NOT ENFORCED" +
                ") WITH (" +
                " 'connector' = 'mysql-cdc'," +
                " 'hostname' = 'localhost'," +
                " 'port' = '3306'," +
                " 'username' = 'your_username'," +
                " 'password' = 'your_password'," +
                " 'database-name' = 'your_database'," +
                " 'table-name' = 'your_table'" +
                ")");

        // TableResult.print() returns void and cannot be assigned to a DataStream.
        // Convert the query result to a changelog stream instead, so that the
        // print sink belongs to the topology submitted by env.execute() below.
        tableEnv.toChangelogStream(tableEnv.sqlQuery("SELECT * FROM " + name)).print();

        try {
            env.execute("Flink CDC Demo");
        } catch (Exception e) {
            // Surface the failure to the caller instead of hiding it in stderr.
            throw new IllegalStateException("Flink CDC job failed", e);
        }
    }
}
配置 SpringBoot 启动类
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point. Once the application context is ready, the
 * {@link CommandLineRunner} callback hands control to {@link FlinkCdcService}
 * to start the CDC job.
 */
@SpringBootApplication
public class FlinkCdcApplication implements CommandLineRunner {

    /** Service that builds and runs the Flink CDC job; injected by Spring. */
    @Autowired
    private FlinkCdcService flinkCdcService;

    /**
     * Invoked by Spring Boot after the context has started.
     *
     * @param args command-line arguments passed to the application (unused)
     * @throws Exception if starting the data stream fails
     */
    @Override
    public void run(String... args) throws Exception {
        this.flinkCdcService.startDataStreaming();
    }

    /**
     * Boots the Spring application.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(FlinkCdcApplication.class, args);
    }
}