实时链路架构图
环境部署
使用docker compose部署便于测试
- 创建一个docker-compose.yml文件,定义要启动的服务
---
# Compose stack for the realtime-pipeline demo:
# MySQL (source DB) -> Debezium via ksqlDB embedded Connect -> Kafka -> JDBC sink
version: '2'
services:
  mysql:
    image: mysql:8.0.32
    hostname: mysql
    container_name: mysql
    ports:
      - "3306:3306"
    environment:
      MYSQL_ROOT_PASSWORD: mysql-pw
      MYSQL_DATABASE: call-center
      MYSQL_USER: example-user
      MYSQL_PASSWORD: example-pw
    volumes:
      # Debezium-friendly server settings (row binlog + GTIDs),
      # provided by ./mysql/custom-config.cnf on the host
      - "./mysql/custom-config.cnf:/etc/mysql/conf.d/custom-config.cnf"

  zookeeper:
    image: confluentinc/cp-zookeeper:7.3.0
    hostname: zookeeper
    container_name: zookeeper
    ports:
      - "2181:2181"
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000

  broker:
    image: confluentinc/cp-kafka:7.3.0
    hostname: broker
    container_name: broker
    depends_on:
      - zookeeper
    ports:
      # Host-reachable listener; containers inside the network use broker:9092
      - "29092:29092"
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:9092,PLAINTEXT_HOST://localhost:29092
      # Single-broker cluster: internal topics must use replication factor 1
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1

  schema-registry:
    image: confluentinc/cp-schema-registry:7.3.0
    hostname: schema-registry
    container_name: schema-registry
    depends_on:
      - zookeeper
      - broker
    ports:
      - "8081:8081"
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: "PLAINTEXT://broker:9092"

  ksqldb-server:
    image: confluentinc/ksqldb-server:0.28.2
    hostname: ksqldb-server
    container_name: ksqldb-server
    depends_on:
      - broker
      - schema-registry
    ports:
      - "8088:8088"
    volumes:
      # Connector plugins (Debezium, JDBC, mysql driver) unpacked on the host
      - "./confluent-hub-components/:/usr/share/kafka/plugins/"
    environment:
      KSQL_LISTENERS: "http://0.0.0.0:8088"
      KSQL_BOOTSTRAP_SERVERS: "broker:9092"
      KSQL_KSQL_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
      KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: "true"
      KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: "true"
      # Configuration to embed Kafka Connect support.
      KSQL_CONNECT_GROUP_ID: "ksql-connect-cluster"
      KSQL_CONNECT_BOOTSTRAP_SERVERS: "broker:9092"
      KSQL_CONNECT_KEY_CONVERTER: "org.apache.kafka.connect.storage.StringConverter"
      KSQL_CONNECT_VALUE_CONVERTER: "io.confluent.connect.avro.AvroConverter"
      KSQL_CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
      KSQL_CONNECT_CONFIG_STORAGE_TOPIC: "_ksql-connect-configs"
      KSQL_CONNECT_OFFSET_STORAGE_TOPIC: "_ksql-connect-offsets"
      KSQL_CONNECT_STATUS_STORAGE_TOPIC: "_ksql-connect-statuses"
      KSQL_CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1
      KSQL_CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1
      KSQL_CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1
      KSQL_CONNECT_PLUGIN_PATH: "/usr/share/kafka/plugins"

  ksqldb-cli:
    image: confluentinc/ksqldb-cli:0.28.2
    container_name: ksqldb-cli
    depends_on:
      - broker
      - ksqldb-server
    entrypoint: /bin/sh
    tty: true
- 配置mysql
MySQL 需要一些自定义配置才能很好地与 Debezium 配合使用,所以首先要注意这一点。如果您有兴趣, Debezium 有专门的文档,但本指南仅涵盖要点。使用以下内容创建一个新文件mysql/custom-config.cnf
# Custom MySQL server settings so Debezium can capture changes from the binlog
# (see the Debezium MySQL connector documentation for the full list)
[mysqld]
# Unique replication server id — required for binlog clients such as Debezium
server-id = 223344
# Enable the binary log with this base file name
log_bin = mysql-bin
# Row-based logging with full row images: Debezium needs complete before/after rows
binlog_format = ROW
binlog_row_image = FULL
# Keep binlogs for 10 days
# NOTE(review): expire_logs_days is deprecated in MySQL 8.0 in favor of
# binlog_expire_logs_seconds — confirm before relying on it long-term
expire_logs_days = 10
# Global transaction IDs give consistent positions across restarts/failover
gtid_mode = ON
enforce_gtid_consistency = ON
- 下载必要的连接器
下载地址:https://www.confluent.io/product/connectors/
所需的连接器:
Debezium MySQL CDC Source Connector 实时数据需要
JDBC Connector JDBC连接数据库获取批量数据
mysql-connector-java-8.0.28.jar mysql数据库驱动
- 解压驱动包到指定的目录(docker-compose.yml中ksqldb-server容器的插件目录KSQL_CONNECT_PLUGIN_PATH对应的宿主机目录./confluent-hub-components/)
**注意:JDBC Connector 包中没有mysql的驱动,所以在解压后将mysql-connector-java-8.0.28.jar 放到./confluent-hub-components/confluentinc-kafka-connect-jdbc-10.6.4/lib目录下**
- 启动容器
docker compose up -d
- mysql数据库用户授权及建表
## Enter the mysql container
docker exec -it mysql /bin/bash
## Log in to mysql as root
mysql -uroot -pmysql-pw
## For simplicity, grant example-user (created by the compose env vars)
## all privileges from any host — fine for a local demo, never for production
GRANT ALL PRIVILEGES ON *.* TO 'example-user' WITH GRANT OPTION;
## mysql_native_password lets older JDBC/Debezium clients authenticate without
## caching_sha2_password public-key retrieval issues
ALTER USER 'example-user'@'%' IDENTIFIED WITH mysql_native_password BY 'example-pw';
FLUSH PRIVILEGES;
## Switch database and create the source table.
## FIX: the schema name contains a hyphen, so it must be backtick-quoted —
## an unquoted `USE call-center;` is a MySQL syntax error
USE `call-center`;
CREATE TABLE calls (name TEXT, reason TEXT, duration_seconds INT);
## Seed some test data
INSERT INTO calls (name, reason, duration_seconds) VALUES ("michael", "purchase", 540);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("michael", "help", 224);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("colin", "help", 802);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("derek", "purchase", 10204);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("derek", "help", 600);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("colin", "refund", 105);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("michael", "help", 2030);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("colin", "purchase", 800);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("derek", "help", 2514);
INSERT INTO calls (name, reason, duration_seconds) VALUES ("derek", "refund", 325);
- 启动 Debezium 连接器
## Enter the ksqldb-cli container and start the ksqlDB CLI against the server
docker exec -it ksqldb-cli ksql http://ksqldb-server:8088
## Tell ksqlDB to start all queries from the earliest point of each topic
SET 'auto.offset.reset' = 'earliest';
## Create a Debezium source connector that writes every change in the
## call-center database to Kafka topics (one topic per captured table).
## NOTE(review): database.whitelist / table.whitelist are the legacy
## Debezium 1.x property names; Debezium 2.x renamed them to
## database.include.list / table.include.list — confirm against the
## connector version actually downloaded.
CREATE SOURCE CONNECTOR calls_reader WITH (
'connector.class' = 'io.debezium.connector.mysql.MySqlConnector',
'database.hostname' = 'mysql',
'database.port' = '3306',
'database.user' = 'example-user',
'database.password' = 'example-pw',
'database.allowPublicKeyRetrieval' = 'true',
'database.server.id' = '184054',
'database.server.name' = 'call-center-db',
'database.whitelist' = 'call-center',
'database.history.kafka.bootstrap.servers' = 'broker:9092',
'database.history.kafka.topic' = 'call-center',
'table.whitelist' = 'call-center.calls',
'include.schema.changes' = 'false'
);
SHOW TOPICS;
## Print the change topic (named <server.name>.<database>.<table>)
PRINT 'call-center-db.call-center.calls' FROM BEGINNING;
## Check the connector status
DESCRIBE CONNECTOR calls_reader;
- 创建stream
## Create a stream over the Debezium change topic; values are decoded as
## Avro using the schema registered in Schema Registry
CREATE STREAM calls WITH (
kafka_topic = 'call-center-db.call-center.calls',
value_format = 'avro'
);
select * from calls EMIT CHANGES;
- 创建物化视图
## Materialized view: per caller name, the number of distinct call reasons
## and the most recent reason. Debezium change events wrap the new row state
## in an "after" struct, hence the after-> accessors.
CREATE TABLE support_view AS
SELECT after->name AS name,
count_distinct(after->reason) AS distinct_reasons,
latest_by_offset(after->reason) AS last_reason
FROM calls
GROUP BY after->name
EMIT CHANGES;
select * from support_view EMIT CHANGES;
- 推送数据到目标数据库
## Enter the mysql container
docker exec -it mysql /bin/bash
## Log in to mysql as root
mysql -uroot -pmysql-pw
## Switch database and create the sink target table.
## FIX: the schema name contains a hyphen, so it must be backtick-quoted
USE `call-center`;
## FIX: the table name must match the sink connector's
## table.name.format = 'SUPPORT_VIEW_PK' (the original had a typo,
## SUPPORT_VIEW1_PK, so the sink and the later query would not find it)
CREATE TABLE SUPPORT_VIEW_PK (NAME VARCHAR(32), DISTINCT_REASONS INT, LAST_REASON TEXT,PRIMARY KEY (NAME));
## Enter the ksqldb-cli container and start the ksqlDB CLI
## (the original comment said "Debezium connector" — this step actually
## creates the JDBC sink connector)
docker exec -it ksqldb-cli ksql http://ksqldb-server:8088
## Tell ksqlDB to start all queries from the earliest point of each topic
SET 'auto.offset.reset' = 'earliest';
## Create a JDBC sink connector that pushes the SUPPORT_VIEW topic into MySQL.
## insert.mode=upsert with pk.mode=record_key/pk.fields=NAME means the table's
## key (from GROUP BY name) becomes the primary-key column, so each aggregate
## update overwrites the existing row instead of appending.
CREATE SINK CONNECTOR SINK_SUPPORT_VIEW_PK WITH (
'connector.class' = 'io.confluent.connect.jdbc.JdbcSinkConnector',
'connection.url' = 'jdbc:mysql://mysql:3306/call-center',
'connection.user' = 'example-user',
'connection.password'='example-pw',
'table.name.format'='SUPPORT_VIEW_PK',
'insert.mode'='upsert',
'pk.mode'='record_key',
'pk.fields'='NAME',
'topics' = 'SUPPORT_VIEW'
);
## Query the data from inside the mysql container
select * from SUPPORT_VIEW_PK;
删除容器及数据
docker compose down -v