1. 配置 Docker Compose 文件
首先,创建一个 docker-compose.yml 文件,启动 MongoDB、Elasticsearch 和 Kibana 服务。
version: '2.1'
services:
  # MongoDB as a single-node replica set (replica set is required for
  # change streams, which the MongoDB CDC connector reads).
  mongo:
    image: "mongo:4.0-xenial"
    command: --replSet rs0 --smallfiles --oplogSize 128
    ports:
      - "27017:27017"
    environment:
      - MONGO_INITDB_ROOT_USERNAME=mongouser
      - MONGO_INITDB_ROOT_PASSWORD=mongopw
  # Single-node Elasticsearch as the sink for enriched orders.
  elasticsearch:
    image: elastic/elasticsearch:7.6.0
    environment:
      - cluster.name=docker-cluster
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.type=single-node
    ports:
      - "9200:9200"
      - "9300:9300"
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
  # Kibana for inspecting the synchronized index.
  kibana:
    image: elastic/kibana:7.6.0
    ports:
      - "5601:5601"
启动服务:
docker-compose up -d
2. 初始化 MongoDB 数据和副本集
进入 MongoDB 容器并初始化副本集和数据:
docker-compose exec mongo /usr/bin/mongo -u mongouser -p mongopw --authenticationDatabase admin
初始化副本集并插入示例数据:
// Initialize the single-node replica set (change streams — and therefore
// the MongoDB CDC connector — only work on a replica set).
rs.initiate();
rs.status();
// Switch to the database used by the tutorial.
use mgdb;
// Insert sample order documents.
db.orders.insertMany([
{
order_id: 101,
order_date: ISODate("2020-07-30T10:08:22.001Z"),
customer_id: 1001,
price: NumberDecimal("50.50"),
product: { name: 'scooter', description: 'Small 2-wheel scooter' },
order_status: false
},
{
order_id: 102,
order_date: ISODate("2020-07-30T10:11:09.001Z"),
customer_id: 1002,
price: NumberDecimal("15.00"),
product: { name: 'car battery', description: '12V car battery' },
order_status: false
},
{
order_id: 103,
order_date: ISODate("2020-07-30T12:00:30.001Z"),
customer_id: 1003,
price: NumberDecimal("25.25"),
product: { name: 'hammer', description: '16oz carpenter hammer' },
order_status: false
}
]);
// Insert sample customer documents (joined with orders later in Flink SQL).
db.customers.insertMany([
{ customer_id: 1001, name: 'Jark', address: 'Hangzhou' },
{ customer_id: 1002, name: 'Sally', address: 'Beijing' },
{ customer_id: 1003, name: 'Edward', address: 'Shanghai' }
]);
3. 下载必要的 JAR 文件
确保你已经下载以下 JAR 包并放置在 <FLINK_HOME>/lib/ 目录中:
flink-sql-connector-elasticsearch7-3.0.1-1.17.jar
flink-sql-connector-mongodb-cdc-3.0-SNAPSHOT.jar
4. 启动 Flink 集群
启动 Flink 集群并启动 Flink SQL CLI:
./bin/start-cluster.sh
然后启动 Flink SQL CLI:
./bin/sql-client.sh
5. 配置 Flink SQL
在 Flink SQL CLI 中执行以下 SQL 配置命令:
设置检查点间隔和本地时区
-- Enable checkpointing every 3 seconds; CDC sources commit progress on
-- checkpoints, so this also controls end-to-end sync latency.
-- NOTE: recent Flink SQL CLI versions (including 1.17) require the
-- quoted form SET 'key' = 'value';
SET 'execution.checkpointing.interval' = '3s';
-- Session time zone, used when rendering TIMESTAMP_LTZ values.
SET 'table.local-time-zone' = 'Asia/Shanghai';
创建 MongoDB CDC 表
创建 orders 和 customers 表,使用 MongoDB CDC 连接器。
-- Source table backed by the MongoDB CDC connector: streams changes from
-- the "orders" collection of database "mgdb".
CREATE TABLE orders (
_id STRING, -- MongoDB document _id; natural primary key of the collection
order_id INT,
order_date TIMESTAMP_LTZ(3),
customer_id INT,
price DECIMAL(10, 5),
product ROW<name STRING, description STRING>, -- nested document mapped to ROW
order_status BOOLEAN,
PRIMARY KEY (_id) NOT ENFORCED -- declared for changelog semantics; Flink does not validate it
) WITH (
'connector' = 'mongodb-cdc',
'hosts' = 'localhost:27017',
'username' = 'mongouser',
'password' = 'mongopw',
'database' = 'mgdb',
'collection' = 'orders'
);
-- Source table backed by the MongoDB CDC connector: streams changes from
-- the "customers" collection, used as the dimension side of the join.
CREATE TABLE customers (
_id STRING, -- MongoDB document _id
customer_id INT,
name STRING,
address STRING,
PRIMARY KEY (_id) NOT ENFORCED -- declared for changelog semantics; not validated
) WITH (
'connector' = 'mongodb-cdc',
'hosts' = 'localhost:27017',
'username' = 'mongouser',
'password' = 'mongopw',
'database' = 'mgdb',
'collection' = 'customers'
);
创建 Elasticsearch 表
创建用于存储同步数据的 Elasticsearch 表:
-- Sink table: writes the joined result into the Elasticsearch index
-- "enriched_orders" (upserts keyed by order_id).
CREATE TABLE enriched_orders (
order_id INT,
order_date TIMESTAMP_LTZ(3),
customer_id INT,
price DECIMAL(10, 5),
product ROW<name STRING, description STRING>,
order_status BOOLEAN,
customer_name STRING,
customer_address STRING,
PRIMARY KEY (order_id) NOT ENFORCED -- used as the Elasticsearch document id
) WITH (
'connector' = 'elasticsearch-7',
'hosts' = 'http://localhost:9200',
'index' = 'enriched_orders'
);
执行数据同步
使用 SQL JOIN 将 orders 和 customers 表联接,并将结果插入到 Elasticsearch:
-- Continuously join orders with customers and upsert the enriched rows
-- into the Elasticsearch sink; LEFT JOIN keeps orders whose customer is
-- not (yet) present.
INSERT INTO enriched_orders
SELECT o.order_id,
o.order_date,
o.customer_id,
o.price,
o.product,
o.order_status,
c.name,
c.address
FROM orders AS o
LEFT JOIN customers AS c ON o.customer_id = c.customer_id;
6. 在 MongoDB 中进行数据变更
接下来,你可以在 MongoDB 中对数据进行更改,查看数据是否成功同步到 Elasticsearch。
// Insert a new order (verifies INSERT events reach Elasticsearch).
// insertOne() is the supported API; the legacy insert() is deprecated.
db.orders.insertOne({
order_id: 104,
order_date: ISODate("2020-07-30T12:00:30.001Z"),
customer_id: 1004,
price: NumberDecimal("25.25"),
product: { name: 'rocks', description: 'box of assorted rocks' },
order_status: false
});
// Insert the matching customer so the LEFT JOIN can enrich order 104.
db.customers.insertOne({
customer_id: 1004,
name: 'Jacob',
address: 'Shanghai'
});
// Update the order status (verifies UPDATE events are synchronized).
db.orders.updateOne(
{ order_id: 104 },
{ $set: { order_status: true } }
);
// Delete the order (verifies DELETE events are synchronized).
db.orders.deleteOne(
{ order_id : 104 }
);
7. 检查 Elasticsearch 数据
你可以通过 Kibana 或直接查询 Elasticsearch 来查看数据是否已成功同步到 enriched_orders 索引。
使用 Kibana 查看数据,或者使用以下命令查询 Elasticsearch:
curl -X GET "localhost:9200/enriched_orders/_search?pretty"