一、环境信息
数据库/工具 | 版本 |
---|---|
Apache Doris | 1.2.6 |
MySQL | 8.0.34 |
Apache SeaTunnel | 2.3.3 |
二、数据表
2.1 数据源 Doris 数据表
- 源表1
CREATE TABLE IF NOT EXISTS ads.ads_device_switch_performance (
`event_time` DATETIMEV2 COMMENT '业务时间',
`device_id` VARCHAR(32) COMMENT '设备id',
`device_type` VARCHAR(32) COMMENT '设备类型',
`device_name` VARCHAR(128) COMMENT '设备名称',
`cpu_usage` INT COMMENT 'CPU使用率百分比'
)
UNIQUE KEY(`event_time`, `device_id`)
PARTITION BY RANGE(`event_time`) ()
DISTRIBUTED BY HASH(`event_time`) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.time_zone" = "Asia/Shanghai",
"dynamic_partition.end" = "3",
"dynamic_partition.prefix" = "p",
"dynamic_partition.replication_allocation" = "tag.location.default: 3",
"dynamic_partition.create_history_partition" = "true",
"dynamic_partition.history_partition_num" = "30",
"enable_unique_key_merge_on_write" = "true",
"disable_auto_compaction" = "false"
);
- 源表2
CREATE TABLE IF NOT EXISTS ads.ads_device_router_performance (
`event_time` DATETIMEV2 COMMENT '业务时间',
`device_id` VARCHAR(32) COMMENT '设备id',
`device_type` VARCHAR(32) COMMENT '设备类型',
`device_name` VARCHAR(128) COMMENT '设备名称',
`cpu_usage` INT COMMENT 'CPU使用率百分比'
)
UNIQUE KEY(`event_time`, `device_id`)
PARTITION BY RANGE(`event_time`) ()
DISTRIBUTED BY HASH(`event_time`) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.time_zone" = "Asia/Shanghai",
"dynamic_partition.end" = "3",
"dynamic_partition.prefix" = "p",
"dynamic_partition.replication_allocation" = "tag.location.default: 3",
"dynamic_partition.create_history_partition" = "true",
"dynamic_partition.history_partition_num" = "30",
"enable_unique_key_merge_on_write" = "true",
"disable_auto_compaction" = "false"
);
2.2 数据汇 MySQL 数据表
- 目标表
CREATE TABLE `device_performance` (
`id` BIGINT NOT NULL AUTO_INCREMENT COMMENT '表主键',
`create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`update_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '更新时间',
`timestamp` DATETIME NOT NULL COMMENT '业务时间',
`device_id` VARCHAR(32) COMMENT '设备id',
`device_type` VARCHAR(32) COMMENT '设备类型',
`device_name` VARCHAR(128) NOT NULL COMMENT '设备名称',
`cpu_usage` FLOAT NOT NULL COMMENT 'CPU利用率单位是%',
PRIMARY KEY (`id`),
) ENGINE=InnoDB AUTO_INCREMENT=614789 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='设备状态';
CREATE UNIQUE INDEX unique_idx ON device_performance (`timestamp`,`device_id`);
三、需求
离线同步 Doris
多张数据表 到 MySQL
数据库。
这里 同步 两张 Doris
数据表 到 MySQL
数据库同一张表,多对多,多对一 实际 做法也是完全相同的,在 SeaTunnel 里 只需要分别在 source
、transform
、sink
中创建相应数量的 配置代码块即可。
四、SeaTunnel 同步方案
4.1 同步配置
- device_performance.config
env {
job.mode="BATCH"
job.name="device_performance"
}
source {
Jdbc {
url="jdbc:mysql://192.168.91.120:9030/ads?tinyInt1isBit=false&enabledTLSProtocols=TLSv1.2&rewriteBatchedStatements=true"
driver="com.mysql.cj.jdbc.Driver"
user=doris
password="Doris123456"
result_table_name="switch_src"
query="SELECT `event_time`, `device_id`, `device_type`, `device_name`, `cpu_usage` FROM ads.ads_device_switch_performance WHERE event_time = REPLACE('"${event_time}"', 'T', ' ');"
}
Jdbc {
url="jdbc:mysql://192.168.91.120:9030/ads?tinyInt1isBit=false&enabledTLSProtocols=TLSv1.2&rewriteBatchedStatements=true"
driver="com.mysql.cj.jdbc.Driver"
user=doris
password="Doris123456"
result_table_name="router_src"
query="SELECT `event_time`, `device_id`, `device_type`, `device_name`, `cpu_usage` FROM ads.ads_device_router_performance WHERE event_time = REPLACE('"${event_time}"', 'T', ' ');"
}
}
transform {
Sql {
source_table_name = "switch_src"
result_table_name = "switch_dst"
query = "SELECT NOW() AS create_time, NOW() AS update_time, event_time AS timestamp, device_id, device_type, device_name, cpu_usage FROM switch_src;"
}
Sql {
source_table_name = "router_src"
result_table_name = "router_dst"
query = "SELECT NOW() AS create_time, NOW() AS update_time, event_time AS timestamp, device_id, device_type, device_name, cpu_usage FROM router_src;"
}
}
sink {
Jdbc {
url="jdbc:mysql://1192.168.91.100:3306/device?charset=utf8"
driver="com.mysql.cj.jdbc.Driver"
user=mysql
password="mysql123456"
source_table_name = "switch_dst"
query="INSERT INTO device_performance (create_time, update_time, timestamp, device_id, device_type, devices_name, cpu_usage ) VALUES(?, ?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE update_time=VALUES(update_time), device_type=VALUES(device_type), devices_name=VALUES(devices_name), cpu_usage=VALUES(cpu_usage);"
}
Jdbc {
url="jdbc:mysql://1192.168.91.100:3306/device?charset=utf8"
driver="com.mysql.cj.jdbc.Driver"
user=mysql
password="mysql123456"
source_table_name = "router_dst"
query="INSERT INTO device_performance (create_time, update_time, timestamp, device_id, device_type, devices_name, cpu_usage ) VALUES(?, ?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE update_time=VALUES(update_time), device_type=VALUES(device_type), devices_name=VALUES(devices_name), cpu_usage=VALUES(cpu_usage);"
}
}
4.2 执行脚本
- 使用 本地模式
-e local
seatunnel 2.3.3
通过-i
传递参数, 当传入Doris
的DATETIME
类型参数2023-10-10 10:40:00
,seatunnel
识别参数值 为2023-10-10
,推测seatunnel
默认将 空格 作为参数分隔符,所以在这里使用2023-10-10T10:40:00
规避这个问题
#!/bin/bash
${SEATUNNEL_HOME}/bin/seatunnel.sh \
--config device_performance.config \
-e local \
-i event_time='2023-10-10T10:40:00'