1. Base Environment
| Component | Version |
| --- | --- |
| Kubernetes (three-VM cluster, deployed with KubeSphere) | v1.21.5 |
| Hadoop | 3.3.6 |
| Spark (spark-3.4.1-bin-hadoop3) | 3.4.1 |
| Hive | 3.1.3 |
| HBase | 2.4.9 |
| Flink | 1.17.0 |
| flink-connector-mysql-cdc | 2.4.1 |
| MySQL | 8.0.33 |
| Hudi | 0.14.1 |
2. Writing Simulated Data into MySQL (doris-mysql)
A scheduled task parses the log files, wraps each record in a fixed format, and sends it through a Netty client to the server; the server decodes each packet, parses it, and writes the data into MySQL. The server-side UDP handler:
```java
@Override
protected void channelRead0(ChannelHandlerContext channelHandlerContext, DatagramPacket datagramPacket) throws Exception {
    try {
        // Decode the UDP payload as UTF-8 text
        ByteBuf byteBuf = datagramPacket.content();
        String str = byteBuf.toString(CharsetUtil.UTF_8);
        if (StringUtils.isNotBlank(str)) {
            // Parse the raw log line into an event map, log it, and persist it to MySQL
            Map<String, Object> event = ProcessUtil.parseProcess(str);
            LOG.info(JSONUtil.toJsonStr(event));
            pushMsgToMysql(event);
        }
        // Acknowledge the sender with a plain "ok" datagram
        String resStr = "ok";
        byte[] resBytes = resStr.getBytes(StandardCharsets.UTF_8);
        DatagramPacket resData = new DatagramPacket(Unpooled.copiedBuffer(resBytes), datagramPacket.sender());
        channelHandlerContext.writeAndFlush(resData);
    } catch (Exception e) {
        LOG.error("channelRead0 failed", e);
    }
}
```
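The sending side is not shown in the original; below is a minimal sketch of a Netty UDP client that would pair with the handler above. The class name, server host, port, and payload are all hypothetical:

```java
import io.netty.bootstrap.Bootstrap;
import io.netty.buffer.Unpooled;
import io.netty.channel.Channel;
import io.netty.channel.ChannelInboundHandlerAdapter;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.DatagramPacket;
import io.netty.channel.socket.nio.NioDatagramChannel;
import io.netty.util.CharsetUtil;

import java.net.InetSocketAddress;

// Hypothetical sender: ships one formatted log record to the UDP server above.
public class LogUdpClient {
    public static void main(String[] args) throws Exception {
        NioEventLoopGroup group = new NioEventLoopGroup();
        try {
            Channel channel = new Bootstrap()
                    .group(group)
                    .channel(NioDatagramChannel.class)
                    // No-op inbound handler; extend it to read the server's "ok" ack
                    .handler(new ChannelInboundHandlerAdapter())
                    .bind(0).sync().channel();
            String logLine = "...";  // one formatted log record (format elided in the original)
            channel.writeAndFlush(new DatagramPacket(
                    Unpooled.copiedBuffer(logLine, CharsetUtil.UTF_8),
                    new InetSocketAddress("server-host", 9999)))  // hypothetical host/port
                   .sync();
        } finally {
            group.shutdownGracefully();
        }
    }
}
```

In the original setup, the scheduled task would invoke this send path once per parsed log record.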
The SQL DDL script:
```sql
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for business
-- ----------------------------
DROP TABLE IF EXISTS `business`;
CREATE TABLE `business` (
  `uuid` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `product` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'product name',
  `promotion` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'promotion',
  `value_added_service` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'value-added service',
  `logistics` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'logistics',
  `weight` double NULL DEFAULT NULL COMMENT 'weight',
  `color` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'color',
  `version` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'version',
  `shop` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'shop',
  `evaluate` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'review',
  `order_num` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'order number',
  `rider` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'courier',
  `order_time` datetime NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP COMMENT 'order time',
  `create_time` datetime NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP COMMENT 'creation time',
  `pay_price` decimal(10, 2) NULL DEFAULT NULL COMMENT 'payment amount',
  `pay_type` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'payment method',
  `address` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'shipping address',
  PRIMARY KEY (`uuid`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC;

SET FOREIGN_KEY_CHECKS = 1;
```
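The `pushMsgToMysql` call in the handler above is not shown in the original; the sketch below illustrates one plausible shape using plain JDBC, assuming the parsed event map uses the column names as keys and reusing the connection settings that appear in the CDC source later (only a few columns shown):

```java
import java.math.BigDecimal;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.Map;
import java.util.UUID;

// Hypothetical implementation of pushMsgToMysql: inserts one parsed event into business.
public class BusinessWriter {
    private static final String INSERT_SQL =
            "INSERT INTO business (uuid, product, shop, order_num, pay_price) VALUES (?, ?, ?, ?, ?)";

    public static void pushMsgToMysql(Map<String, Object> event) throws Exception {
        // Placeholder connection details; a real service would use a pooled DataSource
        try (Connection conn = DriverManager.getConnection(
                     "jdbc:mysql://mysql:3306/demo?serverTimezone=Asia/Shanghai", "root", "123456");
             PreparedStatement ps = conn.prepareStatement(INSERT_SQL)) {
            ps.setString(1, UUID.randomUUID().toString());   // primary key
            ps.setString(2, (String) event.get("product"));
            ps.setString(3, (String) event.get("shop"));
            ps.setString(4, (String) event.get("order_num"));
            Object price = event.get("pay_price");
            ps.setBigDecimal(5, price == null ? null : new BigDecimal(price.toString()));
            ps.executeUpdate();
        }
    }
}
```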
3. Flink CDC: Reading from MySQL and Writing to Kafka
3.1 Application Code
Note: this code still needs hardening; as written, the job may exit at runtime. One concrete gap: the EXACTLY_ONCE Kafka sink only commits its transactions when a checkpoint completes, so checkpointing must be enabled (added in the code below).
```java
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MysqlFlinkCdcKafkaStream {

    public static void main(String[] args) {
        // Expose the Flink web UI on a fixed port for local runs
        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 10000);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        // Required for the EXACTLY_ONCE Kafka sink: transactions only commit on checkpoints
        env.enableCheckpointing(60_000);

        // MySQL CDC source: snapshot the table first, then stream the binlog
        MySqlSource<String> mySqlSource = MySqlSource.<String>builder()
                .serverTimeZone("Asia/Shanghai")
                .hostname("mysql")
                .port(3306)
                .username("root")
                .password("123456")
                .databaseList("demo")
                // Since CDC 2.3.0, table names must be qualified with the database name (db.table)
                .tableList("demo.business")
                .startupOptions(StartupOptions.initial())
                // Emit change events as Debezium-style JSON strings
                .deserializer(new JsonDebeziumDeserializationSchema())
                .build();

        DataStreamSource<String> streamSource =
                env.fromSource(mySqlSource, WatermarkStrategy.noWatermarks(), "mysql-cdc-source");

        // Transactional Kafka sink; the transaction timeout must exceed the checkpoint interval
        KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
                .setBootstrapServers("kafka:9092")
                .setRecordSerializer(
                        KafkaRecordSerializationSchema.<String>builder()
                                .setTopic("demo")
                                .setValueSerializationSchema(new SimpleStringSchema())
                                .build()
                )
                .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
                .setTransactionalIdPrefix("demo-")
                .setProperty(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, "300000")
                .build();

        streamSource.sinkTo(kafkaSink);

        try {
            env.execute("MySQL-Stream_Flink_CDC_SQL-Kafka");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
```
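Once the job is running, the pipeline can be sanity-checked with Kafka's stock console consumer against the broker address used above; `read_committed` matters here because the sink writes transactionally:

```bash
# Read the demo topic from the beginning; only committed transactions are visible
kafka-console-consumer.sh \
  --bootstrap-server kafka:9092 \
  --topic demo \
  --from-beginning \
  --isolation-level read_committed
```

Each message is a Debezium-style change-event envelope: the row images sit under `before`/`after`, `op` marks the operation (`r` snapshot read, `c` insert, `u` update, `d` delete), and `source` records the binlog position.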
3.2 Packaging with Maven
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example.cloud</groupId>
<artifactId>MysqlFlinkCdcKafkaStream</artifactId>
<version>2.4.5</version>
<name>MysqlFlinkCdcKafkaStream</name>
<properties>
<java.version>1.8</java.version>