1.简介
本篇使用Canal监控MySQL中binlog日志的变化,解析二进制日志为map类型的数据,并引入Protobuf实现对象的序列化,写入到kafka,生成实时数仓ODS层的原始数据。
2.代码实现
2.1项目的入口
package com.bigdata.app;
import com.bigdata.client.CanalClient;
/**
* @ description: 项目启动类
* @ author: spencer
* @ date: 2021/1/12 13:41
*/
public class CanalClientApp {
public static void main(String[] args){
CanalClient canalClient = new CanalClient();
canalClient.start();
}
}
2.2 Canal客户端
package com.bigdata.client;
import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.protocol.CanalEntry;
import com.alibaba.otter.canal.protocol.Message;
import com.bigdata.bean.CanalRowData;
import com.bigdata.constant.Constant;
import com.bigdata.kafka.KafkaSender;
import com.bigdata.util.FlinkUtil;
import com.bigdata.util.JsonUtil;
import com.google.protobuf.InvalidProtocolBufferException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * @ description: Canal client
 * 1. Initializes the client connector
 * 2. Parses binlog data and forwards it downstream (Kafka)
 * @ author: spencer
 * @ date: 2021/1/12 13:42
 */
public class CanalClient {
// Maximum number of binlog entries pulled per batch
public static final Integer BATCH_SIZE = 5 * 1024;
// Canal client connector
// NOTE(review): static but assigned from the instance constructor — verify
// only a single CanalClient is ever created, otherwise instances clobber it.
public static CanalConnector canalConnector = null;
// Kafka producer used to forward parsed rows
KafkaSender kafkaSender = null;
// Canal server socket addresses, parsed from configuration in the constructor
List<InetSocketAddress> socketAddressList = null;
/**
 * Builds the Kafka sender and a cluster-mode Canal connector.
 *
 * <p>The Canal server list is read from configuration as a comma-separated
 * host list; every host shares the single configured port.</p>
 */
public CanalClient() {
    kafkaSender = new KafkaSender();
    String[] canalServers = FlinkUtil.getParam().get(Constant.CANAL_SERVER_IP).split(",");
    // Hoist the port lookup out of the loop: it is loop-invariant.
    int canalServerPort = FlinkUtil.getParam().getInt(Constant.CANAL_SERVER_PORT);
    socketAddressList = new ArrayList<>(canalServers.length);
    for (String canalServer : canalServers) {
        socketAddressList.add(new InetSocketAddress(canalServer, canalServerPort));
    }
    // NOTE(review): writes a static field from an instance constructor — confirm
    // only one CanalClient is ever constructed (see CanalClientApp.main).
    canalConnector = CanalConnectors.newClusterConnector(
            socketAddressList,
            FlinkUtil.getParam().get(Constant.CANAL_SERVER_DESTINATION),
            FlinkUtil.getParam().get(Constant.CANAL_SERVER_USERNAME),
            FlinkUtil.getParam().get(Constant.CANAL_SERVER_PASSWORD)
    );
}
/**
* 实现对数据的解析,并写入到kafka
*/
public void start() {
try {
canalConnector.connect();
System.out.println(canalConnector);
canalConnector.rollback();
canalConnector.subscribe(FlinkUtil.getParam().get(Constant.CANAL_SUBSCRIBE_FILTER));
while (true) {
Message message = canalConnector.getWithoutAck(BATCH_SIZE);
// 注意先获取id,再获取size
long batchId = message.getId();
int size = message.getEntries().size();
if (size == 0 || size == -1) {
//