客户端代码分析
首先,我是从官方示例example项目开始分析的。
项目结构
其中,assembly文件夹中都是一些xml配置,我们将其忽略就可以了。
bin目录里面是一些可执行文件,如果我们将变量配置好,是可以直接通过可执行文件来执行,实现自己的功能。
conf文件夹里是日志的配置文件,忽略过去就好,毕竟重点在代码那块。
resource文件夹里就是一些配置了,就是通过更改这些配置文件来使可执行文件有路径可寻。
接下来就是重点了:
java项目
包结构是com.alibaba.otter.canal.example。再往下是一些测试类和kafka、rocketmq包。
我们首先来看外面的这些类。
BaseCanalClientTest.java
/**
 * Outline of the base class for the canal client examples: shared fields plus
 * printing and GTID helper methods. Method bodies are omitted in this listing.
 */
public class BaseCanalClientTest {
// NOTE(review): the logger is created for AbstractCanalClientTest.class, not this class — confirm this is intentional.
protected final static Logger logger = LoggerFactory.getLogger(AbstractCanalClientTest.class);
// Line separator taken from SystemUtils.
protected static final String SEP = SystemUtils.LINE_SEPARATOR;
// Date pattern string.
protected static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
// Run flag; volatile so that a change made by one thread is visible to another.
protected volatile boolean running = false;
// Handler that logs any uncaught exception from a thread it is attached to.
protected Thread.UncaughtExceptionHandler handler = new Thread.UncaughtExceptionHandler() {
public void uncaughtException(Thread t, Throwable e) {
logger.error("parse events has an error", e);
}
};
// Thread
protected Thread thread = null;
// Canal connector
protected CanalConnector connector;
// Output formats
protected static String context_format = null;
protected static String row_format = null;
protected static String transaction_format = null;
protected String destination;
protected void printSummary(Message message, long batchId, int size);
protected String buildPositionForDump(Entry entry);
protected void printEntry(List<Entry> entrys);
protected void printColumn(List<Column> columns);
protected void printXAInfo(List<Pair> pairs);
public void setConnector(CanalConnector connector);
// Example of getting the GTID information of the current Entry
public static String getCurrentGtid(CanalEntry.Header header);
// Example of getting the GTID Sequence No information of the current Entry
public static String getCurrentGtidSn(CanalEntry.Header header);
// Example of getting the GTID Last Committed information of the current Entry
public static String getCurrentGtidLct(CanalEntry.Header header);
可以看出,这个类里都是一些变量,打印方法,获取某些属性的方法,按图索骥,就不一一介绍了,后面有用到再说。
AbstractCanalClientTest.java
/**
 * Base class of the runnable canal client examples. It stores the destination
 * and the connector and provides the fetch / print / ack loop in
 * {@link #process()}.
 */
public class AbstractCanalClientTest extends BaseCanalClientTest {

    /**
     * Creates a client for the given destination without a connector.
     *
     * @param destination the canal destination to subscribe to
     */
    public AbstractCanalClientTest(String destination) {
        this(destination, null);
    }

    /**
     * Creates a client for the given destination and connector.
     *
     * @param destination the canal destination to subscribe to
     * @param connector   the connector used to talk to the canal server, may be null
     */
    public AbstractCanalClientTest(String destination, CanalConnector connector) {
        this.destination = destination;
        this.connector = connector;
    }

    // Starts a separate thread that fetches the data (body omitted in this listing).
    protected void start();

    // Stops the thread (body omitted in this listing).
    protected void stop();

    /**
     * Method executed by the worker thread. While {@code running} is true it
     * connects, subscribes and then repeatedly fetches a batch, prints it and
     * acknowledges it. On any exception the connection is closed and, after a
     * short pause, re-established.
     */
    protected void process() {
        int batchSize = 5 * 1024;
        while (running) {
            try {
                MDC.put("destination", destination);
                connector.connect();
                connector.subscribe();
                while (running) {
                    // Fetch up to batchSize entries without acknowledging them yet.
                    Message message = connector.getWithoutAck(batchSize);
                    long batchId = message.getId();
                    int size = message.getEntries().size();
                    if (batchId != -1 && size != 0) {
                        printSummary(message, batchId, size);
                        printEntry(message.getEntries());
                    }

                    // A batch id of -1 is the "nothing fetched" case, so there is
                    // no batch to confirm and the ack round trip is skipped.
                    if (batchId != -1) {
                        connector.ack(batchId); // commit the confirmation
                    }
                    // connector.rollback(batchId); // on processing failure, roll back
                }
            } catch (Exception e) {
                logger.error("process error!", e);
                // Pause before reconnecting so that a persistent failure does not
                // turn into a tight connect/fail loop; skipped when stopping.
                if (running) {
                    try {
                        Thread.sleep(1000L);
                    } catch (InterruptedException interrupted) {
                        Thread.currentThread().interrupt();
                    }
                }
            } finally {
                connector.disconnect();
                MDC.remove("destination");
            }
        }
    }
}
可以看出,这个类也是为其他类提供基础支持的一个类。
SimpleCanalClientTest.java
/**
 * Test example for stand-alone (single server) mode.
 */
public class SimpleCanalClientTest extends AbstractCanalClientTest {

    /**
     * Creates a client for the given destination; the connector is supplied
     * afterwards through {@code setConnector}.
     *
     * @param destination the canal destination to subscribe to
     */
    public SimpleCanalClientTest(String destination) {
        super(destination);
    }

    /**
     * Connects directly to one canal server on port 11111 of the local host,
     * starts the client and registers a shutdown hook that stops it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Create the link directly from the ip; no HA support.
        String destination = "example";
        // Get the ip of the local host.
        String ip = AddressUtils.getHostIp();
        // Create the canal connector (depends on the canal.client package).
        CanalConnector connector = CanalConnectors.newSingleConnector(new InetSocketAddress(ip, 11111),
            destination,
            "",
            "");

        final SimpleCanalClientTest clientTest = new SimpleCanalClientTest(destination);
        clientTest.setConnector(connector);
        clientTest.start();

        Runtime.getRuntime().addShutdownHook(new Thread() {

            @Override
            public void run() {
                try {
                    logger.info("## stop the canal client");
                    clientTest.stop();
                } catch (Throwable e) {
                    logger.warn("##something goes wrong when stopping canal:", e);
                } finally {
                    logger.info("## canal client is down.");
                }
            }
        });
    }
}
这个类是基于canal单机版进行测试的,目的地(destination)是example,ip默认取本机ip。它调用AbstractCanalClientTest类的start()方法来获取数据。
ClusterCanalClientTest.java
/**
 * Test example for cluster mode.
 */
public class ClusterCanalClientTest extends AbstractCanalClientTest {

    public ClusterCanalClientTest(String destination) {
        super(destination);
    }

    /**
     * Builds a cluster connector from a zookeeper address, starts the client and
     * registers a shutdown hook that stops it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String target = "example";

        // Alternative: connect through a fixed list of canal server addresses;
        // failover is supported when one of the servers crashes.
        // CanalConnector connector = CanalConnectors.newClusterConnector(
        //     Arrays.asList(new InetSocketAddress(AddressUtils.getHostIp(), 11111)),
        //     "stability_test", "", "");

        // Obtain the canal server address dynamically from zookeeper; failover is
        // supported when one of the servers crashes.
        final CanalConnector clusterConnector =
            CanalConnectors.newClusterConnector("127.0.0.1:2181", target, "", "");

        final ClusterCanalClientTest client = new ClusterCanalClientTest(target);
        client.setConnector(clusterConnector);
        client.start();

        // Stop the client when the JVM shuts down, logging each step.
        Thread shutdownHook = new Thread() {

            @Override
            public void run() {
                try {
                    logger.info("## stop the canal client");
                    client.stop();
                } catch (Throwable failure) {
                    logger.warn("##something goes wrong when stopping canal:", failure);
                } finally {
                    logger.info("## canal client is down.");
                }
            }
        };
        Runtime.getRuntime().addShutdownHook(shutdownHook);
    }
}
这个类与SimpleCanalClientTest类其实差不多,无非是一个为单机版,一个是集群版,集群版可根据zk来获取连接。并且当其中一台宕机时,可以进行故障转移不影响正常使用。
SimpleCanalClientPermanceTest.java
package com.alibaba.otter.canal.example;
import java.net.InetSocketAddress;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.TimeUnit;
import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.client.impl.SimpleCanalConnector;
import com.alibaba.otter.canal.protocol.Message;
/**
 * Throughput test for a single canal server: one thread fetches batches and
 * counts their raw entries, a second thread acknowledges the fetched batch ids,
 * and every tenth fetch prints the totals and the resulting tps.
 */
public class SimpleCanalClientPermanceTest {

    /**
     * Runs the fetch loop until an error occurs; the error is printed to stderr.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String destination = "example";
        String ip = "123.126.41.204";
        int port = 20454;
        int batchSize = 1024;
        int count = 0;
        // long so the running total cannot overflow on a long run
        long sum = 0;
        int perSum = 0;
        long start = System.currentTimeMillis();
        long end = 0;
        // Hand-off of batch ids from the fetch loop to the ack thread.
        final ArrayBlockingQueue<Long> queue = new ArrayBlockingQueue<Long>(100);
        try {
            final CanalConnector connector = CanalConnectors.newSingleConnector(new InetSocketAddress(ip, port),
                destination,
                "",
                "");

            Thread ackThread = new Thread(new Runnable() {

                @Override
                public void run() {
                    while (true) {
                        try {
                            long batchId = queue.take();
                            connector.ack(batchId);
                        } catch (InterruptedException e) {
                            // Treat interruption as a stop request instead of
                            // silently looping on.
                            Thread.currentThread().interrupt();
                            return;
                        }
                    }
                }
            });
            // Daemon, so a failure of the fetch loop below lets the JVM exit
            // instead of leaving it blocked on this thread forever.
            ackThread.setDaemon(true);
            ackThread.start();

            ((SimpleCanalConnector) connector).setLazyParseEntry(true);
            connector.connect();
            connector.subscribe();
            while (true) {
                Message message = connector.getWithoutAck(batchSize, 100L, TimeUnit.MILLISECONDS);
                long batchId = message.getId();
                int size = message.getRawEntries().size();
                sum += size;
                perSum += size;
                count++;
                // -1 is the "nothing fetched" case: there is nothing to ack.
                if (batchId != -1) {
                    // put() waits while the queue is full; add() would throw
                    // IllegalStateException and abort the whole test.
                    queue.put(batchId);
                }
                if (count % 10 == 0) {
                    end = System.currentTimeMillis();
                    if (end - start != 0) {
                        // 1000L forces long arithmetic before the division.
                        long tps = (perSum * 1000L) / (end - start);
                        System.out.println(" total : " + sum + " , current : " + perSum + " , cost : " + (end - start)
                                           + " , tps : " + tps);
                        start = end;
                        perSum = 0;
                    }
                }
            }
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }
}
在我看来,这个应该是对数据库进行fetch。canal 记住 client 最新的position。 如果是第一次 fetch,则会从 canal 中保存的最老一条数据开始输出。
至于后面的循环,则是每取 10 批数据就统计一次累计条数、本轮条数和耗时,并据此计算、输出 tps。