RocketMQ为了更好地保证数据的安全性,除了对数据进行本地持久化以外,还增加了主从同步,即将主节点的数据同步到从节点。在4.5.1版本后,主从同步也可起到主从节点切换的高可用作用,因为该版本以后,支持通过DLedger来实现主从动态选举。主从同步是通过线程任务来执行的,并且涉及多个线程任务:Socket接收线程、Socket读取线程、Socket写线程、业务处理线程。有关Socket的部分都是通过NIO来处理的,这块知识有兴趣的读者可自行学习,本章不做更多关于NIO的讲解。接下来先看看主从同步涉及的处理方法及调用链:
HAService.start
->acceptSocketService.beginAccept() // NIO准备工作(Socket创建、绑定、Selector注册等)
->acceptSocketService.start() // 与从节点连接传输的线程
->HAConnection.start() // run方法里面有调用
->readSocketService.start() // 读线程
->readSocketService.processReadEvent
->HAConnection.this.haService.notifyTransferSome
->writeSocketService.start() // 写线程,就是把消息数据写到从节点的
->groupTransferService.start() // 业务处理线程
->haClient.start() // 从节点服务
既然涉及到同步双方(主从节点),那么到底是怎么交互的呢,老规矩,先从几个问题开始。
1、主从节点到底谁先说话(前提是两者已经完成网络连接)?
答:由从节点开始说话。
2、第一句说的啥?
答:从节点主动告诉主节点,当前自己本地最大的偏移(commitlog文件的最大偏移)。
3、主节点收到从节点的说话后,做了什么?
答:主节点先根据从节点的偏移,确定要不要同步,如果要,就把需要同步的那部分消息序列化传输到从节点。
4、从节点接收到同步消息后,又做了什么?
答:从节点收到消息后,存储到commitlog文件,然后再一次上报偏移量(如问题2)
以上就是主从同步要经过的过程及各自承担的角色。接下来就通过源码来讲解具体的实现。
13.1 主从节点网络连接
服务端(主)通过NIO创建服务端Socket
这个类是 AcceptSocketService
/**
 * Prepares the master-side listening socket for slave connections.
 *
 * <p>Opens the server channel and a selector, enables address reuse, binds to
 * {@code socketAddressListen}, switches the channel to non-blocking mode and
 * registers it with the selector for accept events.
 *
 * @throws Exception if any of the NIO setup steps fails
 */
public void beginAccept() throws Exception {
    // Keep a local handle so the setup steps below read as one sequence on the same channel.
    final ServerSocketChannel listenChannel = ServerSocketChannel.open();
    this.serverSocketChannel = listenChannel;

    this.selector = RemotingUtil.openSelector();

    // SO_REUSEADDR has to be set before the socket is bound.
    listenChannel.socket().setReuseAddress(true);
    listenChannel.socket().bind(this.socketAddressListen);

    // A channel must be non-blocking before it can be registered with a selector.
    listenChannel.configureBlocking(false);
    listenChannel.register(this.selector, SelectionKey.OP_ACCEPT);
}
启动服务端线程等待客户端连接
这段源码在类 AcceptSocketService 中(HAService 的内部类)
/**
 * Accept loop of the master: waits for incoming slave connections and wraps each
 * accepted channel in a started {@code HAConnection}.
 *
 * <p>The loop runs until the service is stopped. Any exception raised while handling
 * a batch of selected keys is logged and the loop carries on.
 */
public void run() {
    log.info(this.getServiceName() + " service started");

    while (!this.isStopped()) {
        try {
            // Wait up to one second for readiness events.
            this.selector.select(1000);
            final Set<SelectionKey> readyKeys = this.selector.selectedKeys();
            if (readyKeys == null) {
                continue;
            }

            for (final SelectionKey key : readyKeys) {
                // Only accept events are expected on this selector.
                if ((key.readyOps() & SelectionKey.OP_ACCEPT) == 0) {
                    log.warn("Unexpected ops in select " + key.readyOps());
                    continue;
                }

                final SocketChannel slaveChannel = ((ServerSocketChannel) key.channel()).accept();
                if (slaveChannel == null) {
                    continue;
                }

                HAService.log.info("HAService receive new connection, "
                    + slaveChannel.socket().getRemoteSocketAddress());
                try {
                    // Create the connection wrapper, start it, then track it in HAService.
                    final HAConnection connection = new HAConnection(HAService.this, slaveChannel);
                    connection.start();
                    HAService.this.addConnection(connection);
                } catch (Exception e) {
                    log.error("new HAConnection exception", e);
                    slaveChannel.close();
                }
            }

            // The selected-key set is not emptied by the selector, so clear it after processing.
            readyKeys.clear();
        } catch (Exception e) {
            log.error(this.getServiceName() + " service has exception.", e);
        }
    }

    log.info(this.getServiceName() + " service end");
}
客户端(从)通过NIO连接服务端
这个类是 HAClient(其中 openSelector 是 RemotingUtil 中的静态方法)
/**
 * Creates the slave-side HA client and opens the selector it will use.
 *
 * @throws IOException if the selector cannot be opened
 */
public HAClient() throws IOException {
    // Selector creation is delegated to RemotingUtil.openSelector().
    selector = RemotingUtil.openSelector();
}
/**
 * Opens a {@link Selector}, trying the epoll-based provider first on Linux.
 *
 * <p>On Linux the provider class {@code sun.nio.ch.EPollSelectorProvider} is looked up
 * reflectively and its static {@code provider()} method is invoked. If that does not
 * yield a selector (class missing, reflection failure, or a non-Linux platform), the
 * method falls back to {@link Selector#open()}.
 *
 * @return an open selector, never {@code null}
 * @throws IOException if the fallback {@link Selector#open()} fails
 */
public static Selector openSelector() throws IOException {
    Selector selector = null;

    if (isLinuxPlatform()) {
        try {
            // Class.forName and getMethod throw rather than return null, so no null checks are needed.
            final Class<?> epollProviderClass = Class.forName("sun.nio.ch.EPollSelectorProvider");
            try {
                final Method providerFactory = epollProviderClass.getMethod("provider");
                final SelectorProvider epollProvider = (SelectorProvider) providerFactory.invoke(null);
                if (epollProvider != null) {
                    selector = epollProvider.openSelector();
                }
            } catch (final Exception e) {
                log.warn("Open ePoll Selector for linux platform exception", e);
            }
        } catch (final Exception e) {
            // ignore: the provider class is unavailable, use the default selector below
        }
    }

    return selector != null ? selector : Selector.open();
}
/**
 * Ensures there is a channel to the master, connecting if none exists yet.
 *
 * <p>When no channel exists, the configured master address is resolved and a connection
 * is attempted; a successfully opened channel is registered for read events. After every
 * such attempt, successful or not, {@code currentReportedOffset} is reset to the store's
 * max physical offset and {@code lastWriteTimestamp} to the current time.
 *
 * @return {@code true} if a channel to the master exists when the method returns
 * @throws ClosedChannelException if the new channel is closed while being registered
 */
private boolean connectMaster() throws ClosedChannelException {
    if (this.socketChannel != null) {
        // Already connected: nothing to do.
        return true;
    }

    final String masterAddr = this.masterAddress.get();
    final SocketAddress masterSocketAddress =
        masterAddr == null ? null : RemotingUtil.string2SocketAddress(masterAddr);

    if (masterSocketAddress != null) {
        this.socketChannel = RemotingUtil.connect(masterSocketAddress);
        if (this.socketChannel != null) {
            // Listen for data pushed by the master.
            this.socketChannel.register(this.selector, SelectionKey.OP_READ);
        }
    }

    // Use the commit log's max offset as the offset to report to the master.
    this.currentReportedOffset = HAService.this.defaultMessageStore.getMaxPhyOffset();
    this.lastWriteTimestamp = System.currentTimeMillis();

    return this.socketChannel != null;
}
13.2 从节点主动上报偏移值
该方法在 HAClient 类中
/**
 * Main loop of the slave-side HA client.
 *
 * <p>Each iteration: make sure a connection to the master exists, report the current
 * offset when it is time to do so, wait on the selector, process whatever the master
 * sent, report the offset again, and finally close the connection if the time since
 * {@code lastWriteTimestamp} exceeds the configured housekeeping interval.
 *
 * <p>If no connection is available, or any exception is thrown, the loop waits five
 * seconds before trying again.
 */
public void run() {
    log.info(this.getServiceName() + " service started");

    while (!this.isStopped()) {
        try {
            if (!this.connectMaster()) {
                // No connection to the master: back off for 5s.
                this.waitForRunning(1000 * 5);
                continue;
            }

            // Report the offset when due; the connection is closed only if the report fails.
            if (this.isTimeToReportOffset() && !this.reportSlaveMaxOffset(this.currentReportedOffset)) {
                this.closeMaster();
            }

            // Block for up to one second waiting for readiness events.
            this.selector.select(1000);

            // Consume what the master pushed (mainly replicated messages).
            if (!this.processReadEvent()) {
                this.closeMaster();
            }

            // Report the newest offset once more; skip housekeeping if that fails.
            if (!reportSlaveMaxOffsetPlus()) {
                continue;
            }

            // Housekeeping: drop the connection if the time since lastWriteTimestamp is too long.
            final long sinceLastWrite =
                HAService.this.getDefaultMessageStore().getSystemClock().now() - this.lastWriteTimestamp;
            if (sinceLastWrite > HAService.this.getDefaultMessageStore().getMessageStoreConfig()
                .getHaHousekeepingInterval()) {
                log.warn("HAClient, housekeeping, found this connection[" + this.masterAddress
                    + "] expired, " + sinceLastWrite);
                this.closeMaster();
                log.warn("HAClient, master not response some time, so close connection");
            }
        } catch (Exception e) {
            log.warn(this.getServiceName() + " service has exception. ", e);
            // Any other failure: back off for 5s as well.
            this.waitForRunning(1000 * 5);
        }
    }

    log.info(this.getServiceName() + " service end");
}
13.3 主节点接收从节点的偏移值上报
这段源码在 HAConnection.ReadSocketService 类中
/**
 * Read loop of the master for a single slave connection.
 *
 * <p>Repeatedly waits on the selector and processes the offsets reported by the slave.
 * The loop ends when the service is stopped, when {@code processReadEvent()} fails, when
 * nothing has been read for longer than the configured housekeeping interval, or when
 * any exception is thrown.
 *
 * <p>On exit the method stops this service and the paired write service, removes the
 * connection from {@code HAService}, decrements the connection count, cancels the
 * selection key and closes the selector and the socket channel. The two are closed
 * independently, so a failure closing the selector cannot leave the socket channel open.
 */
public void run() {
    HAConnection.log.info(this.getServiceName() + " service started");

    while (!this.isStopped()) {
        try {
            this.selector.select(1000);

            // Handle the offset(s) reported by the slave.
            boolean ok = this.processReadEvent();
            if (!ok) {
                HAConnection.log.error("processReadEvent error");
                break;
            }

            // Housekeeping: give up on the connection if nothing was read for too long.
            long interval = HAConnection.this.haService.getDefaultMessageStore().getSystemClock().now() - this.lastReadTimestamp;
            if (interval > HAConnection.this.haService.getDefaultMessageStore().getMessageStoreConfig().getHaHousekeepingInterval()) {
                log.warn("ha housekeeping, found this connection[" + HAConnection.this.clientAddr + "] expired, " + interval);
                break;
            }
        } catch (Exception e) {
            HAConnection.log.error(this.getServiceName() + " service has exception.", e);
            break;
        }
    }

    // Tear down both directions of the connection.
    this.makeStop();
    writeSocketService.makeStop();
    haService.removeConnection(HAConnection.this);
    HAConnection.this.haService.getConnectionCount().decrementAndGet();

    SelectionKey sk = this.socketChannel.keyFor(this.selector);
    if (sk != null) {
        sk.cancel();
    }

    // Close each resource in its own try block: an IOException from the selector
    // must not prevent the socket channel from being closed.
    try {
        this.selector.close();
    } catch (IOException e) {
        HAConnection.log.error("", e);
    }
    try {
        this.socketChannel.close();
    } catch (IOException e) {
        HAConnection.log.error("", e);
    }

    HAConnection.log.info(this.getServiceName() + " service end");
}
/**
 * Reads the data currently available from the slave and extracts the most recent
 * offset it reported.
 *
 * <p>Reported offsets are consumed as 8-byte {@code long} values. When several have
 * accumulated in the buffer, only the last complete one is used.
 *
 * @return {@code false} if the channel read returns a negative count or throws an
 *         {@link IOException}; {@code true} otherwise
 */
private boolean processReadEvent() {
    // Number of consecutive reads that returned zero bytes.
    int readSizeZeroTimes = 0;
    // The read buffer has no room left: flip it and restart the processed-position bookkeeping.
    if (!this.byteBufferRead.hasRemaining()) {
        this.byteBufferRead.flip();
        this.processPostion = 0;
    }
    while (this.byteBufferRead.hasRemaining()) {
        try {
            // Read from the slave's socket into the buffer.
            int readSize = this.socketChannel.read(this.byteBufferRead);
            if (readSize > 0) {
                readSizeZeroTimes = 0;
                this.lastReadTimestamp = HAConnection.this.haService.getDefaultMessageStore().getSystemClock().now();
                // An offset is an 8-byte long, so at least 8 unprocessed bytes are required.
                if ((this.byteBufferRead.position() - this.processPostion) >= 8) {
                    // Round the write position down to a multiple of 8: the end of the last complete long.
                    int pos = this.byteBufferRead.position() - (this.byteBufferRead.position() % 8);
                    // Absolute read of that last complete long; earlier unprocessed values are skipped.
                    long readOffset = this.byteBufferRead.getLong(pos - 8);
                    this.processPostion = pos;
                    // Record the offset most recently reported by the slave.
                    HAConnection.this.slaveAckOffset = readOffset;
                    // A negative slaveRequestOffset means no offset has been recorded yet; store the first one.
                    if (HAConnection.this.slaveRequestOffset < 0) {
                        HAConnection.this.slaveRequestOffset = readOffset;
                        log.info("slave[" + HAConnection.this.clientAddr + "] request offset " + readOffset);
                    }
                    // Hand the reported offset to HAService.
                    // NOTE(review): presumably this wakes threads waiting for replication progress - confirm in notifyTransferSome.
                    HAConnection.this.haService.notifyTransferSome(HAConnection.this.slaveAckOffset);
                }
            } else if (readSize == 0) {
                // Stop reading for this pass after three consecutive empty reads.
                if (++readSizeZeroTimes >= 3) {
                    break;
                }
            } else {
                // A negative read count is reported as failure to the caller.
                log.error("read socket[" + HAConnection.this.clientAddr + "] < 0");
                return false;
            }
        } catch (IOException e) {
            log.error("processReadEvent exception", e);
            return false;
        }
    }
    return true;
}
13.4 主节点写内容至从节点
该源码在 WriteSocketService 类中
/**
 * Write loop of the master for a single slave connection: pushes commit log data
 * to the slave.
 *
 * <p>Each iteration: wait until the slave has reported an offset, decide once where
 * the transfer starts, finish any unfinished write (or send a zero-length header when
 * the connection has been idle longer than the heartbeat interval), then select the
 * next chunk of commit log data and send it preceded by a header holding the chunk's
 * start offset and size.
 *
 * <p>The loop ends when the service is stopped or any exception is thrown; the method
 * then releases the selected buffer, stops both socket services, removes the
 * connection and closes the selector and the channel.
 */
public void run() {
    HAConnection.log.info(this.getServiceName() + " service started");
    while (!this.isStopped()) {
        try {
            this.selector.select(1000);
            // -1 means the slave has not reported an offset yet; sleep briefly and re-check.
            if (-1 == HAConnection.this.slaveRequestOffset) {
                Thread.sleep(10);
                continue;
            }
            // -1 means the transfer start position has not been decided yet.
            if (-1 == this.nextTransferFromWhere) {
                // The slave reported offset 0.
                if (0 == HAConnection.this.slaveRequestOffset) {
                    // Max offset of the master's commit log.
                    long masterOffset = HAConnection.this.haService.getDefaultMessageStore().getCommitLog().getMaxOffset();
                    // Round down to a multiple of the commit log file size, i.e. the start offset
                    // of the file containing the max offset; the transfer starts there.
                    masterOffset =
                        masterOffset
                            - (masterOffset % HAConnection.this.haService.getDefaultMessageStore().getMessageStoreConfig()
                            .getMapedFileSizeCommitLog());
                    if (masterOffset < 0) {
                        masterOffset = 0;
                    }
                    // Start position of the next transfer.
                    this.nextTransferFromWhere = masterOffset;
                } else {
                    // The slave reported a non-zero offset: continue from exactly that offset.
                    this.nextTransferFromWhere = HAConnection.this.slaveRequestOffset;
                }
                log.info("master transfer data from " + this.nextTransferFromWhere + " to slave[" + HAConnection.this.clientAddr
                    + "], and slave request " + HAConnection.this.slaveRequestOffset);
            }
            // Was the previous write fully sent?
            if (this.lastWriteOver) {
                // Time elapsed since lastWriteTimestamp.
                long interval =
                    HAConnection.this.haService.getDefaultMessageStore().getSystemClock().now() - this.lastWriteTimestamp;
                // Idle for longer than the configured heartbeat interval: send a header-only packet.
                if (interval > HAConnection.this.haService.getDefaultMessageStore().getMessageStoreConfig()
                    .getHaSendHeartbeatInterval()) {
                    // Build the header: an 8-byte offset followed by a 4-byte body size (0 here).
                    this.byteBufferHeader.position(0);
                    this.byteBufferHeader.limit(headerSize);
                    this.byteBufferHeader.putLong(this.nextTransferFromWhere);
                    this.byteBufferHeader.putInt(0);
                    this.byteBufferHeader.flip();
                    // Send it; transferData() is defined outside this snippet.
                    this.lastWriteOver = this.transferData();
                    if (!this.lastWriteOver)
                        continue;
                }
            } else {
                // The previous write was not finished: keep sending it before selecting new data.
                this.lastWriteOver = this.transferData();
                if (!this.lastWriteOver)
                    continue;
            }
            // Select the commit log data starting at nextTransferFromWhere.
            SelectMappedBufferResult selectResult =
                HAConnection.this.haService.getDefaultMessageStore().getCommitLogData(this.nextTransferFromWhere);
            if (selectResult != null) {
                int size = selectResult.getSize();
                // Cap a single transfer at the configured batch size.
                if (size > HAConnection.this.haService.getDefaultMessageStore().getMessageStoreConfig().getHaTransferBatchSize()) {
                    size = HAConnection.this.haService.getDefaultMessageStore().getMessageStoreConfig().getHaTransferBatchSize();
                }
                // Remember where this chunk starts, then advance the cursor past it.
                long thisOffset = this.nextTransferFromWhere;
                this.nextTransferFromWhere += size;
                selectResult.getByteBuffer().limit(size);
                this.selectMappedBufferResult = selectResult;
                // Header for this chunk: start offset followed by body size.
                this.byteBufferHeader.position(0);
                this.byteBufferHeader.limit(headerSize);
                this.byteBufferHeader.putLong(thisOffset);
                this.byteBufferHeader.putInt(size);
                this.byteBufferHeader.flip();
                // Send header and body.
                this.lastWriteOver = this.transferData();
            } else {
                // No data available at that offset: wait (100 passed as the timeout) before the next pass.
                HAConnection.this.haService.getWaitNotifyObject().allWaitForRunning(100);
            }
        } catch (Exception e) {
            HAConnection.log.error(this.getServiceName() + " service has exception.", e);
            break;
        }
    }
    // Cleanup and shutdown of the connection.
    HAConnection.this.haService.getWaitNotifyObject().removeFromWaitingThreadTable();
    if (this.selectMappedBufferResult != null) {
        this.selectMappedBufferResult.release();
    }
    this.makeStop();
    readSocketService.makeStop();
    haService.removeConnection(HAConnection.this);
    SelectionKey sk = this.socketChannel.keyFor(this.selector);
    if (sk != null) {
        sk.cancel();
    }
    // NOTE(review): both closes share one try block, so an IOException from selector.close()
    // skips socketChannel.close().
    try {
        this.selector.close();
        this.socketChannel.close();
    } catch (IOException e) {
        HAConnection.log.error("", e);
    }
    HAConnection.log.info(this.getServiceName() + " service end");
}
13.5 主线程等待同步
前面1-4节讲的都是同步操作背后的逻辑,这些逻辑是异步执行的;但是对于用户同步发送消息来说,整个过程是同步的,那么主线程就要等待同步的完成。这一节,咱们看看主线程做了什么。
进入到CommitLog.handleHA方法
/**
 * Waits for a just-appended message to be replicated to a slave when the broker runs
 * as {@code SYNC_MASTER}.
 *
 * <p>Nothing happens unless the broker role is {@code SYNC_MASTER} and the message asks
 * to wait for storage. If the slave is not OK for the end offset of the append, the
 * status is set to {@code SLAVE_NOT_AVAILABLE}. Otherwise a {@code GroupCommitRequest}
 * is queued and the method waits for it up to the configured sync flush timeout,
 * setting {@code FLUSH_SLAVE_TIMEOUT} if the wait does not succeed.
 *
 * @param result           append result; supplies the wrote offset and the wrote bytes
 * @param putMessageResult result object whose status is updated on failure
 * @param messageExt       the message that was appended
 */
public void handleHA(AppendMessageResult result, PutMessageResult putMessageResult, MessageExt messageExt) {
    if (BrokerRole.SYNC_MASTER != this.defaultMessageStore.getMessageStoreConfig().getBrokerRole()) {
        return;
    }

    final HAService haService = this.defaultMessageStore.getHaService();
    if (!messageExt.isWaitStoreMsgOK()) {
        return;
    }

    // Is the slave OK for the end offset of this append?
    if (!haService.isSlaveOK(result.getWroteOffset() + result.getWroteBytes())) {
        putMessageResult.setPutMessageStatus(PutMessageStatus.SLAVE_NOT_AVAILABLE);
        return;
    }

    // Queue a request for the end offset and wake the waiting threads so it is picked up.
    final GroupCommitRequest request = new GroupCommitRequest(result.getWroteOffset() + result.getWroteBytes());
    haService.putRequest(request);
    haService.getWaitNotifyObject().wakeupAll();

    // Wait for the request, bounded by the sync flush timeout.
    final boolean replicated =
        request.waitForFlush(this.defaultMessageStore.getMessageStoreConfig().getSyncFlushTimeout());
    if (replicated) {
        return;
    }

    log.error("do sync transfer other node, wait return, but failed, topic: " + messageExt.getTopic() + " tags: "
        + messageExt.getTags() + " client address: " + messageExt.getBornHostNameString());
    putMessageResult.setPutMessageStatus(PutMessageStatus.FLUSH_SLAVE_TIMEOUT);
}
HAService.putRequest
/**
 * Hands a replication wait request over to the group transfer service.
 *
 * @param request the request to enqueue
 */
public void putRequest(final CommitLog.GroupCommitRequest request) {
    // Pure delegation; the request is handled by groupTransferService.
    groupTransferService.putRequest(request);
}
GroupTransferService任务执行
/**
 * Loop of the group transfer service: until stopped, calls {@code waitForRunning(10)}
 * and then {@code doWaitTransfer()}.
 *
 * <p>An exception from either call is logged and the loop continues.
 */
public void run() {
    log.info(this.getServiceName() + " service started");

    while (!this.isStopped()) {
        try {
            this.waitForRunning(10);
            // The actual work happens here.
            this.doWaitTransfer();
        } catch (Exception e) {
            log.warn(this.getServiceName() + " service has exception. ", e);
        }
    }

    log.info(this.getServiceName() + " service end");
}
/**
 * Resolves every request in {@code requestsRead}.
 *
 * <p>A request counts as transferred once {@code push2SlaveMaxOffset} has reached the
 * request's next offset. If it has not, the check is retried up to five times, with a
 * {@code waitForRunning(1000)} call before each retry. The request's waiter is then
 * woken with the outcome, and the list is cleared after all requests are handled.
 */
private void doWaitTransfer() {
    synchronized (this.requestsRead) {
        if (this.requestsRead.isEmpty()) {
            return;
        }

        for (CommitLog.GroupCommitRequest pending : this.requestsRead) {
            boolean replicated = HAService.this.push2SlaveMaxOffset.get() >= pending.getNextOffset();

            // Up to five further checks, each preceded by waitForRunning(1000).
            int retriesLeft = 5;
            while (!replicated && retriesLeft-- > 0) {
                this.notifyTransferObject.waitForRunning(1000);
                replicated = HAService.this.push2SlaveMaxOffset.get() >= pending.getNextOffset();
            }

            if (!replicated) {
                log.warn("transfer messsage to slave timeout, " + pending.getNextOffset());
            }

            // Wake whoever is waiting on this request and pass along the outcome.
            pending.wakeupCustomer(replicated);
        }

        this.requestsRead.clear();
    }
}