zookeeper QuorumCnxManager源码解析

参考
https://blog.csdn.net/a814333256/article/details/102049968
http://fenlan.github.io/2018/04/19/ZookeeperLeaderElection/

数据的发送与接收

概览

(原文此处为一张示意图,展示下文所述四个线程与各队列之间的关系;图片在转载时丢失。)
主要涉及了四个线程,线程之间靠队列实现通讯。
发送数据时WorkerSender线程从FastLeaderElection.sendqueue中读取数据,调用QuorumCnxManager#toSend将数据传递给QuorumCnxManager.queueSendMap中相应队列。SendWorker线程会循环检测QuorumCnxManager.queueSendMap中有无可发送数据,如果有就调用SendWorker.dout将其发送
接收数据时RecvWorker线程会循环检测RecvWorker.din有无新数据,如果有新数据就将其添加至QuorumCnxManager.recvQueue中。WorkerReceiver线程会循环调用QuorumCnxManager#pollRecvQueue并尝试从中取出数据并将其添加至FastLeaderElection.recvqueue中。

源码解析

下面的源码分析是对"概览"的进一步解释

WorkerSender线程

WorkerSender#run

            /**
             * Sender loop: repeatedly waits (up to 3000 ms per attempt) for the next
             * {@code ToSend} on {@code sendqueue} and passes it to
             * {@code process}. The loop ends when {@code stop} becomes true or the
             * wait is interrupted.
             */
            public void run() {
                while (!stop) {
                    System.out.println("wjl Election$Messenger$WorkerSender ThreadName:" + Thread.currentThread().getName());

                    final ToSend next;
                    try {
                        next = sendqueue.poll(3000, TimeUnit.MILLISECONDS);
                    } catch (InterruptedException e) {
                        // An interrupt ends the sender loop.
                        break;
                    }

                    // A timed-out poll yields null; just go around again.
                    if (next != null) {
                        process(next);
                    }
                }
                LOG.info("WorkerSender is down");
            }

process(m)

            /**
             * Serializes one outgoing vote and hands it to the connection manager.
             * Invoked by run() for every message taken from the send queue.
             *
             * @param m     message to send; its state, leader, zxid, electionEpoch and
             *              peerEpoch are encoded via buildMsg, and its sid selects the
             *              recipient
             */
            void process(ToSend m) {
                final int stateOrdinal = m.state.ordinal();
                final ByteBuffer payload =
                        buildMsg(stateOrdinal, m.leader, m.zxid, m.electionEpoch, m.peerEpoch);
                manager.toSend(m.sid, payload);
            }

manager.toSend(m.sid, requestBuffer);

    /**
     * Queues a message for delivery to server {@code sid}. Currently only
     * leader election uses it.
     *
     * A message addressed to this server is looped back into the receive
     * queue. Any other message is placed on the per-server send queue
     * (registered on first use) and a connection attempt to that server
     * is made.
     */
    public void toSend(Long sid, ByteBuffer b) {
        // Loopback: a message to myself never touches the network.
        if (this.mySid == sid) {
            b.position(0);
            addToRecvQueue(new Message(b.duplicate(), sid));
            return;
        }

        // Register a queue for this server unless one is already present,
        // then enqueue on whichever queue ended up in the map.
        ArrayBlockingQueue<ByteBuffer> fresh = new ArrayBlockingQueue<ByteBuffer>(SEND_CAPACITY);
        ArrayBlockingQueue<ByteBuffer> registered = queueSendMap.putIfAbsent(sid, fresh);
        addToSendQueue(registered != null ? registered : fresh, b);

        // Start a new connection if there isn't one already.
        connectOne(sid);
    }

SendWorker线程

        /**
         * SendWorker thread body. First, if this server's send queue is missing
         * or empty, re-sends the entry stored in lastMessageSent for this sid (if
         * any). Then loops while running/!shutdown/sock != null, polling the
         * queue registered for this sid in queueSendMap and sending whatever it
         * gets. Always calls finish() on the way out.
         */
        @Override
        public void run() {
            threadCnt.incrementAndGet();
            try {
                /**
                 * If there is nothing in the queue to send, then we
                 * send the lastMessage to ensure that the last message
                 * was received by the peer. The message could be dropped
                 * in case self or the peer shutdown their connection
                 * (and exit the thread) prior to reading/processing
                 * the last message. Duplicate messages are handled correctly
                 * by the peer.
                 *
                 * If the send queue is non-empty, then we have a recent
                 * message than that stored in lastMessage. To avoid sending
                 * stale message, we should send the message in the send queue.
                 */
                ArrayBlockingQueue<ByteBuffer> bq = queueSendMap.get(sid);
                if (bq == null || isSendQueueEmpty(bq)) {
                   ByteBuffer b = lastMessageSent.get(sid);
                   if (b != null) {
                       LOG.debug("Attempting to send lastMessage to sid=" + sid);
                       send(b);
                   }
                }
            } catch (IOException e) {
                LOG.error("Failed to send last message. Shutting down thread.", e);
                this.finish();
            }
            
            try {
                while (running && !shutdown && sock != null) {
                    System.out.println("wjl Manager$SendWorker ThreadName:" + Thread.currentThread().getName());
                    ByteBuffer b = null;
                    try {
                        // Look the queue up on every iteration rather than caching it.
                        ArrayBlockingQueue<ByteBuffer> bq = queueSendMap
                                .get(sid);
                        if (bq != null) {
                            b = pollSendQueue(bq, 1000, TimeUnit.MILLISECONDS);// try to take the next message from the queue
                        } else {
                            LOG.error("No queue of incoming messages for " +
                                      "server " + sid);
                            break;
                        }

                        if(b != null){// got a message: record it as the last one sent, then send it
                            lastMessageSent.put(sid, b);
                            send(b);
                        }
                    } catch (InterruptedException e) {
                        // Interrupts are logged and the loop condition is re-checked.
                        LOG.warn("Interrupted while waiting for message on queue",
                                e);
                    }
                }
            } catch (Exception e) {
                LOG.warn("Exception when using channel: for id " + sid
                         + " my id = " + QuorumCnxManager.this.mySid
                         + " error = " + e);
            }
            this.finish();
            LOG.warn("Send worker leaving thread");
        }

send(b);

        /**
         * Writes one length-prefixed frame to {@code dout} and flushes it: a
         * 4-byte length equal to {@code b.capacity()}, followed by exactly that
         * many payload bytes read from the start of {@code b}.
         *
         * The payload written is the copy taken with {@code b.get(...)}, not
         * {@code b.array()}. The backing array can be larger than the buffer,
         * start at a non-zero offset, or be inaccessible (direct or read-only
         * buffers), in which case the bytes written would not match the
         * length prefix.
         *
         * As before, this resets the position of {@code b} to 0 and leaves it
         * at the end of the bytes read.
         *
         * @param b buffer holding the message; if fewer than {@code capacity()}
         *          bytes are readable the error is logged and nothing is written
         * @throws IOException if writing to or flushing the stream fails
         */
        synchronized void send(ByteBuffer b) throws IOException {
            byte[] msgBytes = new byte[b.capacity()];
            try {
                b.position(0);
                b.get(msgBytes);
            } catch (BufferUnderflowException be) {
                LOG.error("BufferUnderflowException ", be);
                return;
            }
            dout.writeInt(b.capacity());
            // msgBytes has exactly capacity() bytes, matching the prefix above.
            dout.write(msgBytes);
            dout.flush();
        }

RecvWorker线程

        /**
         * RecvWorker thread body: reads length-prefixed frames from {@code din}
         * and appends each one to the receive queue, tagged with this worker's
         * sid. On any exception or normal loop exit it finishes the paired
         * SendWorker and closes the socket if there is one.
         */
        @Override
        public void run() {
            threadCnt.incrementAndGet();
            try {
                while (running && !shutdown && sock != null) {
                    System.out.println("wjl Manager$RecvWorker ThreadName:" + Thread.currentThread().getName());

                    // Each frame starts with an int giving the payload length.
                    final int length = din.readInt();
                    final boolean lengthOk = length > 0 && length <= PACKETMAXSIZE;
                    if (!lengthOk) {
                        throw new IOException("Received packet with invalid packet: " + length);
                    }

                    // Read exactly `length` bytes into a fresh array and queue them.
                    final byte[] payload = new byte[length];
                    din.readFully(payload, 0, length);
                    final ByteBuffer frame = ByteBuffer.wrap(payload);
                    addToRecvQueue(new Message(frame.duplicate(), sid));
                }
            } catch (Exception e) {
                LOG.warn("Connection broken for id " + sid + ", my id = "
                         + QuorumCnxManager.this.mySid + ", error = " , e);
            } finally {
                LOG.warn("Interrupting SendWorker");
                sw.finish();
                if (sock != null) {
                    closeSocket(sock);
                }
            }
        }

addToRecvQueue(new Message(message.duplicate(), sid));

    /**
     * Appends {@code msg} to the {@link #recvQueue}, evicting the head
     * element first when the queue has no remaining capacity, so that the
     * caller never blocks on insertion.
     *
     * The check / evict / insert sequence runs under {@code recvQLock} so
     * that two inserting threads cannot interleave: without the lock, one
     * thread could take the slot the other had just freed, and the other
     * thread's insert would then fail. Consumers polling the queue are not
     * blocked by this lock, since the queue provides its own
     * synchronization for that.
     *
     * @param msg
     *          Reference to the message to be inserted in the queue
     */
    public void addToRecvQueue(Message msg) {
        synchronized(recvQLock) {
            final boolean full = recvQueue.remainingCapacity() == 0;
            if (full) {
                try {
                    recvQueue.remove();
                } catch (NoSuchElementException emptied) {
                    // A concurrent poll() may have drained the queue meanwhile.
                    LOG.debug("Trying to remove from an empty " +
                        "recvQueue. Ignoring exception " + emptied);
                }
            }

            try {
                recvQueue.add(msg);
            } catch (IllegalStateException stillFull) {
                // Not expected: a slot was freed above while holding the lock.
                LOG.error("Unable to insert element in the recvQueue " + stillFull);
            }
        }
    }

注意,recvQueue是一个ArrayBlockingQueue<Message>对象,而Message中包含sid,所以这个队列可以被很多peer使用(待验证)。

WorkerReceiver线程

            public void run() {

                Message response;
                while (!stop) {
                    System.out.println("wjl Election$Messenger$WorkerReceive ThreadName:" + Thread.currentThread().getName());
                    // Sleeps on receive
                    try{
                        response = manager.pollRecvQueue(3000, TimeUnit.MILLISECONDS);//获取RecvWorker线程生产的数据
               ..............................
                LOG.info("WorkerReceiver is down");
            }

建立连接

概览

连接的建立分两种,接收和发起
接收:QuorumCnxManager$Listener#run
发起:QuorumCnxManager#connectOne,此方法通常由WorkerSender线程调用

不论接收还是发起,都会根据是否使用sasl判断是否使用异步。如果使用异步,就会分别用QuorumCnxManager$QuorumConnectionReqThread(发起连接)或QuorumCnxManager$QuorumConnectionReceiverThread(接收连接)创建任务,并将此任务交由线程池管理

源码解析

下面的源码分析是对"概览"的进一步解释

。。。待完善

集群配置下单节点启动异常

配置文件中是集群关系,但是只启动一个节点时会出现以下两种异常。了解这俩异常可以更清晰理解启动流程

WorkerSender线程下的异常

2021-06-20 08:11:54,686 [myid:2] - WARN  [WorkerSender[myid=2]:QuorumCnxManager@589] - Cannot open channel to 1 at election address localhost/127.0.0.1:3877
java.net.ConnectException: Connection refused: connect
	at java.net.DualStackPlainSocketImpl.waitForConnect(Native Method)
	at java.net.DualStackPlainSocketImpl.socketConnect(DualStackPlainSocketImpl.java:85)
	at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
	at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
	at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
	at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:172)
	at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
	at java.net.Socket.connect(Socket.java:606)
	at org.apache.zookeeper.server.quorum.QuorumCnxManager.connectOne(QuorumCnxManager.java:563)
	at org.apache.zookeeper.server.quorum.QuorumCnxManager.toSend(QuorumCnxManager.java:534)
	at org.apache.zookeeper.server.quorum.FastLeaderElection$Messenger$WorkerSender.process(FastLeaderElection.java:461)
	at org.apache.zookeeper.server.quorum.FastLeaderElection$Messenger$WorkerSender.run(FastLeaderElection.java:442)

这个异常的持续次数与集群中节点数量有关。如果节点数量是2n+1,那么这个异常会出现2n次。

为什么之后不出现了?因为在几个必要条件下这个异常才会出现。
1、FastLeaderElection.sendQueue不为空
2、从FastLeaderElection.sendQueue取出的数据不是发给本机的(即目标sid不等于本机sid)
3、无法连接到FastLeaderElection.sendQueue中所指定的远程机器

流程中涉及到两个线程,这两个线程用FastLeaderElection.sendQueue实现通讯

1、FastLeaderElection.sendQueue不为空

//WorkerSender#run
public void run() {
                while (!stop) {
                    try {
                        ToSend m = sendqueue.poll(3000, TimeUnit.MILLISECONDS);// per this walkthrough: if the other nodes never start, this queue yields 2n+1 items in total
                        if(m == null) continue;// once those 2n+1 items are drained the poll times out with null, so skip straight to the next iteration
                        process(m);
                    } catch (InterruptedException e) {
                        // an interrupt ends the sender loop
                        break;
                    }
                }
                LOG.info("WorkerSender is down");
            }

为什么这个队列只会吐出2n+1个数据?这些数据是哪儿来的?
在QuorumPeer线程中会向队列中添加元素

//QuorumPeer#run
    @Override
    public void run() {
        。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。

        try {
            /*
             * Main loop
             */
            while (running) {
                switch (getPeerState()) {
                case LOOKING:
                    LOG.info("LOOKING");

                    if (Boolean.getBoolean("readonlymode.enabled")) {
                        LOG.info("Attempting to start ReadOnlyZooKeeperServer");

                        ...........................
                        try {
                            roZkMgr.start();
                            setBCVote(null);
                            setCurrentVote(makeLEStrategy().lookForLeader());//其中会向FastLeaderElection.sendQueue中添加元素
                        } catch (Exception e) {
                            ..........................
                        } finally {
                            // If the thread is in the the grace period, interrupt
                            // to come out of waiting.
                            roZkMgr.interrupt();
                            roZk.shutdown();
                        }
                    } else {
                        try {
                            setBCVote(null);
                            setCurrentVote(makeLEStrategy().lookForLeader());
                        } catch (Exception e) {
                            LOG.warn("Unexpected exception", e);
                            setPeerState(ServerState.LOOKING);
                        }
                    }
                    break;
                case OBSERVING:
                    .....................
                    break;
                case FOLLOWING:
                    ..........................
                    break;
                case LEADING:
                    .........
                    break;
                }
            }
        } finally {
            .........................
        }
    }
/**
     * Starts a new round of leader election. Whenever our QuorumPeer
     * changes its state to LOOKING, this method is invoked, and it
     * sends notifications to all other peers.
     */
    public Vote lookForLeader() throws InterruptedException {
        ...........................

            LOG.info("New election. My id =  " + self.getId() +
                    ", proposed zxid=0x" + Long.toHexString(proposedZxid));
            sendNotifications();//here

            /*
             * Loop in which we exchange notifications until we find a leader
             */

            while ((self.getPeerState() == ServerState.LOOKING) &&
                    (!stop)){
                /*
                 * Remove next notification from queue, times out after 2 times
                 * the termination time
                 */
                Notification n = recvqueue.poll(notTimeout,
                        TimeUnit.MILLISECONDS);

                /*
                 * Sends more notifications if haven't received enough.
                 * Otherwise processes new notification.
                 */
                if(n == null){
                    if(manager.haveDelivered()){
                        sendNotifications();//如果此节点一直维持现在这种looking状态,不会走到这里
                    } else {
                        manager.connectAll();
                    }

                    /*
                     * Exponential backoff
                     */
                    int tmpTimeOut = notTimeout*2;
                    notTimeout = (tmpTimeOut < maxNotificationInterval?
                            tmpTimeOut : maxNotificationInterval);
                    LOG.info("Notification time out: " + notTimeout);
                }
                else if(validVoter(n.sid) && validVoter(n.leader)) {
                    /*
                     * Only proceed if the vote comes from a replica in the
                     * voting view for a replica in the voting view.
                     */
                    switch (n.state) {
                    case LOOKING:
                        // If notification > current, replace and send messages out
                        if (n.electionEpoch > logicalclock.get()) {
                            logicalclock.set(n.electionEpoch);
                            recvset.clear();
                            if(totalOrderPredicate(n.leader, n.zxid, n.peerEpoch,
                                    getInitId(), getInitLastLoggedZxid(), getPeerEpoch())) {
                                updateProposal(n.leader, n.zxid, n.peerEpoch);
                            } else {
                                updateProposal(getInitId(),
                                        getInitLastLoggedZxid(),
                                        getPeerEpoch());
                            }
                            sendNotifications();//如果此节点一直维持现在这种looking状态,不会走到这里
                        } else if (n.electionEpoch < logicalclock.get()) {
                            if(LOG.isDebugEnabled()){
                                LOG.debug("Notification election epoch is smaller than logicalclock. n.electionEpoch = 0x"
                                        + Long.toHexString(n.electionEpoch)
                                        + ", logicalclock=0x" + Long.toHexString(logicalclock.get()));
                            }
                            break;
                        } else if (totalOrderPredicate(n.leader, n.zxid, n.peerEpoch,
                                proposedLeader, proposedZxid, proposedEpoch)) {
                            updateProposal(n.leader, n.zxid, n.peerEpoch);
                            sendNotifications();//如果此节点一直维持现在这种looking状态,不会走到这里
                        }

                        ...........
                        break;
                    case OBSERVING:
                        LOG.debug("Notification from observer: " + n.sid);
                        break;
                    case FOLLOWING:
                    case LEADING:
                        .....................
                        break;
                    }
                } else {
                    ............
                }
            }
            return null;
        } finally {
            ...............
        }
    }

FastLeaderElection#sendNotifications中会为所有节点创建ToSend对象,并放到队列中

//FastLeaderElection#sendNotifications
/**
     * Broadcasts the current proposal: for every server in the voting view,
     * builds a LOOKING notification (proposed leader, zxid, logical clock and
     * epoch) addressed to that server and offers it to the send queue.
     */
    private void sendNotifications() {
        for (QuorumServer peer : self.getVotingView().values()) {
            final long recipient = peer.id;

            final ToSend notification = new ToSend(ToSend.mType.notification,
                    proposedLeader,
                    proposedZxid,
                    logicalclock.get(),
                    QuorumPeer.ServerState.LOOKING,
                    recipient,
                    proposedEpoch);

            if (LOG.isDebugEnabled()) {
                LOG.debug("Sending Notification: " + proposedLeader + " (n.leader), 0x"
                        + Long.toHexString(proposedZxid) + " (n.zxid), 0x"
                        + Long.toHexString(logicalclock.get()) + " (n.round), "
                        + recipient + " (recipient), " + self.getId()
                        + " (myid), 0x" + Long.toHexString(proposedEpoch) + " (n.peerEpoch)");
            }

            // Trace output marking each enqueue.
            System.out.println("wjl sendqueue.offer(notmsg)");
            sendqueue.offer(notification);
        }
    }

至此我们已经知道这个队列中为什么会有2n+1个数据以及数据是哪来的。

现在从QuorumPeer线程回到WorkerSender线程,在WorkerSender线程中如果成功从队列中获取到数据,会调用WorkerSender#process,其中又会调用QuorumCnxManager#toSend方法

/**
     * Queues a message for delivery to server {@code sid}. Currently only
     * leader election uses it.
     *
     * Messages addressed to this server go straight into the receive queue;
     * messages for any other server are placed on that server's send queue
     * and followed by a connection attempt.
     */
    public void toSend(Long sid, ByteBuffer b) {
        final boolean loopback = (this.mySid == sid);
        if (!loopback) {
            // Register a send queue for this server unless one already exists,
            // then enqueue on whichever queue the map holds.
            ArrayBlockingQueue<ByteBuffer> candidate = new ArrayBlockingQueue<ByteBuffer>(SEND_CAPACITY);
            ArrayBlockingQueue<ByteBuffer> present = queueSendMap.putIfAbsent(sid, candidate);
            ArrayBlockingQueue<ByteBuffer> target = (present == null) ? candidate : present;
            addToSendQueue(target, b);

            // Try to connect to the remote node. In the single-node scenario
            // discussed here the remote node is not running yet, so this is
            // where the "Cannot open channel" warning is reported.
            connectOne(sid);
            return;
        }

        // The message is addressed to this server: put it directly on the
        // receive queue.
        b.position(0);
        addToRecvQueue(new Message(b.duplicate(), sid));
    }

在QuorumCnxManager#toSend方法中,只有当消息的目标是远程节点时才会调用connectOne尝试建立连接;队列中的2n+1条消息里有1条是发给本机的(走loopback分支,不会建立连接),所以WorkerSender线程中由于网络连接失败导致的错误只会发生2n次。

QuorumPeer线程中的异常

2021-06-20 08:11:56,717 [myid:2] - WARN  [QuorumPeer[myid=2]/0:0:0:0:0:0:0:0:2182:QuorumCnxManager@589] - Cannot open channel to 1 at election address localhost/127.0.0.1:3877
java.net.ConnectException: Connection refused: connect
	at java.net.DualStackPlainSocketImpl.waitForConnect(Native Method)
	at java.net.DualStackPlainSocketImpl.socketConnect(DualStackPlainSocketImpl.java:85)
	at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
	at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
	at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
	at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:172)
	at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
	at java.net.Socket.connect(Socket.java:606)
	at org.apache.zookeeper.server.quorum.QuorumCnxManager.connectOne(QuorumCnxManager.java:563)
	at org.apache.zookeeper.server.quorum.QuorumCnxManager.connectAll(QuorumCnxManager.java:615)
	at org.apache.zookeeper.server.quorum.FastLeaderElection.lookForLeader(FastLeaderElection.java:846)
	at org.apache.zookeeper.server.quorum.QuorumPeer.run(QuorumPeer.java:958)

相比于WorkerSender中的异常只出现有限次数,QuorumPeer的异常会一直出现

这个异常只涉及到一个线程

当节点处于looking状态时,会调用FastLeaderElection#lookForLeader(上一个异常中也提到了这个方法),然后在这个方法中的一个while循环中不断调用QuorumCnxManager#connectAll

/**
     * Attempts a connection to every server that currently has an entry in
     * queueSendMap; connectOne decides per server whether one is still
     * needed.
     */

    public void connectAll(){
        // NOTE(review): toSend only registers queues for remote sids, so this
        // is expected to visit peers other than ourselves - confirm no other
        // code path adds our own sid.
        Enumeration<Long> peers = queueSendMap.keys();
        while (peers.hasMoreElements()) {
            long sid = peers.nextElement();
            connectOne(sid);
        }
    }
/**
     * Try to establish a connection to server with id sid.
     * 
     *  @param sid  server id
     */
    synchronized public void connectOne(long sid){
        System.out.println("wjl initiateConnection(sock, sid) ThreadName:" + Thread.currentThread().getName());

        if (!connectedToPeer(sid)){//如果尚无连接
            ....尝试建立连接,因为远程节点尚未运行,所以会报错....
        } else {//如果已有连接
            LOG.debug("There is a connection already for server " + sid);
        }
    }

其中connectedToPeer通过是否有远程节点的SendWorker线程来判断是否已经有连接

/**
 * Reports whether this node considers itself connected to the given remote
 * node, judged by whether senderWorkerMap holds an entry for that sid.
 *
 * @param peerSid server id of the remote node
 * @return true if senderWorkerMap has a non-null entry for peerSid
 */
    public boolean connectedToPeer(long peerSid) {
        final Object worker = senderWorkerMap.get(peerSid);
        return null != worker;
    }

与多个peer连接通讯

待续

时钟同步

待续

选举算法

选举算法的主要实现方法是FastLeaderElection#lookForLeader,在这个函数中会不断从recvqueue中拿数据,并根据数据中的state分情况处理

while ((self.getPeerState() == ServerState.LOOKING) && (!stop)) {
    /*
     * 从投票消息队列中接收一条消息
     */
    Notification n = recvqueue.poll(notTimeout,
              TimeUnit.MILLISECONDS);
    if (n == null) {...}
    /*
     * 检查n节点是不是参与投票的节点,只有PeerType=PARTICIPANT的节点消息才会参与投票
     * Observers are not contained in this view, only nodes with
     * PeerType=PARTICIPANT.
     */
    else if(self.getVotingView().containsKey(n.sid)) {
        switch (n.state) {
            case LOOKING: 跟自己的投票比较。
            case OBSERVING: 没有操作
            case FOLLOWING:
            case LEADING: 当已经收到LEADING和FOLLOWING表示已经票选出Leader,然后投最后一票给Leader,结束投票
            default: 没有操作
    }
}

LOOKING

// If notification > current, replace and send messages out
if (n.electionEpoch > logicalclock.get()) {//如果自己的选举轮次落后
    logicalclock.set(n.electionEpoch);
    recvset.clear();//清空自己的投票箱
    if(totalOrderPredicate(n.leader, n.zxid, n.peerEpoch,
            getInitId(), getInitLastLoggedZxid(), getPeerEpoch())) {
        updateProposal(n.leader, n.zxid, n.peerEpoch);
    } else {
        updateProposal(getInitId(),
                getInitLastLoggedZxid(),
                getPeerEpoch());
    }
    sendNotifications();
} else if (n.electionEpoch < logicalclock.get()) {
    if(LOG.isDebugEnabled()){
        LOG.debug("Notification election epoch is smaller than logicalclock. n.electionEpoch = 0x"
                + Long.toHexString(n.electionEpoch)
                + ", logicalclock=0x" + Long.toHexString(logicalclock.get()));
    }
    break;
} else if (totalOrderPredicate(n.leader, n.zxid, n.peerEpoch,
        proposedLeader, proposedZxid, proposedEpoch)) {
    updateProposal(n.leader, n.zxid, n.peerEpoch);
    sendNotifications();
}

if(LOG.isDebugEnabled()){
    LOG.debug("Adding vote: from=" + n.sid +
            ", proposed leader=" + n.leader +
            ", proposed zxid=0x" + Long.toHexString(n.zxid) +
            ", proposed election epoch=0x" + Long.toHexString(n.electionEpoch));
}

recvset.put(n.sid, new Vote(n.leader, n.zxid, n.electionEpoch, n.peerEpoch));//更新投票箱

if (termPredicate(recvset,
        new Vote(proposedLeader, proposedZxid,
                logicalclock.get(), proposedEpoch))) {//此节点所持投票在投票箱中是否已经过半

    // Verify if there is any change in the proposed leader
    // 等待一段时间,看有无更优选票
    while((n = recvqueue.poll(finalizeWait,
            TimeUnit.MILLISECONDS)) != null){
        if(totalOrderPredicate(n.leader, n.zxid, n.peerEpoch,
                proposedLeader, proposedZxid, proposedEpoch)){
            recvqueue.put(n);
            break;
        }
    }

    /*
     * This predicate is true once we don't read any new
     * relevant message from the reception queue
     */
    if (n == null) {//如果没有更优选票
        self.setPeerState((proposedLeader == self.getId()) ?
                ServerState.LEADING: learningState());

        Vote endVote = new Vote(proposedLeader,
                                proposedZxid,
                                logicalclock.get(),
                                proposedEpoch);
        leaveInstance(endVote);
        return endVote;
    }
}

LEADING&FOLLOWING

/*
 * Consider all notifications from the same epoch
 * together.
 */
if(n.electionEpoch == logicalclock.get()){// 轮次相符
    recvset.put(n.sid, new Vote(n.leader,
                                  n.zxid,
                                  n.electionEpoch,
                                  n.peerEpoch));
   
    if(ooePredicate(recvset, outofelection, n)) {
        self.setPeerState((n.leader == self.getId()) ?
                ServerState.LEADING: learningState());

        Vote endVote = new Vote(n.leader, 
                n.zxid, 
                n.electionEpoch, 
                n.peerEpoch);
        leaveInstance(endVote);
        return endVote;
    }
}

/*
 * Before joining an established ensemble, verify
 * a majority is following the same leader.
 */
outofelection.put(n.sid, new Vote(n.version,
                                    n.leader,
                                    n.zxid,
                                    n.electionEpoch,
                                    n.peerEpoch,
                                    n.state));

if(ooePredicate(outofelection, outofelection, n)) {
    synchronized(this){
        logicalclock.set(n.electionEpoch);
        self.setPeerState((n.leader == self.getId()) ?
                ServerState.LEADING: learningState());
    }
    Vote endVote = new Vote(n.leader,
                            n.zxid,
                            n.electionEpoch,
                            n.peerEpoch);
    leaveInstance(endVote);
    return endVote;
}

待续

数据同步

待续

疑问

1、electionEpoch和peerEpoch(好像和currentEpoch有关,每次启动时会从currentEpoch文件中加载数据)啥区别?答:从上文lookForLeader的代码看,electionEpoch会与logicalclock比较,用来判断选举轮次是否一致;peerEpoch则作为参数传入totalOrderPredicate,参与选票pk。两者更深层的区别暂时不知道。
2、服务器加入集群
3、选举过程中会发送几条数据(比如一个节点因为先后收到多条优于当前所持选票的选票,应该发送多条更新信息吧,且发送信息不受心跳同步机制影响)答:多条,且不跟随心跳

    /**
     * Broadcasts the current proposal to every server in the voting view by
     * offering one LOOKING notification per server to the send queue.
     *
     * This instrumented variant also prints each notification to stdout and,
     * after the loop, dumps the current thread's call stack so the caller of
     * this method can be identified.
     */
    private void sendNotifications() {
        for (QuorumServer peer : self.getVotingView().values()) {
            final long recipient = peer.id;

            final ToSend notification = new ToSend(ToSend.mType.notification,
                    proposedLeader,
                    proposedZxid,
                    logicalclock.get(),
                    QuorumPeer.ServerState.LOOKING,
                    recipient,
                    proposedEpoch);

            if (LOG.isDebugEnabled()) {
                LOG.debug("Sending Notification: " + proposedLeader + " (n.leader), 0x"
                        + Long.toHexString(proposedZxid) + " (n.zxid), 0x"
                        + Long.toHexString(logicalclock.get()) + " (n.round), "
                        + recipient + " (recipient), " + self.getId()
                        + " (myid), 0x" + Long.toHexString(proposedEpoch) + " (n.peerEpoch)");
            }

            // Print the notification being sent, regardless of log level.
            System.out.println("wjl Sending Notification: " + proposedLeader + " (n.leader), 0x"
                    + Long.toHexString(proposedZxid) + " (n.zxid), 0x"
                    + Long.toHexString(logicalclock.get()) + " (n.round), "
                    + recipient + " (recipient), " + self.getId()
                    + " (myid), 0x" + Long.toHexString(proposedEpoch) + " (n.peerEpoch)");
            sendqueue.offer(notification);
        }

        // Dump the call stack, one frame per line.
        StackTraceElement[] frames = Thread.currentThread().getStackTrace();
        for (StackTraceElement frame : frames) {
            System.out.println("类路径:" + frame.getClassName() + " 方法名:" + frame.getMethodName() + " 调用行号:" + frame.getLineNumber());
        }
    }
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值