一、集群模式
1、数据同步总流程
1)进入QuorumPeerMain.main()方法
// Entry point: creates a QuorumPeerMain and passes the command-line arguments to initializeAndRun.
// NOTE(review): the method's closing brace (and anything after the call) is not shown in this excerpt.
public static void main(String[] args) {
QuorumPeerMain main = new QuorumPeerMain();
main.initializeAndRun(args);
2)QuorumPeerMain.runFromConfig(config)方法
// Startup-mode selection: exactly one argument and at least one configured server selects cluster mode.
// NOTE(review): presumably part of initializeAndRun(args); the enclosing method is not shown here.
if (args.length == 1 && config.servers.size() > 0) {
runFromConfig(config); // cluster (quorum) mode
} else { // there is only server in the quorum -- run as standalone
ZooKeeperServerMain.main(args);
}
/**
 * Builds a QuorumPeer from the parsed configuration, then initializes, starts and joins it.
 *
 * @param config parsed quorum configuration; the getters used below supply the client port address,
 *               directories, timing limits, server id and server list
 * @throws IOException declared by the signature; not caught inside this excerpt
 */
public void runFromConfig(QuorumPeerConfig config) throws IOException {
try {
// Creates the NIOServerCnxnFactory: serverCnxnFactoryName = NIOServerCnxnFactory.class.getName();
ServerCnxnFactory cnxnFactory = ServerCnxnFactory.createFactory();
// NIOServerCnxnFactory.configure()
cnxnFactory.configure(config.getClientPortAddress(),config.getMaxClientCnxns());
quorumPeer = getQuorumPeer();
quorumPeer.setQuorumPeers(config.getServers());
// FileTxnSnapLog receives getDataLogDir() and getDataDir(), in that order.
quorumPeer.setTxnFactory(new FileTxnSnapLog(new File(config.getDataLogDir()),
new File(config.getDataDir())));
quorumPeer.setElectionType(config.getElectionAlg());
quorumPeer.setMyid(config.getServerId());
quorumPeer.setTickTime(config.getTickTime());
quorumPeer.setInitLimit(config.getInitLimit());
quorumPeer.setSyncLimit(config.getSyncLimit());
// NOTE(review): the "......" below is an elision in the notes, presumably further setters.
......
quorumPeer.setQuorumCnxnThreadsSize(config.quorumCnxnThreadsSize);
3)初始化quorumPeer并启动
quorumPeer.initialize();
quorumPeer.start();
// join() keeps the calling thread here until the quorum peer thread ends or this thread is interrupted.
quorumPeer.join();
} catch (InterruptedException e) { // warn, but generally this is ok
LOG.warn("Quorum Peer interrupted", e);
}
}
/**
 * Prepares this factory to accept client connections on the given address.
 * The thread is created here but not started in this method.
 *
 * @param addr  address the server socket is bound to
 * @param maxcc value stored in maxClientCnxns
 * @throws IOException declared by the signature
 */
@Override
public void configure(InetSocketAddress addr, int maxcc) throws IOException {
configureSaslLogin();
// This factory is passed to the thread, which is marked as a daemon.
thread = new ZooKeeperThread(this, "NIOServerCxn.Factory:" + addr);
thread.setDaemon(true);
maxClientCnxns = maxcc;
// Open the server channel, enable address reuse, and bind it to addr.
this.ss = ServerSocketChannel.open();
ss.socket().setReuseAddress(true);
LOG.info("binding to port " + addr);
ss.socket().bind(addr);
// Switch to non-blocking mode and register with the selector for accept events.
ss.configureBlocking(false);
ss.register(selector, SelectionKey.OP_ACCEPT);
}
/**
 * Starts this peer: loads the database, starts the connection factory, begins leader election,
 * and finally starts the underlying thread.
 */
@Override
public synchronized void start() {
loadDataBase(); // load the snapshot data into memory
cnxnFactory.start(); // build read/write requests
startLeaderElection(); // leader election within the cluster
super.start(); // actually invokes QuorumPeer.run()
}
4)loadDataBase()把快照加载到内存
/**
 * Loads the database through zkDb and reads currentEpoch and acceptedEpoch from their files.
 * NOTE(review): the catch clauses of the try blocks are not shown in this excerpt.
 */
private void loadDataBase() {
// File named UPDATING_EPOCH_FILENAME under the snapshot directory; its existence is checked below.
File updating = new File(getTxnFactory().getSnapDir(),UPDATING_EPOCH_FILENAME);
try {
zkDb.loadDataBase();
// Derive the epoch from the last processed zxid of the loaded data tree.
long lastProcessedZxid = zkDb.getDataTree().lastProcessedZxid;
long epochOfZxid = ZxidUtils.getEpochFromZxid(lastProcessedZxid);
try {
currentEpoch = readLongFromFile(CURRENT_EPOCH_FILENAME);
// If the zxid's epoch is greater than the stored one and the updating file exists,
// adopt the zxid's epoch as the current epoch.
if (epochOfZxid > currentEpoch && updating.exists()) {
setCurrentEpoch(epochOfZxid);
}
}
try {
acceptedEpoch = readLongFromFile(ACCEPTED_EPOCH_FILENAME);
}
}
}
5)NIOServerCnxnFactory.start(); 前面介绍过该方法,通过run()方法构建读写的request。
6)进行集群选举startLeaderElection();
7)执行super.start(); 实际调用QuorumPeer.run()方法
8)在QuorumPeer.run()中判断服务器的状态,并进行相应的初始化
// Per-state handling, one case per server state shown in this excerpt.
// NOTE(review): no break statements, try/catch blocks or enclosing switch appear here; presumably
// elided. Confirm against the full QuorumPeer.run() source.
case LOOKING:
LOG.info("LOOKING");
case OBSERVING:
LOG.info("OBSERVING");
// Build the observer, install it, then run its observeLeader() routine.
setObserver(makeObserver(logFactory));
observer.observeLeader();
case FOLLOWING:
// Build the follower, install it, then run its followLeader() routine.
setFollower(makeFollower(logFactory));
follower.followLeader();
case LEADING:
// Build the leader, install it, run lead(), and clear the leader reference afterwards.
setLeader(makeLeader(logFactory));
leader.lead();
setLeader(null);
1.1、OBSERVING
2)创建ObserverZooKeeperServer
/**
 * Creates the Observer for this peer, backed by a newly constructed ObserverZooKeeperServer.
 *
 * @param logFactory transaction/snapshot log handed to the ObserverZooKeeperServer
 * @return a new Observer built from this peer and the new server
 * @throws IOException declared by the signature
 */
protected Observer makeObserver(FileTxnSnapLog logFactory) throws IOException {
    ObserverZooKeeperServer observerServer = new ObserverZooKeeperServer(
            logFactory,
            this,
            new ZooKeeperServer.BasicDataTreeBuilder(),
            this.zkDb);
    return new Observer(this, observerServer);
}
3)以observer.observeLeader();方式与leader交互
4)通过findLeader()方法找到leader服务器
/**
 * Main routine of an observer: finds the leader, connects, registers, synchronizes, then
 * reads and processes packets while running.
 * NOTE(review): the catch clause of the inner try is not shown in this excerpt.
 *
 * @throws InterruptedException declared by the signature
 */
void observeLeader() throws InterruptedException {
zk.registerJMX(new ObserverBean(this, zk), self.jmxLocalPeerBean);
try {
QuorumServer leaderServer = findLeader();
try {
connectToLeader(leaderServer.addr, leaderServer.hostname);
// Register with OBSERVERINFO; the returned zxid is passed on to syncWithLeader.
long newLeaderZxid = registerWithLeader(Leader.OBSERVERINFO);
syncWithLeader(newLeaderZxid);
// A single QuorumPacket instance is reused for every read.
QuorumPacket qp = new QuorumPacket();
while (this.isRunning()) {
readPacket(qp);
processPacket(qp);
}
}
} finally {
// Always undo the JMX registration made at the top of the method.
zk.unregisterJMX(this);
}
}
5)从leader同步并调用zk.startup()方法
/**
 * Synchronizes with the leader and then starts the local ZooKeeper server via zk.startup().
 * NOTE(review): most of the body is elided ("......") and the closing brace is not shown.
 *
 * @param newLeaderZxid zxid value supplied by the caller
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 */
protected void syncWithLeader(long newLeaderZxid) throws IOException, InterruptedException{
// Pre-built ACK packet and a scratch packet; how they are used lies in the elided part.
QuorumPacket ack = new QuorumPacket(Leader.ACK, 0, null, null);
QuorumPacket qp = new QuorumPacket();
......
zk.startup();
6)调用ObserverZooKeeperServer.setupRequestProcessors()方法
/**
 * Wires up and starts the request processors: an ObserverRequestProcessor feeding a
 * CommitProcessor, which feeds a FinalRequestProcessor. A SyncRequestProcessor is also
 * created and started when syncRequestProcessorEnabled is set.
 */
protected void setupRequestProcessors() {
    // Build from the end of the chain towards the front.
    RequestProcessor terminalProcessor = new FinalRequestProcessor(this);
    String serverIdText = Long.toString(getServerId());
    commitProcessor = new CommitProcessor(
            terminalProcessor, serverIdText, true, getZooKeeperServerListener());
    commitProcessor.start();

    ObserverRequestProcessor observerProcessor = new ObserverRequestProcessor(this, commitProcessor);
    firstProcessor = observerProcessor;
    observerProcessor.start();

    if (!syncRequestProcessorEnabled) {
        return;
    }
    syncProcessor = new SyncRequestProcessor(this, null);
    syncProcessor.start();
}
7)通过ObserverRequestProcessor.processRequest将请求加入到queuedRequests队列中
// Dispatches to whichever processor was passed in through the constructor; here that is ObserverRequestProcessor
nextProcessor.processRequest(request);
/**
 * Adds the request to queuedRequests; once the processor is finished, requests are ignored.
 *
 * @param request the request to enqueue
 */
public void processRequest(Request request) {
    if (finished) {
        return;
    }
    queuedRequests.add(request);
}
8)执行线程逻辑:通过queuedRequests.take()取出request,再调用zks.getObserver().request(request)方法;如果是事务请求,就把request包装成Leader.REQUEST类型的QuorumPacket发送给leader
/**
 * Thread loop: takes requests from queuedRequests, passes each one to the next processor, and
 * additionally hands sync and the listed write-type operations to zks.getObserver().request().
 * NOTE(review): the catch clause of the try block is not shown in this excerpt.
 */
@Override
public void run() {
try {
while (!finished) {
Request request = queuedRequests.take();
// Request.requestOfDeath ends the loop.
if (request == Request.requestOfDeath) {
break;
}
// Every request goes to the next processor before the type-specific handling below.
nextProcessor.processRequest(request);
switch (request.type) {
case OpCode.sync:
// sync is recorded in pendingSyncs before being passed to the observer.
zks.pendingSyncs.add(request);
zks.getObserver().request(request);
break;
// These operation types all share the same handling: pass the request to the observer.
case OpCode.create:
case OpCode.delete:
case OpCode.setData:
case OpCode.setACL:
case OpCode.createSession:
case OpCode.closeSession:
case OpCode.multi:
zks.getObserver().request(request);
break;
}
}
}
}
// Wrap the bytes from baos and the request's authInfo in a Leader.REQUEST packet (zxid argument -1),
// then write it with flush = true.
QuorumPacket qp = new QuorumPacket(Leader.REQUEST, -1, baos.toByteArray(), request.authInfo);
writePacket(qp, true);
/**
 * Writes a packet to leaderOs and optionally flushes bufferedOutput, holding the leaderOs
 * monitor for both steps.
 *
 * @param pp    packet to write; when null the write is skipped (a flush may still happen)
 * @param flush whether to flush bufferedOutput afterwards
 * @throws IOException declared by the signature
 */
void writePacket(QuorumPacket pp, boolean flush) throws IOException {
synchronized (leaderOs) {
if (pp != null) {
leaderOs.writeRecord(pp, "packet");
}
if (flush) {
bufferedOutput.flush();
}
}
}
9)nextProcessor.processRequest(request)方法实际调用CommitProcessor.processRequest(request):如果是非事务请求,直接交给后面的FinalRequestProcessor处理;如果是事务请求,则等待leader发送commit请求后被唤醒,再把允许提交的事务请求交给FinalRequestProcessor处理。
下面为CommitProcessor的run方法代码:
/**
 * Thread loop of the commit processor. Each iteration forwards the requests collected in
 * toProcess to the next processor, then either waits, matches a committed request against the
 * pending one, or pulls further requests from queuedRequests.
 * NOTE(review): the catch clause of the try block is not shown in this excerpt.
 */
@Override
public void run() {
try {
// The request currently held back until a matching committed request arrives (null if none).
Request nextPending = null;
while (!finished) {
// Forward everything collected so far, then reset the list.
int len = toProcess.size();
for (int i = 0; i < len; i++) {
nextProcessor.processRequest(toProcess.get(i));
}
toProcess.clear();
synchronized (this) {
// Nothing queued (or a request is pending) and nothing committed: wait on this object's monitor.
if ((queuedRequests.size() == 0 || nextPending != null) && committedRequests.size() == 0) {
wait();
continue;
}
// Nothing queued (or a request is pending) and a committed request is available.
if ((queuedRequests.size() == 0 || nextPending != null)&& committedRequests.size() > 0) {
Request r = committedRequests.remove();
// Same sessionId and cxid as the pending request: copy hdr, txn and zxid onto the
// pending request and forward that one; otherwise forward the committed request itself.
if(nextPending!= null && nextPending.sessionId==r.sessionId && nextPending.cxid == r.cxid){
nextPending.hdr = r.hdr;
nextPending.txn = r.txn;
nextPending.zxid = r.zxid;
toProcess.add(nextPending);
nextPending = null;
} else {
toProcess.add(r);
}
}
}
// While a request is still pending, do not pull more from queuedRequests.
if (nextPending != null) {
continue;
}
synchronized (this) {// Process the next requests in the queuedRequests
// Drain queuedRequests until one request becomes pending or the queue is empty.
while (nextPending == null && queuedRequests.size() > 0) {
Request request = queuedRequests.remove();
switch (request.type) {
// These operation types are held as nextPending instead of being forwarded immediately.
case OpCode.create:
case OpCode.delete:
case OpCode.setData:
case OpCode.multi:
case OpCode.setACL:
case OpCode.createSession:
case OpCode.closeSession:
nextPending = request;
break;
case OpCode.sync:
// sync is held back only when matchSyncs is set; otherwise it is forwarded directly.
if (matchSyncs) {
nextPending = request;
} else {
toProcess.add(request);
}
break;
default:
// All other request types are forwarded without waiting.
toProcess.add(request);
}
}
}
}
}
LOG.info("CommitProcessor exited loop!");
}
10)最后FinalRequestProcessor把处理结果返回,具体处理方法参考代码如下:
public void processRequest(Request request) {
......
ProcessTxnResult rc = null;
if (request.cnxn == null) { return; }
ServerCnxn cnxn = request.cnxn;
String lastOp = "NA";
zks.decInProcess();
Code err = Code.OK;
Record rsp = null;
boolean closeSession = false;