zookeeper单机服务端启动类:ZooKeeperServerMain 查看其main方法:
/**
 * Entry point of the standalone server. Delegates to initializeAndRun(args)
 * and maps each failure type to a distinct process exit code:
 * 2 = invalid arguments or config, 3 = datadir not accessible,
 * 4 = AdminServer failed to start, 1 = any other exception, 0 = normal exit.
 */
public static void main(String[] args) {
ZooKeeperServerMain main = new ZooKeeperServerMain();
try {
main.initializeAndRun(args);// initialize and start the server
} catch (IllegalArgumentException e) {
LOG.error("Invalid arguments, exiting abnormally", e);
LOG.info(USAGE);
System.err.println(USAGE);
System.exit(2);
} catch (ConfigException e) {
LOG.error("Invalid config, exiting abnormally", e);
System.err.println("Invalid config, exiting abnormally");
System.exit(2);
} catch (DatadirException e) {
LOG.error("Unable to access datadir, exiting abnormally", e);
System.err.println("Unable to access datadir, exiting abnormally");
System.exit(3);
} catch (AdminServerException e) {
LOG.error("Unable to start AdminServer, exiting abnormally", e);
System.err.println("Unable to start AdminServer, exiting abnormally");
System.exit(4);
} catch (Exception e) {
LOG.error("Unexpected exception, exiting abnormally", e);
System.exit(1);
}
// Reached only when initializeAndRun returned without throwing.
LOG.info("Exiting normally");
System.exit(0);
}
/**
 * Registers the log4j JMX beans (best effort), builds a ServerConfig from
 * the command-line arguments and hands it to runFromConfig.
 *
 * @param args either a single argument (parsed via parse(String)) or
 *             several arguments (parsed via parse(String[]))
 */
protected void initializeAndRun(String[] args)
throws ConfigException, IOException, AdminServerException
{
try {
ManagedUtil.registerLog4jMBeans();// JMX-related; not covered in this walkthrough
} catch (JMException e) {
// A JMX registration failure is only logged; startup continues.
LOG.warn("Unable to register log4j JMX control", e);
}
ServerConfig config = new ServerConfig();
if (args.length == 1) {
config.parse(args[0]);// single argument: parse the zoo.cfg file into this ServerConfig
} else {
config.parse(args);
}
runFromConfig(config);// start the server from the parsed configuration
}
/**
 * Starts the standalone server from the given configuration and blocks the
 * calling thread until the shutdown latch is released.
 *
 * Order of work: create the FileTxnSnapLog and the ZooKeeperServer, register
 * the shutdown handler, start the AdminServer, start the plain and/or secure
 * connection factories, start the ContainerManager, then wait on the latch
 * and shut everything down. The FileTxnSnapLog is always closed in finally.
 *
 * @param config parsed server configuration
 */
public void runFromConfig(ServerConfig config)
throws IOException, AdminServerException {
LOG.info("Starting server");
FileTxnSnapLog txnLog = null;// transaction-log / snapshot store
try {
// Note that this thread isn't going to be doing anything else,
// so rather than spawning another thread, we will just call
// run() in this thread.
// create a file logger url from the command line args
txnLog = new FileTxnSnapLog(config.dataLogDir, config.dataDir);// create the FileTxnSnapLog that manages transaction-log and snapshot data
final ZooKeeperServer zkServer = new ZooKeeperServer(txnLog,
config.tickTime, config.minSessionTimeout, config.maxSessionTimeout, null);
// Registers shutdown handler which will be used to know the
// server error or shutdown state changes.
final CountDownLatch shutdownLatch = new CountDownLatch(1);// latch the main thread blocks on (see await() further down)
zkServer.registerServerShutdownHandler(
new ZooKeeperServerShutdownHandler(shutdownLatch));
// Start Admin server
adminServer = AdminServerFactory.createAdminServer();// per the original note: a JettyAdminServer created reflectively (factory code not shown)
adminServer.setZooKeeperServer(zkServer);// give the admin server a reference to zkServer
adminServer.start();// start the admin server; per the original note this starts Jetty, so ZooKeeper is also reachable over HTTP
boolean needStartZKServer = true;
if (config.getClientPortAddress() != null) {
cnxnFactory = ServerCnxnFactory.createFactory();// per the original note the default is the NIO implementation, NIOServerCnxnFactory
cnxnFactory.configure(config.getClientPortAddress(), config.getMaxClientCnxns(), false);// set parameters and create the thread objects
cnxnFactory.startup(zkServer);// start the NIO server threads
// zkServer has been started. So we don't need to start it again in secureCnxnFactory.
needStartZKServer = false;
}
if (config.getSecureClientPortAddress() != null) {// secure (TLS) client port
secureCnxnFactory = ServerCnxnFactory.createFactory();
secureCnxnFactory.configure(config.getSecureClientPortAddress(), config.getMaxClientCnxns(), true);
secureCnxnFactory.startup(zkServer, needStartZKServer);
}
containerManager = new ContainerManager(zkServer.getZKDatabase(), zkServer.firstProcessor,
Integer.getInteger("znode.container.checkIntervalMs", (int) TimeUnit.MINUTES.toMillis(1)),
Integer.getInteger("znode.container.maxPerMinute", 10000)
);// ContainerManager: per the original note, periodically sends delete requests to clean up expired container nodes
containerManager.start();
// Watch status of ZooKeeper server. It will do a graceful shutdown
// if the server is not running or hits an internal error.
shutdownLatch.await();// block this thread until the latch is released
shutdown();// shut down
if (cnxnFactory != null) {
cnxnFactory.join();
}
if (secureCnxnFactory != null) {
secureCnxnFactory.join();
}
if (zkServer.canShutdown()) {
zkServer.shutdown(true);
}
} catch (InterruptedException e) {
// warn, but generally this is ok
LOG.warn("Server interrupted", e);
} finally {
if (txnLog != null) {
txnLog.close();
}
}
}
比较关键的地方:
创建cnxnFactory,给其属性赋值 然后启动相关线程
以默认的NIOServerCnxnFactory为例
org.apache.zookeeper.server.NIOServerCnxnFactory#configure 设置属性
/**
 * Configures the NIO connection factory: reads tuning values from system
 * properties, creates the expirer, selector and accept thread objects, and
 * opens and binds the non-blocking ServerSocketChannel. No thread is started
 * here.
 *
 * @param addr   address to bind the server socket to
 * @param maxcc  value stored in maxClientCnxns
 * @param secure must be false; true throws UnsupportedOperationException
 * @throws IOException if numSelectorThreads is below 1 or a socket call fails
 */
public void configure(InetSocketAddress addr, int maxcc, boolean secure) throws IOException {
if (secure) {
throw new UnsupportedOperationException("SSL isn't supported in NIOServerCnxn");
}
configureSaslLogin();
maxClientCnxns = maxcc;
sessionlessCnxnTimeout = Integer.getInteger(
ZOOKEEPER_NIO_SESSIONLESS_CNXN_TIMEOUT, 10000);
// We also use the sessionlessCnxnTimeout as expiring interval for
// cnxnExpiryQueue. These don't need to be the same, but the expiring
// interval passed into the ExpiryQueue() constructor below should be
// less than or equal to the timeout.
cnxnExpiryQueue =
new ExpiryQueue<NIOServerCnxn>(sessionlessCnxnTimeout);
expirerThread = new ConnectionExpirerThread();// connection-expirer thread
int numCores = Runtime.getRuntime().availableProcessors();// number of available processors (cores)
// 32 cores sweet spot seems to be 4 selector threads (with 32 cores the formula below gives sqrt(32/2) = 4)
numSelectorThreads = Integer.getInteger(
ZOOKEEPER_NIO_NUM_SELECTOR_THREADS,
Math.max((int) Math.sqrt((float) numCores/2), 1));
if (numSelectorThreads < 1) {
throw new IOException("numSelectorThreads must be at least 1");
}
numWorkerThreads = Integer.getInteger(
ZOOKEEPER_NIO_NUM_WORKER_THREADS, 2 * numCores);// number of worker threads (default: 2 * cores)
workerShutdownTimeoutMS = Long.getLong(
ZOOKEEPER_NIO_SHUTDOWN_TIMEOUT, 5000);
LOG.info("Configuring NIO connection handler with "
+ (sessionlessCnxnTimeout/1000) + "s sessionless connection"
+ " timeout, " + numSelectorThreads + " selector thread(s), "
+ (numWorkerThreads > 0 ? numWorkerThreads : "no")
+ " worker threads, and "
+ (directBufferBytes == 0 ? "gathered writes." :
("" + (directBufferBytes/1024) + " kB direct buffers.")));
for(int i=0; i<numSelectorThreads; ++i) {
selectorThreads.add(new SelectorThread(i));// create the SelectorThread objects
}
this.ss = ServerSocketChannel.open();// open the ServerSocketChannel
ss.socket().setReuseAddress(true);
LOG.info("binding to port " + addr);
ss.socket().bind(addr);// bind to the address/port
ss.configureBlocking(false);// switch to non-blocking mode
acceptThread = new AcceptThread(ss, addr, selectorThreads);// create the accept thread
/**
 * 1. The AcceptThread listens for accept events, accepts the socket request and obtains the SocketChannel.
 * 2. It puts the SocketChannel on the acceptedQueue for a SelectorThread and wakes that thread up via selector.wakeup().
 * 3. The SelectorThread registers the SocketChannel with its own selector and from then on watches it for read events.
 */
}
启动相关线程
org.apache.zookeeper.server.NIOServerCnxnFactory#startup
/**
 * Starts the factory's threads, attaches the ZooKeeperServer and, when
 * startServer is true, loads the data and starts the server itself.
 *
 * @param zks         server to attach to this factory
 * @param startServer whether to call zks.startdata() and zks.startup()
 */
public void startup(ZooKeeperServer zks, boolean startServer)
throws IOException, InterruptedException {
start();// start the factory threads
setZooKeeperServer(zks);// per the original note: also sets the serverCnxnFactory field on zks
if (startServer) {// per the original note, true by default (the one-argument overload is not shown here)
zks.startdata();// load the data into memory
zks.startup();// set up the request processors
}
}
/**
 * Creates the worker pool if it does not exist yet and starts the selector,
 * accept and expirer threads. Each thread is started only while it is still
 * in state NEW, so calling this method again does not restart a thread.
 */
public void start() {
stopped = false;
if (workerPool == null) {
workerPool = new WorkerService(
"NIOWorker", numWorkerThreads, false);// create the WorkerService
}
for(SelectorThread thread : selectorThreads) {
if (thread.getState() == Thread.State.NEW) {
thread.start();// start every SelectorThread
}
}
// ensure thread is started once and only once (start the accept thread)
if (acceptThread.getState() == Thread.State.NEW) {
acceptThread.start();
}
if (expirerThread.getState() == Thread.State.NEW) {// start the connection-expirer thread
expirerThread.start();
}
}
干了三件事情:
1、启动相关线程
accept线程 接收请求
SelectorThread线程处理请求
ConnectionExpirerThread线程负责关闭过期的连接
2、加载数据到内存
3、设置请求处理器
2和3步骤在ZooKeeperServer
/**
 * Creates the ZKDatabase on first use and loads its data if the database
 * has not been initialized yet.
 */
public void startdata()
throws IOException, InterruptedException {
//check to see if zkDb is not null
if (zkDb == null) {
zkDb = new ZKDatabase(this.txnLogFactory);
}
if (!zkDb.isInitialized()) {// not initialized yet
loadData();// load the data
}
}
/**
 * Starts the server: creates the session tracker if needed and starts it,
 * builds the request-processor chain, registers JMX, moves the state to
 * RUNNING and wakes any thread waiting on this object's monitor.
 */
public synchronized void startup() {
if (sessionTracker == null) {
createSessionTracker();
}
startSessionTracker();// start the session tracker
setupRequestProcessors();// build the request-processor chain
registerJMX();// register JMX beans
setState(State.RUNNING);
notifyAll();
}
/**
 * Builds the standalone request-processor chain
 * PrepRequestProcessor -> SyncRequestProcessor -> FinalRequestProcessor.
 * The chain is built back to front, and the Sync and Prep processors are
 * started as threads.
 */
protected void setupRequestProcessors() {
RequestProcessor finalProcessor = new FinalRequestProcessor(this);
RequestProcessor syncProcessor = new SyncRequestProcessor(this,
finalProcessor);
((SyncRequestProcessor)syncProcessor).start();// chain SyncRequestProcessor -> FinalRequestProcessor; start the thread that takes requests from queuedRequests
firstProcessor = new PrepRequestProcessor(this, syncProcessor);// firstProcessor is the PrepRequestProcessor
((PrepRequestProcessor)firstProcessor).start();// start the thread that loops taking requests from submittedRequests
}
请求处理器链路:
firstProcessor 第一个处理器指向了PrepRequestProcessor
↓
PrepRequestProcessor-》SyncRequestProcessor-》FinalRequestProcessor
并且启动了俩线程PrepRequestProcessor和SyncRequestProcessor线程 这些线程都是用来处理请求的 并且通过链路的形式 每个处理器分工不同。
PrepRequestProcessor的run方法如下:
1、自旋从submittedRequests队列获取请求数据
2、校验请求
3、交给nextProcessor处理器 也就是SyncRequestProcessor处理器来处理 也就是把消息丢到了queuedRequests队列中
我们以create为例 其他代码省略
/**
 * PrepRequestProcessor thread body (excerpt; the catch clauses are elided
 * with "..."). Takes requests from submittedRequests one at a time and
 * passes each to pRequest, until the requestOfDeath marker is received.
 */
public void run() {
try {
while (true) {// loop until the requestOfDeath marker arrives
Request request = submittedRequests.take();// blocking take of the next request
long traceMask = ZooTrace.CLIENT_REQUEST_TRACE_MASK;
if (request.type == OpCode.ping) {
traceMask = ZooTrace.CLIENT_PING_TRACE_MASK;
}
if (LOG.isTraceEnabled()) {
ZooTrace.logRequest(LOG, traceMask, 'P', request, "");
}
if (Request.requestOfDeath == request) {
break;
}
pRequest(request);// process the request
}
}
...
}
/**
 * Pre-processes one request (excerpt; only the create cases are shown, the
 * rest is elided with "..."). Clears the header and txn, builds the txn for
 * the request type, stamps the request with the current zxid and forwards it
 * to the next processor.
 */
protected void pRequest(Request request) throws RequestProcessorException {
// LOG.info("Prep>>> cxid = " + request.cxid + " type = " +
// request.type + " id = 0x" + Long.toHexString(request.sessionId));
request.setHdr(null);
request.setTxn(null);
try {
switch (request.type) {
case OpCode.createContainer:
case OpCode.create:
case OpCode.create2:
CreateRequest create2Request = new CreateRequest();// create requests land here
pRequest2Txn(request.type, zks.getNextZxid(), request, create2Request, true);// build the txn for this request with a fresh zxid
break;
...
request.zxid = zks.getZxid();
nextProcessor.processRequest(request);// hand over to the next processor (SyncRequestProcessor)
}
/**
 * Sets a new TxnHeader on the request and dispatches on the operation type
 * (excerpt; only the create cases are shown, the rest is elided with "...").
 *
 * @param type        operation code
 * @param zxid        zxid to put into the transaction header
 * @param request     request being processed
 * @param record      record the request payload is deserialized into
 * @param deserialize whether the payload still has to be deserialized
 */
protected void pRequest2Txn(int type, long zxid, Request request,
Record record, boolean deserialize)
throws KeeperException, IOException, RequestProcessorException
{
request.setHdr(new TxnHeader(request.sessionId, request.cxid, zxid,
Time.currentWallTime(), type));
switch (type) {
case OpCode.create:
case OpCode.create2:
case OpCode.createTTL:
case OpCode.createContainer: {
pRequest2TxnCreate(type, request, record, deserialize);// handle the create request
break;
}
...
}
/**
 * Validates a create request and turns it into a transaction. Checks the
 * request, the path and the parent's ACL, rejects an existing node or an
 * ephemeral parent, sets the matching Create*Txn on the request, and records
 * the pending changes for the parent and the new node via addChangeRecord.
 * Nothing is applied to the in-memory tree here.
 */
private void pRequest2TxnCreate(int type, Request request, Record record, boolean deserialize) throws IOException, KeeperException {// builds the actual create txn from the request
if (deserialize) {
ByteBufferInputStream.byteBuffer2Record(request.request, record);
}
int flags;
String path;
List<ACL> acl;
byte[] data;
long ttl;
if (type == OpCode.createTTL) {
CreateTTLRequest createTtlRequest = (CreateTTLRequest)record;
flags = createTtlRequest.getFlags();
path = createTtlRequest.getPath();
acl = createTtlRequest.getAcl();
data = createTtlRequest.getData();
ttl = createTtlRequest.getTtl();
} else {
CreateRequest createRequest = (CreateRequest)record;
flags = createRequest.getFlags();
path = createRequest.getPath();
acl = createRequest.getAcl();
data = createRequest.getData();
ttl = -1;
}
CreateMode createMode = CreateMode.fromFlag(flags);
validateCreateRequest(path, createMode, request, ttl);// validate the create request
String parentPath = validatePathForCreate(path, request.sessionId);
List<ACL> listACL = fixupACL(path, request.authInfo, acl);
ChangeRecord parentRecord = getRecordForPath(parentPath);
checkACL(zks, parentRecord.acl, ZooDefs.Perms.CREATE, request.authInfo);
int parentCVersion = parentRecord.stat.getCversion();
if (createMode.isSequential()) {// sequential node: append the parent's cversion as a zero-padded 10-digit suffix
path = path + String.format(Locale.ENGLISH, "%010d", parentCVersion);
}
validatePath(path, request.sessionId);
try {
if (getRecordForPath(path) != null) {
throw new KeeperException.NodeExistsException(path);
}
} catch (KeeperException.NoNodeException e) {
// ignore this one
// (NoNode is the expected outcome: the path to create must not exist yet)
}
boolean ephemeralParent = EphemeralType.get(parentRecord.stat.getEphemeralOwner()) == EphemeralType.NORMAL;
if (ephemeralParent) {// the parent is an ephemeral node: reject, ephemerals cannot have children
throw new KeeperException.NoChildrenForEphemeralsException(path);
}
int newCversion = parentRecord.stat.getCversion()+1;
if (type == OpCode.createContainer) {
request.setTxn(new CreateContainerTxn(path, data, listACL, newCversion));
} else if (type == OpCode.createTTL) {
request.setTxn(new CreateTTLTxn(path, data, listACL, newCversion, ttl));
} else {
request.setTxn(new CreateTxn(path, data, listACL, createMode.isEphemeral(),
newCversion));
}
StatPersisted s = new StatPersisted();
if (createMode.isEphemeral()) {
s.setEphemeralOwner(request.sessionId);
}
parentRecord = parentRecord.duplicate(request.getHdr().getZxid());
parentRecord.childCount++;
parentRecord.stat.setCversion(newCversion);// bump the parent's cversion by one
addChangeRecord(parentRecord);// record the parent's pending change (per the original note, in outstandingChanges)
addChangeRecord(new ChangeRecord(request.getHdr().getZxid(), path, s, 0, listACL));// record the new node's pending change (per the original note, in outstandingChanges)
}
处理器SyncRequestProcessor处理逻辑
1、自旋从queuedRequests队列中获取消息
2、write请求 写入到日志文件流中,滚日志文件、启动线程记录快照日志
3、读写请求调用nextProcessor也就是FinalRequestProcessor来处理接下来逻辑
/**
 * SyncRequestProcessor thread body: the persistence stage of the chain.
 * Appends each request to the transaction log, periodically rolls the log
 * and takes a snapshot on a separate thread, and batches requests in toFlush
 * so that they are committed and forwarded to the next processor together.
 */
public void run() {// persistence processor: writes the transaction log files
try {
int logCount = 0;
// we do this in an attempt to ensure that not all of the servers
// in the ensemble take a snapshot at the same time
int randRoll = r.nextInt(snapCount/2);
while (true) {// loop until requestOfDeath arrives
Request si = null;
if (toFlush.isEmpty()) {
si = queuedRequests.take();// nothing is waiting to be flushed: block until a request arrives
} else {
si = queuedRequests.poll();// something is waiting to be flushed: poll without blocking
if (si == null) {// no new request right now
flush(toFlush);// flush the pending batch
continue;
}
}
if (si == requestOfDeath) {
break;
}
if (si != null) {// a request was obtained
// track the number of records written to the log
if (zks.getZKDatabase().append(si)) {// appended to the transaction-log stream, but not flushed yet
logCount++;
if (logCount > (snapCount / 2 + randRoll)) {
randRoll = r.nextInt(snapCount/2);
// roll the log
zks.getZKDatabase().rollLog();// flush the current log stream and roll over to a new log file
// take a snapshot
if (snapInProcess != null && snapInProcess.isAlive()) {
LOG.warn("Too busy to snap, skipping");
} else {
snapInProcess = new ZooKeeperThread("Snapshot Thread") {// dedicated thread that takes one snapshot
public void run() {
try {
zks.takeSnapshot();// take the snapshot
} catch(Exception e) {
LOG.warn("Unexpected exception", e);
}
}
};
snapInProcess.start();
}
logCount = 0;
}
} else if (toFlush.isEmpty()) {
// optimization for read heavy workloads
// iff this is a read, and there are no pending
// flushes (writes), then just pass this to the next
// processor
if (nextProcessor != null) {// next processor is the FinalRequestProcessor
nextProcessor.processRequest(si);
if (nextProcessor instanceof Flushable) {
((Flushable)nextProcessor).flush();
}
}
continue;
}
toFlush.add(si);
if (toFlush.size() > 1000) {// flush once more than 1000 requests are pending
flush(toFlush);// commit the log and forward the batch to nextProcessor
}
}
}
} catch (Throwable t) {
handleException(this.getName(), t);
} finally{
running = false;
}
LOG.info("SyncRequestProcessor exited!");
}
/**
 * Commits the transaction log, then drains toFlush in order, passing each
 * request to the next processor, and finally flushes the next processor if
 * it is Flushable. Does nothing when toFlush is empty.
 *
 * @param toFlush pending requests; empty when this method returns
 */
private void flush(LinkedList<Request> toFlush)
throws IOException, RequestProcessorException
{
if (toFlush.isEmpty())
return;
zks.getZKDatabase().commit();// commit the transaction log
while (!toFlush.isEmpty()) {
Request i = toFlush.remove();
if (nextProcessor != null) {
nextProcessor.processRequest(i);// hand over to the next processor
}
}
if (nextProcessor != null && nextProcessor instanceof Flushable) {
((Flushable)nextProcessor).flush();
}
}
最后一个处理器FinalRequestProcessor处理逻辑:
1、对于读操作 从本服务中读取数据并创建response放到outgoingBuffers队列中等待后续处理
2、对于写操作 则是将数据更新到内存
/**
 * FinalRequestProcessor entry point (excerpt; several parts are elided with
 * "..."). Applies the request's txn through zks.processTxn, removes the
 * matching entries from outstandingChanges, then builds the response for
 * the request type and sends it on the request's connection.
 */
public void processRequest(Request request) {{
...
ProcessTxnResult rc = null;
synchronized (zks.outstandingChanges) {// filled by the PrepRequestProcessor; memory has not been modified yet at this point
// Need to process local session requests
rc = zks.processTxn(request);// apply the txn: write the data into the DataTree
// request.hdr is set for write requests, which are the only ones
// that add to outstandingChanges.
if (request.getHdr() != null) {
TxnHeader hdr = request.getHdr();
Record txn = request.getTxn();
long zxid = hdr.getZxid();
while (!zks.outstandingChanges.isEmpty()
&& zks.outstandingChanges.get(0).zxid <= zxid) {
ChangeRecord cr = zks.outstandingChanges.remove(0);
if (cr.zxid < zxid) {
LOG.warn("Zxid outstanding " + cr.zxid
+ " is less than current " + zxid);
}
if (zks.outstandingChangesForPath.get(cr.path) == cr) {
zks.outstandingChangesForPath.remove(cr.path);
}
}
}
// do not add non quorum packets to the queue.
if (request.isQuorum()) {// quorum (cluster) requests only
zks.getZKDatabase().addCommittedProposal(request);
}
}
...
if (request.cnxn == null) {
return;
}
ServerCnxn cnxn = request.cnxn;
String lastOp = "NA";
zks.decInProcess();
Code err = Code.OK;
Record rsp = null;
try {
...
// build the response for the request type
switch (request.type) {
...
case OpCode.create: {
lastOp = "CREA";
rsp = new CreateResponse(rc.path);// create the response
err = Code.get(rc.err);
break;
}
...
try {
cnxn.sendResponse(hdr, rsp, "response");// send the response: it is put on the connection's outgoingBuffers queue for later writing
if (request.type == OpCode.closeSession) {
cnxn.sendCloseSession();
}
} catch (IOException e) {
LOG.error("FIXMSG",e);
}
}
org.apache.zookeeper.server.NIOServerCnxn#sendResponse
/**
 * Serializes the reply header and the optional record into a
 * length-prefixed buffer and queues it via sendBuffer. For replies with a
 * positive xid it also re-enables receiving when the throttling condition
 * allows. Any exception is caught and logged.
 *
 * @param h   reply header
 * @param r   response record; may be null, in which case only the header is written
 * @param tag tag used when serializing r
 */
public void sendResponse(ReplyHeader h, Record r, String tag) {// sending a response means putting it on the outgoingBuffers queue
try {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
// Make space for length
BinaryOutputArchive bos = BinaryOutputArchive.getArchive(baos);
try {
baos.write(fourBytes);
bos.writeRecord(h, "header");
if (r != null) {
bos.writeRecord(r, tag);
}
baos.close();
} catch (IOException e) {
LOG.error("Error serializing response");
}
byte b[] = baos.toByteArray();
ByteBuffer bb = ByteBuffer.wrap(b);
// Overwrite the leading placeholder with the payload length, then rewind for sending.
bb.putInt(b.length - 4).rewind();
sendBuffer(bb);// queue the buffer for sending
if (h.getXid() > 0) {
// check throttling
if (outstandingRequests.decrementAndGet() < 1 ||
zkServer.getInProcess() < outstandingLimit) {
enableRecv();
}
}
} catch(Exception e) {
LOG.warn("Unexpected exception. Destruction averted.", e);
}
}
/**
 * Queues a buffer on outgoingBuffers and asks the selector thread to
 * refresh this connection's interest ops.
 *
 * @param bb buffer to send
 */
public void sendBuffer(ByteBuffer bb) {
if (LOG.isTraceEnabled()) {
LOG.trace("Add a buffer to outgoingBuffers, sk " + sk
+ " is valid: " + sk.isValid());
}
outgoingBuffers.add(bb);// add to the outgoingBuffers queue
requestInterestOpsUpdate();// request an update of this connection's interest ops
}
/**
 * Queues this connection's SelectionKey for an interest-ops update on its
 * selector thread, but only while the connection is selectable.
 */
private void requestInterestOpsUpdate() {
if (isSelectable()) {
selectorThread.addInterestOpsUpdateRequest(sk);
}
}
org.apache.zookeeper.server.NIOServerCnxnFactory.SelectorThread#addInterestOpsUpdateRequest
/**
 * Queues a key whose interest ops need refreshing and wakes the selector.
 *
 * @param sk key to update
 * @return false if the thread is stopped or the key could not be queued,
 *         true otherwise
 */
public boolean addInterestOpsUpdateRequest(SelectionKey sk) {
if (stopped || !updateQueue.offer(sk)) {// put on updateQueue to be processed by the selector thread
return false;
}
wakeupSelector();// wake up the selector thread
return true;
}
看完了processor,还有几个线程没有看。
org.apache.zookeeper.server.NIOServerCnxnFactory.AcceptThread
1、相当于bossGroup线程,负责接收客户端的连接请求
2、将请求交给Selector线程 也就是workGroup去处理
/**
 * AcceptThread body: keeps calling select() until the thread is stopped or
 * the accept socket is closed. Exceptions from a single pass are logged and
 * ignored. On exit it closes the selector and, unless a reconfiguration is
 * in progress, stops the whole factory.
 */
public void run() {
try {
while (!stopped && !acceptSocket.socket().isClosed()) {// loop until stopped or the socket is closed
try {
select();// wait for and handle ready accept events
} catch (RuntimeException e) {
LOG.warn("Ignoring unexpected runtime exception", e);
} catch (Exception e) {
LOG.warn("Ignoring unexpected exception", e);
}
}
} finally {
closeSelector();
// This will wake up the selector threads, and tell the
// worker thread pool to begin shutdown.
if (!reconfiguring) {
NIOServerCnxnFactory.this.stop();
}
LOG.info("accept thread exitted run method");
}
}
/**
 * One pass of the accept loop: blocks in selector.select(), then handles
 * every selected key. Acceptable keys go to doAccept(); if that fails,
 * accepting is paused. An IOException is logged and ignored.
 */
private void select() {
try {
selector.select();// block until at least one event is ready
Iterator<SelectionKey> selectedKeys =
selector.selectedKeys().iterator();
while (!stopped && selectedKeys.hasNext()) {
SelectionKey key = selectedKeys.next();
selectedKeys.remove();
if (!key.isValid()) {
continue;
}
if (key.isAcceptable()) {
if (!doAccept()) {// handle the accept event
// If unable to pull a new connection off the accept
// queue, pause accepting to give us time to free
// up file descriptors and so the accept thread
// doesn't spin in a tight loop.
pauseAccept(10);// accept failed: pause accepting (it is not terminated)
}
} else {
LOG.warn("Unexpected ops in accept select "
+ key.readyOps());
}
}
} catch (IOException e) {
LOG.warn("Ignoring IOException while selecting", e);
}
}
/**
 * Accepts one client connection, enforces the per-address connection limit,
 * makes the channel non-blocking and hands it to the next selector thread in
 * round-robin order.
 *
 * @return true if accept() itself returned a channel, even when the
 *         connection was rejected and closed afterwards; false if accept()
 *         threw
 */
private boolean doAccept() {// handle an accept event
boolean accepted = false;
SocketChannel sc = null;
try {
sc = acceptSocket.accept();// obtain the client's SocketChannel
accepted = true;
InetAddress ia = sc.socket().getInetAddress();
int cnxncount = getClientCnxnCount(ia);
if (maxClientCnxns > 0 && cnxncount >= maxClientCnxns){
throw new IOException("Too many connections from " + ia
+ " - max is " + maxClientCnxns );
}
LOG.info("Accepted socket connection from "
+ sc.socket().getRemoteSocketAddress());
sc.configureBlocking(false);// switch to non-blocking mode
// Round-robin assign this connection to a selector thread
if (!selectorIterator.hasNext()) {
selectorIterator = selectorThreads.iterator();// end of the list reached: start over from the first SelectorThread
}
SelectorThread selectorThread = selectorIterator.next();
if (!selectorThread.addAcceptedConnection(sc)) {// put the channel on that thread's queue
throw new IOException(
"Unable to add connection to selector queue"
+ (stopped ? " (shutdown in progress)" : ""));
}
acceptErrorLogger.flush();
} catch (IOException e) {
// accept, maxClientCnxns, configureBlocking
acceptErrorLogger.rateLimitLog(
"Error accepting new connection: " + e.getMessage());
fastCloseSock(sc);
}
return accepted;
}
org.apache.zookeeper.server.NIOServerCnxnFactory.SelectorThread#addAcceptedConnection
/**
 * Queues a newly accepted channel for this selector thread and wakes its
 * selector.
 *
 * @param accepted channel handed over by the accept thread
 * @return false if the thread is stopped or the channel could not be queued,
 *         true otherwise
 */
public boolean addAcceptedConnection(SocketChannel accepted) {
if (stopped || !acceptedQueue.offer(accepted)) {// enqueue on acceptedQueue
return false;
}
wakeupSelector();// wake this thread if it is blocked in selector.select()
return true;
}
/** Wakes up this thread's selector by calling selector.wakeup(). */
public void wakeupSelector() {
selector.wakeup();
}
org.apache.zookeeper.server.NIOServerCnxnFactory.SelectorThread
1、监听selector的就绪事件 处理读写事件
2、启动ScheduledWorkRequest线程处理
3、从队列acceptedQueue中接收新的客户端连接(AcceptThread分发过来的)注册读事件 开启监听
4、从updateQueue队列中获取响应 触发写事件 将响应写入socket
/**
 * SelectorThread body. Until stopped, each pass handles ready I/O events,
 * registers newly accepted connections and applies queued interest-ops
 * updates. After the loop it closes the connections still selectable on the
 * selector, closes channels left in acceptedQueue and clears updateQueue.
 * The finally block closes the selector and stops the whole factory.
 */
public void run() {
try {
while (!stopped) {
try {
select();// wait for ready read/write events on this thread's selector and dispatch them
processAcceptedConnections();// register the SocketChannels that the AcceptThread put on acceptedQueue with this selector
processInterestOpsUpdateRequests();// apply the queued interest-ops update requests
} catch (RuntimeException e) {
LOG.warn("Ignoring unexpected runtime exception", e);
} catch (Exception e) {
LOG.warn("Ignoring unexpected exception", e);
}
}
// Close connections still pending on the selector. Any others
// with in-flight work, let drain out of the work queue.
for (SelectionKey key : selector.keys()) {
NIOServerCnxn cnxn = (NIOServerCnxn) key.attachment();
if (cnxn.isSelectable()) {
cnxn.close();
}
cleanupSelectionKey(key);
}
SocketChannel accepted;
while ((accepted = acceptedQueue.poll()) != null) {
fastCloseSock(accepted);
}
updateQueue.clear();
} finally {
closeSelector();
// This will wake up the accept thread and the other selector
// threads, and tell the worker thread pool to begin shutdown.
NIOServerCnxnFactory.this.stop();
LOG.info("selector thread exitted run method");
}
}
/**
 * One pass of the selector loop: blocks in selector.select(), copies the
 * selected keys into a list, shuffles it and dispatches each readable or
 * writable key to handleIO. Invalid keys are cleaned up. An IOException is
 * logged and ignored.
 */
private void select() {
try {
selector.select();
Set<SelectionKey> selected = selector.selectedKeys();
// Work on a shuffled copy so the keys are handled in random order.
ArrayList<SelectionKey> selectedList =
new ArrayList<SelectionKey>(selected);
Collections.shuffle(selectedList);
Iterator<SelectionKey> selectedKeys = selectedList.iterator();
while(!stopped && selectedKeys.hasNext()) {
SelectionKey key = selectedKeys.next();
selected.remove(key);
if (!key.isValid()) {
cleanupSelectionKey(key);
continue;
}
if (key.isReadable() || key.isWritable()) {
handleIO(key);// handle the I/O event
} else {
LOG.warn("Unexpected ops in select " + key.readyOps());
}
}
} catch (IOException e) {
LOG.warn("Ignoring IOException while selecting", e);
}
}
/**
 * Hands a ready key to the worker pool. While the work is in progress the
 * connection is marked non-selectable and the key's interest ops are set to
 * 0, so the selector does not pick the key up again.
 *
 * @param key key with a ready read or write event
 */
private void handleIO(SelectionKey key) {// handle a read/write I/O event
IOWorkRequest workRequest = new IOWorkRequest(this, key);// wrap the key in an IOWorkRequest
NIOServerCnxn cnxn = (NIOServerCnxn) key.attachment();
// Stop selecting this key while processing on its
// connection
cnxn.disableSelectable();
key.interestOps(0);// clear the interest ops
touchCnxn(cnxn);
workerPool.schedule(workRequest);// hand the work request to the worker pool
}
/**
 * Schedules a work request. If the service is stopped the request is only
 * cleaned up. With worker executors present, the request goes to the
 * executor selected by id modulo the number of workers. Without any, the
 * request is started directly and the caller waits for it to finish.
 *
 * @param workRequest work to run
 * @param id          value used to pick the worker; may be negative
 */
public void schedule(WorkRequest workRequest, long id) {// schedule a work request
if (stopped) {
workRequest.cleanup();
return;
}
ScheduledWorkRequest scheduledWorkRequest =
new ScheduledWorkRequest(workRequest);// wrap in a ScheduledWorkRequest
// If we have a worker thread pool, use that; otherwise, do the work
// directly.
int size = workers.size();
if (size > 0) {// worker executors exist
try {
// make sure to map negative ids as well to [0, size-1]
int workerNum = ((int) (id % size) + size) % size;
ExecutorService worker = workers.get(workerNum);
worker.execute(scheduledWorkRequest);// the executor runs ScheduledWorkRequest.run()
} catch (RejectedExecutionException e) {
LOG.warn("ExecutorService rejected execution", e);
workRequest.cleanup();
}
} else {
// When there is no worker thread pool, do the work directly
// and wait for its completion
scheduledWorkRequest.start();
try {
scheduledWorkRequest.join();
} catch (InterruptedException e) {
LOG.warn("Unexpected exception", e);
Thread.currentThread().interrupt();
}
}
}
/**
 * Drains acceptedQueue: registers each new channel with this thread's
 * selector for OP_READ, creates its NIOServerCnxn, attaches it to the key
 * and adds it via addCnxn. On IOException the key is cleaned up and the
 * channel closed.
 */
private void processAcceptedConnections() {
SocketChannel accepted;
while (!stopped && (accepted = acceptedQueue.poll()) != null) {// new connections handed over by the accept thread
SelectionKey key = null;
try {
key = accepted.register(selector, SelectionKey.OP_READ);// register for read events
NIOServerCnxn cnxn = createConnection(accepted, key, this);
key.attach(cnxn);
addCnxn(cnxn);
} catch (IOException e) {
// register, createConnection
cleanupSelectionKey(key);
fastCloseSock(accepted);
}
}
}
/**
 * Drains updateQueue: for each queued key whose connection is selectable,
 * sets the key's interest ops to the connection's current interest ops.
 */
private void processInterestOpsUpdateRequests() {
SelectionKey key;
while (!stopped && (key = updateQueue.poll()) != null) {
if (!key.isValid()) {// the key is no longer valid
cleanupSelectionKey(key);
// NOTE(review): there is no `continue` here, so an invalid key still
// reaches the attachment()/interestOps() calls below — confirm this is intended.
}
NIOServerCnxn cnxn = (NIOServerCnxn) key.attachment();
if (cnxn.isSelectable()) {
key.interestOps(cnxn.getInterestOps());// install the new interest-ops set
}
}
}
/**
 * Computes the interest ops for this connection.
 *
 * @return 0 if the connection is not selectable; otherwise OP_READ and/or
 *         OP_WRITE according to getReadInterest() and getWriteInterest()
 */
public int getInterestOps() {
if (!isSelectable()) {
return 0;
}
int interestOps = 0;
if (getReadInterest()) {// interested in read events
interestOps |= SelectionKey.OP_READ;
}
if (getWriteInterest()) {// outgoingBuffers is not empty: interested in write events
interestOps |= SelectionKey.OP_WRITE;
}
return interestOps;
}
/** Returns true when outgoingBuffers holds at least one buffer to write. */
private boolean getWriteInterest() {
return !outgoingBuffers.isEmpty();// true when outgoingBuffers is not empty
}
其中启动了ScheduledWorkRequest的线程
对于读事件:建立连接 处理读请求 将请求放入firstProcessor也就是PrepRequestProcessor的队列submittedRequests中等待 processor线程处理
对于写事件:从队列outgoingBuffers中依次通过socket发送出去
/**
 * ScheduledWorkRequest body. Runs the wrapped work request, or only cleans
 * it up if the service stopped while the request was queued. An exception
 * from doWork() is logged and followed by cleanup().
 */
public void run() {// invoked by the worker pool
try {
// Check if stopped while request was on queue
if (stopped) {
workRequest.cleanup();
return;
}
workRequest.doWork();// do the work; for I/O this is IOWorkRequest.doWork()
} catch (Exception e) {
LOG.warn("Unexpected exception", e);
workRequest.cleanup();
}
}
org.apache.zookeeper.server.NIOServerCnxnFactory.IOWorkRequest#doWork
/**
 * Performs the I/O for one ready key on a worker thread, then marks the
 * connection selectable again and queues an interest-ops update so the
 * selector thread resumes selecting on it. The connection is closed if the
 * factory stopped meanwhile or the update cannot be queued.
 */
public void doWork() throws InterruptedException {
if (!key.isValid()) {
selectorThread.cleanupSelectionKey(key);
return;
}
if (key.isReadable() || key.isWritable()) {// read or write event
cnxn.doIO(key);// perform the actual I/O
// Check if we shutdown or doIO() closed this connection
if (stopped) {
cnxn.close();
return;
}
if (!key.isValid()) {
selectorThread.cleanupSelectionKey(key);
return;
}
touchCnxn(cnxn);
}
// Mark this connection as once again ready for selection
cnxn.enableSelectable();
// Push an update request on the queue to resume selecting
// on the current set of interest ops, which may have changed
// as a result of the I/O operations we just performed.
if (!selectorThread.addInterestOpsUpdateRequest(key)) {
cnxn.close();
}
}
org.apache.zookeeper.server.NIOServerCnxn#doIO
/**
 * Performs the read and/or write that is ready on the key (excerpt; the
 * catch clauses are elided with "..."). On the read side it first fills the
 * length buffer, then the payload; on the write side it delegates to
 * handleWrite.
 */
void doIO(SelectionKey k) throws InterruptedException {
try {
if (isSocketOpen() == false) {
LOG.warn("trying to do i/o on a null socket for session:0x"
+ Long.toHexString(sessionId));
return;
}
if (k.isReadable()) {// ready to read
int rc = sock.read(incomingBuffer);// read the data sent by the client
if (rc < 0) {
throw new EndOfStreamException(
"Unable to read additional data from client sessionid 0x"
+ Long.toHexString(sessionId)
+ ", likely client has closed socket");
}
if (incomingBuffer.remaining() == 0) {
boolean isPayload;
if (incomingBuffer == lenBuffer) { // start of next request
incomingBuffer.flip();
isPayload = readLength(k);
incomingBuffer.clear();
} else {
// continuation
isPayload = true;
}
if (isPayload) { // not the case for 4letterword
readPayload();// read and process the payload
}
else {
// four letter words take care
// need not do anything else
return;
}
}
}
if (k.isWritable()) {
handleWrite(k);// handle the write event
if (!initialized && !getReadInterest() && !getWriteInterest()) {
throw new CloseRequestException("responded to info probe");
}
}
}
...
}
/**
 * Reads the request payload into incomingBuffer. Once the buffer is full,
 * the packet is processed as a connect request if the connection is not yet
 * initialized, otherwise as a normal request. incomingBuffer is then reset
 * to lenBuffer for the next request.
 */
private void readPayload() throws IOException, InterruptedException {// read-side handling
if (incomingBuffer.remaining() != 0) { // have we read length bytes?
int rc = sock.read(incomingBuffer); // sock is non-blocking, so ok
if (rc < 0) {
throw new EndOfStreamException(
"Unable to read additional data from client sessionid 0x"
+ Long.toHexString(sessionId)
+ ", likely client has closed socket");
}
}
if (incomingBuffer.remaining() == 0) { // have we read length bytes?
packetReceived();// update the received-packet statistics
incomingBuffer.flip();
if (!initialized) {
readConnectRequest();
} else {// connection already established
readRequest();// read a normal request
}
lenBuffer.clear();
incomingBuffer = lenBuffer;
}
}
/** Passes the fully read packet in incomingBuffer to zkServer.processPacket. */
private void readRequest() throws IOException {
zkServer.processPacket(this, incomingBuffer);
}
org.apache.zookeeper.server.ZooKeeperServer#processPacket
/**
 * Deserializes the request header from the incoming buffer, submits the
 * request (excerpt; the code that builds the Request is elided with "...")
 * and increments the connection's outstanding-request count.
 *
 * @param cnxn           connection the packet arrived on
 * @param incomingBuffer buffer holding one complete packet
 */
public void processPacket(ServerCnxn cnxn, ByteBuffer incomingBuffer) throws IOException {
// We have the request, now process and setup for next
InputStream bais = new ByteBufferInputStream(incomingBuffer);
BinaryInputArchive bia = BinaryInputArchive.getArchive(bais);
RequestHeader h = new RequestHeader();
h.deserialize(bia, "header");
// Through the magic of byte buffers, txn will not be
// pointing
// to the start of the txn
incomingBuffer = incomingBuffer.slice();
...
submitRequest(si);// submit the request
}
}
cnxn.incrOutstandingRequests(h);
}
/**
 * Submits a request to the processor chain (excerpt; the beginning is
 * elided with "..."). Valid requests go to firstProcessor and, when they
 * have a connection, increment the in-process counter. Requests of unknown
 * type go to an UnimplementedRequestProcessor.
 *
 * @param si request to submit
 */
public void submitRequest(Request si) {
...
try {
touch(si.cnxn);
boolean validpacket = Request.isValid(si.type);// check that the request type is valid
if (validpacket) {
firstProcessor.processRequest(si);// firstProcessor is the PrepRequestProcessor
if (si.cnxn != null) {
incInProcess();
}
} else {
LOG.warn("Received packet at server of unknown type " + si.type);
new UnimplementedRequestProcessor().processRequest(si);
}
} catch (MissingSessionException e) {
if (LOG.isDebugEnabled()) {
LOG.debug("Dropping request: " + e.getMessage());
}
} catch (RequestProcessorException e) {
LOG.error("Unable to process request:" + e.getMessage(), e);
}
}
/**
 * Writes queued responses from outgoingBuffers to the socket. Without a
 * direct buffer it does one gathered write over all queued buffers; with a
 * direct buffer it copies as much as fits, writes it and advances the queue
 * by the number of bytes actually sent. Fully sent buffers are removed from
 * the queue.
 *
 * @throws CloseRequestException when the closeConn marker buffer is reached
 */
void handleWrite(SelectionKey k) throws IOException, CloseRequestException {// handle a write event
if (outgoingBuffers.isEmpty()) {// nothing queued in outgoingBuffers: nothing to write
return;
}
/*
* This is going to reset the buffer position to 0 and the
* limit to the size of the buffer, so that we can fill it
* with data from the non-direct buffers that we need to
* send.
*/
ByteBuffer directBuffer = NIOServerCnxnFactory.getDirectBuffer();
if (directBuffer == null) {
ByteBuffer[] bufferList = new ByteBuffer[outgoingBuffers.size()];
// Use gathered write call. This updates the positions of the
// byte buffers to reflect the bytes that were written out.
sock.write(outgoingBuffers.toArray(bufferList));
// Remove the buffers that we have sent
ByteBuffer bb;
while ((bb = outgoingBuffers.peek()) != null) {
if (bb == ServerCnxnFactory.closeConn) {
throw new CloseRequestException("close requested");
}
if (bb.remaining() > 0) {
break;
}
packetSent();
outgoingBuffers.remove();
}
} else {
directBuffer.clear();
for (ByteBuffer b : outgoingBuffers) {
if (directBuffer.remaining() < b.remaining()) {
/*
* When we call put later, if the directBuffer is to
* small to hold everything, nothing will be copied,
* so we've got to slice the buffer if it's too big.
*/
b = (ByteBuffer) b.slice().limit(
directBuffer.remaining());
}
/*
* put() is going to modify the positions of both
* buffers, put we don't want to change the position of
* the source buffers (we'll do that after the send, if
* needed), so we save and reset the position after the
* copy
*/
int p = b.position();
directBuffer.put(b);
b.position(p);
if (directBuffer.remaining() == 0) {
break;
}
}
/*
* Do the flip: limit becomes position, position gets set to
* 0. This sets us up for the write.
*/
directBuffer.flip();
int sent = sock.write(directBuffer);// write the data to the socket
ByteBuffer bb;
// Remove the buffers that we have sent
while ((bb = outgoingBuffers.peek()) != null) {
if (bb == ServerCnxnFactory.closeConn) {
throw new CloseRequestException("close requested");
}
if (sent < bb.remaining()) {
/*
* We only partially sent this buffer, so we update
* the position and exit the loop.
*/
bb.position(bb.position() + sent);
break;
}
packetSent();
/* We've sent the whole buffer, so drop the buffer */
sent -= bb.remaining();
outgoingBuffers.remove();
}
}
}
总结如下:
假设当一个create命令来了之后
1、AcceptThread线程
1.1、acceptThread线程监听accept事件 获取客户端连接 SocketChannel
1.2、将socket客户端分发给SelectorThread线程处理 并唤醒该线程
2、SelectorThread线程
2.1、监听selector的就绪事件 处理读写事件
2.2、启动ScheduledWorkRequest线程处理
2.3、从队列acceptedQueue中接收新的客户端连接(AcceptThread分发过来的)注册读事件 开启监听
2.4、从updateQueue队列中获取响应 触发写事件 将响应写入socket
3、ScheduledWorkRequest线程
3.1、对于读事件:建立连接 处理读请求 将请求放入firstProcessor也就是PrepRequestProcessor的队列submittedRequests中等待 processor线程处理
3.2、对于写事件:从队列outgoingBuffers中依次通过socket发送出去
4、PrepRequestProcessor线程:
4.1、自旋从submittedRequests队列获取请求数据
4.2、校验请求合法性
4.3、把消息丢到了queuedRequests队列中,等待SyncRequestProcessor线程处理
5、SyncRequestProcessor线程
5.1、自旋从queuedRequests队列中获取消息
5.2、write请求 写入到日志文件流中,滚日志文件、启动线程记录快照日志
5.3、读写请求调用nextProcessor也就是FinalRequestProcessor来处理接下来逻辑
6、FinalRequestProcessor处理器
6.1、对于读操作 从本服务中读取数据并创建response放到outgoingBuffers队列中等待SelectorThread线程处理
6.2、将SelectionKey放入到updateQueue队列中,唤醒SelectorThread去处理写事件
6.3、对于写操作 则是将数据更新到内存