SocketServer
class SocketServer(val brokerId: Int,
val host: String,
val port: Int,
val numProcessorThreads: Int,
val maxQueuedRequests: Int,
val sendBufferSize: Int,
val recvBufferSize: Int,
val maxRequestSize: Int = Int.MaxValue,
val maxConnectionsPerIp: Int = Int.MaxValue,
val connectionsMaxIdleMs: Long,
val maxConnectionsPerIpOverrides: Map[String, Int] ) extends Logging with KafkaMetricsGroup {
this.logIdent = "[Socket Server on Broker " + brokerId + "], "
private val time = SystemTime
private val processors = new Array[Processor](numProcessorThreads)
@volatile private var acceptor: Acceptor = null
创建RequestChannel:其中有numProcessorThreads个response队列,与processor一一绑定(request队列只有一个,由所有processor共享)
val requestChannel = new RequestChannel(numProcessorThreads, maxQueuedRequests)
启动numProcessorThreads个processor
def startup() {
val quotas = new ConnectionQuotas(maxConnectionsPerIp, maxConnectionsPerIpOverrides)
for(i <- 0 until numProcessorThreads) {
processors(i) = new Processor(i,
time,
maxRequestSize,
aggregateIdleMeter,
newMeter("IdlePercent", "percent", TimeUnit.NANOSECONDS, Map("networkProcessor" -> i.toString)),
numProcessorThreads,
requestChannel,
quotas,
connectionsMaxIdleMs)
Utils.newThread("kafka-network-thread-%d-%d".format(port, i), processors(i), false).start()
}
创建Acceptor
// start accepting connections
this.acceptor = new Acceptor(host, port, processors, sendBufferSize, recvBufferSize, quotas)
ConnectionQuotas连接控制器
/**
 * Per-address connection counter with a configurable upper bound.
 *
 * @param defaultMax     limit applied to every address that has no explicit override
 * @param overrideQuotas per-host limits; each key is resolved through InetAddress.getByName
 *                       once, when this object is constructed
 */
class ConnectionQuotas(val defaultMax: Int, overrideQuotas: Map[String, Int]) {
  private val overrides = overrideQuotas.map(entry => (InetAddress.getByName(entry._1), entry._2))
  // Guarded by its own monitor; every access below happens inside `counts synchronized`.
  private val counts = mutable.Map[InetAddress, Int]()

  /**
   * Register one more connection from addr.
   *
   * The counter is incremented before the limit is checked, so the rejected connection
   * is still counted when the exception is thrown.
   * NOTE(review): this presumably relies on the caller invoking dec() when it closes the
   * rejected socket -- confirm against the close(SocketChannel) helper.
   *
   * @throws TooManyConnectionsException if addr already had `max` (or more) connections
   */
  def inc(addr: InetAddress) {
    counts synchronized {
      val count = counts.getOrElse(addr, 0)
      counts.put(addr, count + 1)
      val max = overrides.getOrElse(addr, defaultMax)
      if(count >= max)
        throw new TooManyConnectionsException(addr, max)
    }
  }

  /**
   * Unregister one connection from addr, dropping the map entry when the count reaches zero.
   *
   * @throws NoSuchElementException if no connection is currently recorded for addr
   */
  def dec(addr: InetAddress) {
    counts synchronized {
      // Same exception type as the former Option.get, but with a message that names the address.
      val count = counts.getOrElse(addr,
        throw new NoSuchElementException("Attempted to decrease connection count for address with no connections, address: " + addr))
      if(count == 1)
        counts.remove(addr)
      else
        counts.put(addr, count - 1)
    }
  }
}
Acceptor简单的socket接收器
调用accept(key, processors(currentProcessor))
/**
* Thread that accepts and configures new connections. There is only need for one of these
*/
private[kafka] class Acceptor(val host: String,
val port: Int,
private val processors: Array[Processor],
val sendBufferSize: Int,
val recvBufferSize: Int,
connectionQuotas: ConnectionQuotas) extends AbstractServerThread(connectionQuotas) {
val serverChannel = openServerSocket(host, port)
/**
 * Accept loop: registers the server channel for OP_ACCEPT, then polls the selector and
 * hands every acceptable key to the processors in round-robin order until the thread
 * is asked to stop. On exit the server channel and the selector are closed.
 */
def run() {
  serverChannel.register(selector, SelectionKey.OP_ACCEPT)
  startupComplete()
  var nextProcessor = 0
  while (isRunning) {
    // select(500) bounds how long a cleared isRunning flag can go unnoticed
    if (selector.select(500) > 0) {
      val pending = selector.selectedKeys().iterator()
      while (pending.hasNext && isRunning) {
        try {
          val key = pending.next
          pending.remove()
          if (!key.isAcceptable)
            throw new IllegalStateException("Unrecognized key state for acceptor thread.")
          accept(key, processors(nextProcessor))
          // advance only after a successful accept, wrapping around the processor array
          nextProcessor = (nextProcessor + 1) % processors.length
        } catch {
          case t: Throwable => error("Error while accepting connection", t)
        }
      }
    }
  }
  debug("Closing server socket and selector.")
  swallowError(serverChannel.close())
  swallowError(selector.close())
  shutdownComplete()
}
accept()
socket绑定到固定的processor
/*
 * Accept the connection pending on the given key, charge it against the per-address
 * quota, configure the socket (non-blocking, TCP_NODELAY, send buffer size) and pass it
 * to the chosen processor. A connection that exceeds the quota is logged and closed.
 */
def accept(key: SelectionKey, processor: Processor) {
  val listener = key.channel().asInstanceOf[ServerSocketChannel]
  val socketChannel = listener.accept()
  try {
    val socket = socketChannel.socket()
    connectionQuotas.inc(socket.getInetAddress)
    socketChannel.configureBlocking(false)
    socket.setTcpNoDelay(true)
    socket.setSendBufferSize(sendBufferSize)
    debug("Accepted connection from %s on %s. sendBufferSize [actual|requested]: [%d|%d] recvBufferSize [actual|requested]: [%d|%d]"
      .format(socket.getInetAddress, socket.getLocalSocketAddress,
              socket.getSendBufferSize, sendBufferSize,
              socket.getReceiveBufferSize, recvBufferSize))
    processor.accept(socketChannel)
  } catch {
    case quotaExceeded: TooManyConnectionsException =>
      info("Rejected connection from %s, address already has the configured maximum of %d connections.".format(quotaExceeded.ip, quotaExceeded.count))
      close(socketChannel)
  }
}
Processor.accept(socketChannel)
放入ConcurrentLinkedQueue
/**
 * Enqueue a newly accepted channel on newConnections and wake this processor up
 * so that its loop can pick the channel up.
 */
def accept(socketChannel: SocketChannel): Unit = {
  newConnections.add(socketChannel)
  wakeup()
}
Processor
private[kafka] class Processor(val id: Int,
val time: Time,
val maxRequestSize: Int,
val aggregateIdleMeter: Meter,
val idleMeter: Meter,
val totalProcessorThreads: Int,
val requestChannel: RequestChannel,
connectionQuotas: ConnectionQuotas,
val connectionsMaxIdleMs: Long) extends AbstractServerThread(connectionQuotas) {
private val newConnections = new ConcurrentLinkedQueue[SocketChannel]()
private val connectionsMaxIdleNanos = connectionsMaxIdleMs * 1000 * 1000
private var currentTimeNanos = SystemTime.nanoseconds
private val lruConnections = new util.LinkedHashMap[SelectionKey, Long]
private var nextIdleCloseCheckTime = currentTimeNanos + connectionsMaxIdleNanos
nio select方法
configureNewConnections()
processNewResponses()
select处理
maybeCloseOldestConnection
/**
 * Main processor loop. Each iteration:
 *  1. registers queued new connections,
 *  2. applies queued responses to their selection keys,
 *  3. selects for up to 300 ms and records the time spent as idle time,
 *  4. dispatches every selected key to read/write/close,
 *  5. checks whether the oldest tracked connection has been idle for too long.
 * When the loop ends, all connections and the selector are closed.
 */
override def run() {
  startupComplete()
  while (isRunning) {
    // pick up connections handed over by the acceptor
    configureNewConnections()
    // turn queued responses into interest-op changes
    processNewResponses()

    val selectStartNanos = SystemTime.nanoseconds
    val readyCount = selector.select(300)
    currentTimeNanos = SystemTime.nanoseconds
    val idleNanos = currentTimeNanos - selectStartNanos
    idleMeter.mark(idleNanos)
    // One meter is shared by the whole processor pool. A meter reports
    // total_recorded_value / time_window and the window does not grow with the
    // number of threads, so each thread contributes only its 1/N share.
    aggregateIdleMeter.mark(idleNanos / totalProcessorThreads)
    trace("Processor id " + id + " selection time = " + idleNanos + " ns")

    if (readyCount > 0) {
      val selected = selector.selectedKeys().iterator()
      while (selected.hasNext && isRunning) {
        // declared outside the try so the handlers below can still reach the key
        var key: SelectionKey = null
        try {
          key = selected.next
          selected.remove()
          if (key.isReadable) {
            read(key)
          } else if (key.isWritable) {
            write(key)
          } else if (!key.isValid) {
            close(key)
          } else {
            throw new IllegalStateException("Unrecognized key state for processor thread.")
          }
        } catch {
          case eof: EOFException =>
            info("Closing socket connection to %s.".format(channelFor(key).socket.getInetAddress))
            close(key)
          case invalid: InvalidRequestException =>
            info("Closing socket connection to %s due to invalid request: %s".format(channelFor(key).socket.getInetAddress, invalid.getMessage))
            close(key)
          case other: Throwable =>
            error("Closing socket for " + channelFor(key).socket.getInetAddress + " because of error", other)
            close(key)
        }
      }
    }
    maybeCloseOldestConnection
  }
  debug("Closing selector.")
  closeAll()
  swallowError(selector.close())
  shutdownComplete()
}
configureNewConnections()
获取连接ConcurrentLinkedQueue中的socket注册channel
channel.register(selector, SelectionKey.OP_READ)
/**
 * Drain the newConnections queue and register every channel with this processor's
 * selector for OP_READ.
 */
private def configureNewConnections() {
  // isEmpty is constant-time, whereas ConcurrentLinkedQueue.size() walks the whole queue
  // on every call. Only this method polls the queue in the visible code, so a non-empty
  // check followed by poll() behaves exactly like the former size() > 0 test.
  while(!newConnections.isEmpty) {
    val channel = newConnections.poll()
    debug("Processor " + id + " listening to new connection from " + channel.socket.getRemoteSocketAddress)
    channel.register(selector, SelectionKey.OP_READ)
  }
}
processNewResponses()
从requestChannel中poll()自己的BlockingQueue[RequestChannel.Response],
有消息就设置key注册写事件,后面会调用write方法
/**
 * Drain this processor's response queue and apply each response's action to the
 * selection key stored in the originating request:
 *  - NoOpAction: nothing to send, go back to reading;
 *  - SendAction: attach the response and wait for the socket to become writable;
 *  - CloseConnectionAction: close the connection.
 * A response whose key has already been cancelled is dropped and the key is closed.
 */
private def processNewResponses() {
  var response = requestChannel.receiveResponse(id)
  while (response != null) {
    val key = response.request.requestKey.asInstanceOf[SelectionKey]
    try {
      response.responseAction match {
        case RequestChannel.NoOpAction =>
          // No reply for the client; resume reading so that pipelined requests
          // still sitting in the server's socket buffer get consumed.
          response.request.updateRequestMetrics
          trace("Socket server received empty response to send, registering for read: " + response)
          key.interestOps(SelectionKey.OP_READ)
          key.attach(null)

        case RequestChannel.SendAction =>
          trace("Socket server received response to send, registering for write: " + response)
          key.interestOps(SelectionKey.OP_WRITE)
          key.attach(response)

        case RequestChannel.CloseConnectionAction =>
          response.request.updateRequestMetrics
          trace("Closing socket connection actively according to the response code.")
          close(key)

        case unknown =>
          throw new KafkaException("No mapping found for response code " + unknown)
      }
    } catch {
      case cancelled: CancelledKeyException =>
        debug("Ignoring response for closed socket.")
        close(key)
    } finally {
      // always advance, even when the current response failed
      response = requestChannel.receiveResponse(id)
    }
  }
}
read()
read完request(设置processorId到request),放入requestChannel中的requestQueue。
由KafkaRequestHandlerPool中的各个KafkaRequestHandler线程从requestQueue中取出请求(每个handler各自单线程循环),处理完后把响应放入requestChannel中该请求所属processor对应的response队列(所有的processor共享一个request队列,但每个processor都有自己对应的response队列)中
/*
 * Process a read from a ready socket.
 *
 * Marks the connection as the most recently active one, continues (or starts) the
 * Receive attached to the key, and then:
 *  - closes the connection if the channel reported a negative byte count;
 *  - hands a completed request to the request channel and stops listening for reads
 *    on this key;
 *  - otherwise keeps the key registered for further reads.
 */
def read(key: SelectionKey) {
  // lruConnections is created with the default (insertion-ordered) LinkedHashMap
  // constructor, and re-putting an existing key does not move it. Removing the key first
  // moves it to the tail, so the head of the map really is the least recently active
  // connection that maybeCloseOldestConnection expects to find there.
  lruConnections.remove(key)
  lruConnections.put(key, currentTimeNanos)
  val socketChannel = channelFor(key)
  var receive = key.attachment.asInstanceOf[Receive]
  if(key.attachment == null) {
    // first bytes of a new request on this connection
    receive = new BoundedByteBufferReceive(maxRequestSize)
    key.attach(receive)
  }
  val read = receive.readFrom(socketChannel)
  val address = socketChannel.socket.getRemoteSocketAddress()
  trace(read + " bytes read from " + address)
  if(read < 0) {
    close(key)
  } else if(receive.complete) {
    val req = RequestChannel.Request(processor = id, requestKey = key, buffer = receive.buffer, startTimeMs = time.milliseconds, remoteAddress = address)
    requestChannel.sendRequest(req)
    key.attach(null)
    // explicitly reset interest ops to not READ, no need to wake up the selector just yet
    key.interestOps(key.interestOps & (~SelectionKey.OP_READ))
  } else {
    // more reading to be done
    trace("Did not finish reading, registering for read again on connection " + socketChannel.socket.getRemoteSocketAddress())
    key.interestOps(SelectionKey.OP_READ)
    wakeup()
  }
}
write
写入消息到socket
/*
 * Process a write to a ready socket.
 *
 * Writes as much of the attached response as the channel accepts. Once the response is
 * fully written the key goes back to OP_READ; otherwise it stays on OP_WRITE.
 *
 * Throws IllegalStateException if the key has no response (or no payload) attached.
 */
def write(key: SelectionKey) {
  val socketChannel = channelFor(key)
  val response = key.attachment().asInstanceOf[RequestChannel.Response]
  // Check the attachment itself first: with a null attachment the former code failed with
  // a NullPointerException on response.responseSend before reaching this guard.
  if(response == null || response.responseSend == null)
    throw new IllegalStateException("Registered for write interest but no response attached to key.")
  val responseSend = response.responseSend
  val written = responseSend.writeTo(socketChannel)
  trace(written + " bytes written to " + socketChannel.socket.getRemoteSocketAddress() + " using key " + key)
  if(responseSend.complete) {
    response.request.updateRequestMetrics()
    key.attach(null)
    trace("Finished writing, registering for read on connection " + socketChannel.socket.getRemoteSocketAddress())
    key.interestOps(SelectionKey.OP_READ)
  } else {
    trace("Did not finish writing, registering for write again on connection " + socketChannel.socket.getRemoteSocketAddress())
    key.interestOps(SelectionKey.OP_WRITE)
    wakeup()
  }
}
maybeCloseOldestConnection
最早的channel如果太不活跃,就移除(有点粗糙)
/**
 * Once the next scheduled check time has passed, look at the first entry of
 * lruConnections and close that connection if its recorded activity time is older
 * than connectionsMaxIdleNanos. At most one connection is closed per call; the next
 * check time is rescheduled from the inspected entry (or from now when nothing is tracked).
 */
private def maybeCloseOldestConnection {
  val checkDue = currentTimeNanos > nextIdleCloseCheckTime
  if (checkDue && lruConnections.isEmpty) {
    nextIdleCloseCheckTime = currentTimeNanos + connectionsMaxIdleNanos
  } else if (checkDue) {
    val head = lruConnections.entrySet.iterator().next()
    val lastActiveNanos = head.getValue
    nextIdleCloseCheckTime = lastActiveNanos + connectionsMaxIdleNanos
    if (currentTimeNanos > nextIdleCloseCheckTime) {
      val key: SelectionKey = head.getKey
      trace("About to close the idle connection from " + key.channel.asInstanceOf[SocketChannel].socket.getRemoteSocketAddress
        + " due to being idle for " + (currentTimeNanos - lastActiveNanos) / 1000 / 1000 + " millis")
      close(key)
    }
  }
}
/**
 * Stop tracking the key in lruConnections, then delegate to the inherited close
 * to release the key and its socket.
 */
override def close(key: SelectionKey): Unit = {
  lruConnections.remove(key)
  super.close(key)
}
AbstractServerThread
/**
 * Detach whatever is attached to the key, close its socket channel and cancel the key.
 * A null key is ignored.
 */
def close(key: SelectionKey) {
  Option(key).foreach { liveKey =>
    liveKey.attach(null)
    close(liveKey.channel.asInstanceOf[SocketChannel])
    swallowError(liveKey.cancel())
  }
}
/**
 * Close every connection still registered with this thread's selector.
 */
def closeAll() {
  // a non-blocking select first, so already-cancelled keys drop out of selector.keys
  this.selector.selectNow()
  val registered = this.selector.keys().iterator()
  while (registered.hasNext)
    close(registered.next())
}
KafkaServer
初始化KafkaApis 和 KafkaRequestHandlerPool
// Abbreviated excerpt of the broker start-up sequence: build and start the network layer,
// then create the request-handling layer on top of the same request channel.
startUp(){
// All socket-server settings are taken from the broker config.
socketServer = new SocketServer(config.brokerId,
config.hostName,
config.port,
config.numNetworkThreads,
config.queuedMaxRequests,
config.socketSendBufferBytes,
config.socketReceiveBufferBytes,
config.socketRequestMaxBytes,
config.maxConnectionsPerIp,
config.connectionsMaxIdleMs,
config.maxConnectionsPerIpOverrides)
socketServer.startup()
/* start processing requests */
// KafkaApis and the handler pool both receive socketServer.requestChannel, i.e. the channel
// the network threads use to exchange requests and responses with the handlers.
apis = new KafkaApis(socketServer.requestChannel, replicaManager, offsetManager, zkClient, config.brokerId, config, kafkaController)
// The pool is sized by config.numIoThreads.
requestHandlerPool = new KafkaRequestHandlerPool(config.brokerId, socketServer.requestChannel, apis, config.numIoThreads)
}
KafkaRequestHandlerPool
numThreads个KafkaRequestHandler
class KafkaRequestHandlerPool(val brokerId: Int,
val requestChannel: RequestChannel,
val apis: KafkaApis,
numThreads: Int) extends Logging with KafkaMetricsGroup {
val threads = new Array[Thread](numThreads)
val runnables = new Array[KafkaRequestHandler](numThreads)
for(i <- 0 until numThreads) {
runnables(i) = new KafkaRequestHandler(i, brokerId, aggregateIdleMeter, numThreads, requestChannel, apis)
threads(i) = Utils.daemonThread("kafka-request-handler-" + i, runnables(i))
threads(i).start()
}
KafkaRequestHandler
获取请求,调用KafkaApis来处理
/**
* A thread that answers kafka requests.
*/
class KafkaRequestHandler(id: Int,
brokerId: Int,
val aggregateIdleMeter: Meter,
val totalHandlerThreads: Int,
val requestChannel: RequestChannel,
apis: KafkaApis) extends Runnable with Logging {
this.logIdent = "[Kafka Request Handler " + id + " on Broker " + brokerId + "], "
/**
 * Handler loop: poll the request channel (300 ms at a time) until a request arrives,
 * recording the time spent polling as idle time, then pass the request to KafkaApis.
 * The loop ends when the AllDone marker request is received. Any exception raised
 * while handling a request is logged and the loop continues.
 */
def run() {
  while (true) {
    try {
      var request: RequestChannel.Request = null
      do {
        // One meter is shared by the whole handler pool. A meter reports
        // total_recorded_value / time_window and the window does not grow with the
        // number of threads, so each thread contributes only its 1/N share.
        val pollStartNanos = SystemTime.nanoseconds
        request = requestChannel.receiveRequest(300)
        val idleNanos = SystemTime.nanoseconds - pollStartNanos
        aggregateIdleMeter.mark(idleNanos / totalHandlerThreads)
      } while (request == null)

      if (request eq RequestChannel.AllDone) {
        debug("Kafka request handler %d on broker %d received shut down command".format(
          id, brokerId))
        return
      }
      request.requestDequeueTimeMs = SystemTime.milliseconds
      trace("Kafka request handler %d on broker %d handling request %s".format(id, brokerId, request))
      apis.handle(request)
    } catch {
      case failure: Throwable => error("Exception when handling request", failure)
    }
  }
}
RequestChannel
一个ArrayBlockingQueue(容量为queueSize)作为request队列,numProcessors个LinkedBlockingQueue作为response队列
(所有的processor共享一个request队列,但每个processor都有自己对应的response队列)
class RequestChannel(val numProcessors: Int, val queueSize: Int) extends KafkaMetricsGroup {
private var responseListeners: List[(Int) => Unit] = Nil
private val requestQueue = new ArrayBlockingQueue[RequestChannel.Request](queueSize)
private val responseQueues = new Array[BlockingQueue[RequestChannel.Response]](numProcessors)
for(i <- 0 until numProcessors)
responseQueues(i) = new LinkedBlockingQueue[RequestChannel.Response]()
receiveRequest()就是poll
/**
 * Take the next request from the request queue, waiting at most `timeout` milliseconds.
 * The result is whatever the queue's timed poll returns.
 */
def receiveRequest(timeout: Long): RequestChannel.Request = {
  requestQueue.poll(timeout, TimeUnit.MILLISECONDS)
}
/**
 * Enqueue a request for the handlers. The request queue is bounded, so this call may
 * block until there is room for the request.
 */
def sendRequest(request: RequestChannel.Request): Unit = {
  requestQueue.put(request)
}
/**
 * Queue a NoOpAction response for the given processor (nothing to send back; more data
 * has to be read from the network) and notify every registered response listener.
 */
def noOperation(processor: Int, request: RequestChannel.Request) {
  val noOp = new RequestChannel.Response(processor, request, null, RequestChannel.NoOpAction)
  responseQueues(processor).put(noOp)
  responseListeners.foreach(listener => listener(processor))
}
/**
 * Queue a CloseConnectionAction response for the given processor, asking it to close
 * the connection the request came from, and notify every registered response listener.
 */
def closeConnection(processor: Int, request: RequestChannel.Request) {
  val closeIt = new RequestChannel.Response(processor, request, null, RequestChannel.CloseConnectionAction)
  responseQueues(processor).put(closeIt)
  responseListeners.foreach(listener => listener(processor))
}
/**
 * Take the next request from the request queue, blocking until one is available.
 */
def receiveRequest(): RequestChannel.Request = {
  requestQueue.take()
}
/**
 * Non-blocking fetch of the next response queued for the given processor.
 * When a response is present, its request is stamped with the dequeue time.
 * Returns null when that processor's queue is empty.
 */
def receiveResponse(processor: Int): RequestChannel.Response = {
  val next = responseQueues(processor).poll()
  Option(next).foreach { dequeued =>
    dequeued.request.responseDequeueTimeMs = SystemTime.milliseconds
  }
  next
}
/**
 * Register a callback that is invoked with the processor id whenever a response is
 * queued through this channel. New listeners are prepended to the listener list.
 */
def addResponseListener(onResponse: Int => Unit): Unit = {
  responseListeners = onResponse :: responseListeners
}
/** Discard every request that is still waiting in the request queue. */
def shutdown(): Unit = {
  requestQueue.clear()
}
KafkaApis 处理请求
/**
* Logic to handle the various Kafka requests
*/
class KafkaApis(val requestChannel: RequestChannel,
val replicaManager: ReplicaManager,
val offsetManager: OffsetManager,
val zkClient: ZkClient,
val brokerId: Int,
val config: KafkaConfig,
val controller: KafkaController) extends Logging {
val producerRequestPurgatory = new ProducerRequestPurgatory(replicaManager, offsetManager, requestChannel)
val fetchRequestPurgatory = new FetchRequestPurgatory(replicaManager, requestChannel)
// TODO: the following line will be removed in 0.9
replicaManager.initWithRequestPurgatory(producerRequestPurgatory, fetchRequestPurgatory)
var metadataCache = new MetadataCache
this.logIdent = "[KafkaApi-%d] ".format(brokerId)
/**
 * Entry point for every request: dispatch on the request id to the matching handler.
 * Any failure is first reported through the request object's own handleError and then
 * logged. The local-completion timestamp is recorded whether or not handling succeeded.
 */
def handle(request: RequestChannel.Request) {
  try {
    trace("Handling request: " + request.requestObj + " from client: " + request.remoteAddress)
    request.requestId match {
      case RequestKeys.ProduceKey            => handleProducerOrOffsetCommitRequest(request)
      case RequestKeys.FetchKey              => handleFetchRequest(request)
      case RequestKeys.OffsetsKey            => handleOffsetRequest(request)
      case RequestKeys.MetadataKey           => handleTopicMetadataRequest(request)
      case RequestKeys.LeaderAndIsrKey       => handleLeaderAndIsrRequest(request)
      case RequestKeys.StopReplicaKey        => handleStopReplicaRequest(request)
      case RequestKeys.UpdateMetadataKey     => handleUpdateMetadataRequest(request)
      case RequestKeys.ControlledShutdownKey => handleControlledShutdownRequest(request)
      case RequestKeys.OffsetCommitKey       => handleOffsetCommitRequest(request)
      case RequestKeys.OffsetFetchKey        => handleOffsetFetchRequest(request)
      case RequestKeys.ConsumerMetadataKey   => handleConsumerMetadataRequest(request)
      case unknownId                         => throw new KafkaException("Unknown api code " + unknownId)
    }
  } catch {
    case failure: Throwable =>
      request.requestObj.handleError(failure, requestChannel, request)
      error("error when handling request %s".format(request.requestObj), failure)
  } finally {
    request.apiLocalCompleteTimeMs = SystemTime.milliseconds
  }
}
handleOffsetCommitRequest处理offsetCommit请求:version 0把offset写入ZK,然后通过requestChannel.sendResponse把响应消息放入对应processor的response队列中;version 1及以上转交handleProducerOrOffsetCommitRequest处理
/**
 * Handle an offset commit request.
 *
 * Version 0 requests store each offset in ZooKeeper and answer with a per-partition
 * error code; a failure on one partition is mapped to its error code and does not
 * abort the others. Later versions are delegated to handleProducerOrOffsetCommitRequest.
 */
def handleOffsetCommitRequest(request: RequestChannel.Request) {
  val offsetCommitRequest = request.requestObj.asInstanceOf[OffsetCommitRequest]
  if (offsetCommitRequest.versionId != 0) {
    // version 1 and above store the offsets in a special Kafka topic
    handleProducerOrOffsetCommitRequest(request)
  } else {
    // version 0 stores the offsets in ZK
    val responseInfo = offsetCommitRequest.requestInfo.map {
      case (topicAndPartition, metaAndError) =>
        val topicDirs = new ZKGroupTopicDirs(offsetCommitRequest.groupId, topicAndPartition.topic)
        try {
          ensureTopicExists(topicAndPartition.topic)
          val metadataTooLarge =
            metaAndError.metadata != null && metaAndError.metadata.length > config.offsetMetadataMaxSize
          if (metadataTooLarge) {
            (topicAndPartition, ErrorMapping.OffsetMetadataTooLargeCode)
          } else {
            val offsetPath = topicDirs.consumerOffsetDir + "/" + topicAndPartition.partition
            ZkUtils.updatePersistentPath(zkClient, offsetPath, metaAndError.offset.toString)
            (topicAndPartition, ErrorMapping.NoError)
          }
        } catch {
          case failure: Throwable =>
            (topicAndPartition, ErrorMapping.codeFor(failure.getClass.asInstanceOf[Class[Throwable]]))
        }
    }
    val response = new OffsetCommitResponse(responseInfo, offsetCommitRequest.correlationId)
    requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response)))
  }
}