NetworkClient是kafka的网络层,也就是真正发生网络I/O的地方,是一个通用的网络客户端实现,不只用于生产者消息的发送,也用于消费者消费消息以及服务端Broker之间的通信。
public class NetworkClient implements KafkaClient {
/* selector that performs the actual network I/O */
private final Selectable selector;
// manages metadata update operations
private final MetadataUpdater metadataUpdater;
// random source; presumably used to randomize node selection — TODO confirm against usage
private final Random randOffset;
/* connection state of each node */
private final ClusterConnectionStates connectionStates;
/* requests that have been sent but have not yet received a response */
private final InFlightRequests inFlightRequests;
/* socket send buffer size (bytes) */
private final int socketSendBuffer;
/* socket receive buffer size (bytes) */
private final int socketReceiveBuffer;
/* clientID */
private final String clientId;
/* correlation id counter for matching requests to responses */
private int correlation;
/* request timeout in milliseconds */
private final int requestTimeoutMs;
// clock abstraction used to read the current time
private final Time time;
}
NetworkClient中所有连接的状态由ClusterConnectionStates管理,它底层使用Map&lt;String, NodeConnectionState&gt;实现,NodeConnectionState记录每个节点的连接状态,状态枚举值为DISCONNECTED、CONNECTING、CONNECTED。
NetWorkClient的核心方法是ready(),检测Node是否准备好接受数据,首先通过isReady方法检测是否可以向一个Node发送请求,符合以下三个条件:
1. metadata不处于正在更新或者需要更新的状态,!metadataUpdater.isUpdateDue(now)
2. 成功建立连接并且连接正常:connectionStates.isConnected(node)
3. canSendRequest返回true:return connectionStates.isConnected(node) && selector.isChannelReady(node) && inFlightRequests.canSendMore(node);
如果isReady返回false,则当满足以下两个条件,就会调用initiateConnect发起连接:
1. 连接是DISCONNECTED状态。
2. 两次重试的时间必须大于reconnectBackoffMs。
public class NetworkClient implements KafkaClient {
/**
 * Begin connecting to the given node if necessary, and return true if it is
 * already ready to accept requests.
 *
 * @param node the node to check
 * @param now  current time in milliseconds
 * @return true if the node is ready to receive requests right now
 * @throws IllegalArgumentException if the node is empty
 */
public boolean ready(Node node, long now) {
    if (node.isEmpty()) {
        throw new IllegalArgumentException("Cannot connect to empty node " + node);
    }
    boolean nodeIsReady = isReady(node, now);
    if (!nodeIsReady && connectionStates.canConnect(node.idString(), now)) {
        // if we are interested in sending to a node and we don't have a connection to it, initiate one
        initiateConnect(node, now);
    }
    return nodeIsReady;
}
/**
 * A node is ready only when no metadata update is due and a request can
 * actually be sent to it.
 */
public boolean isReady(Node node, long now) {
    // Pending metadata updates take priority: while one is due, every
    // ordinary request is reported as not ready.
    if (metadataUpdater.isUpdateDue(now)) {
        return false;
    }
    return canSendRequest(node.idString());
}
/**
 * True when the connection is established, the channel is ready, and the
 * in-flight queue still has room for another request.
 */
private boolean canSendRequest(String node) {
    boolean connected = connectionStates.isConnected(node);
    return connected && selector.isChannelReady(node) && inFlightRequests.canSendMore(node);
}
/**
 * Start a non-blocking connection attempt to the given node. On failure the
 * node is marked disconnected and a metadata refresh is requested.
 */
private void initiateConnect(Node node, long now) {
    String nodeConnectionId = node.idString();
    try {
        log.debug("Initiating connection to node {} at {}:{}.", node.id(), node.host(), node.port());
        this.connectionStates.connecting(nodeConnectionId, now);
        InetSocketAddress address = new InetSocketAddress(node.host(), node.port());
        selector.connect(nodeConnectionId, address, this.socketSendBuffer, this.socketReceiveBuffer);
    } catch (IOException e) {
        /* attempt failed, we'll try again after the backoff */
        connectionStates.disconnected(nodeConnectionId, now);
        /* maybe the problem is our metadata, update it */
        metadataUpdater.requestUpdate();
        log.debug("Error connecting to node {} at {}:{}:", node.id(), node.host(), node.port(), e);
    }
}
}
NetworkClient.send()方法把请求设置到KafkaChannel.send字段,同时将请求放入InFlightRequests中等待响应。
public class NetworkClient implements KafkaClient {
/**
 * Queue up the given request for sending. The destination node must already
 * satisfy the readiness conditions checked by canSendRequest().
 *
 * @param request the request to send
 * @param now     current time in milliseconds
 * @throws IllegalStateException if the destination node is not ready
 */
public void send(ClientRequest request, long now) {
    String nodeId = request.request().destination();
    // Re-verify readiness: connected, channel ready, in-flight queue has room.
    if (canSendRequest(nodeId)) {
        doSend(request, now);
    } else {
        throw new IllegalStateException("Attempt to send a request to node " + nodeId + " which is not ready.");
    }
}
/** Record the send time, track the request in flight, and stage it on the selector. */
private void doSend(ClientRequest request, long now) {
    request.setSendTimeMs(now);
    // Held in the in-flight queue until a response arrives (or the
    // connection drops / the request times out).
    this.inFlightRequests.add(request);
    // The selector writes the payload into the node's channel.
    selector.send(request.request());
}
}
NetworkClient.poll()方法调用KSelector.poll()进行网络I/O,并调用各个handle*()方法对poll回来的各种数据和队列进行处理:
public class NetworkClient implements KafkaClient {
/**
 * Do actual reads and writes on the sockets, then process everything that
 * completed during this poll and fire the per-request callbacks.
 *
 * @param timeout maximum time in ms to block waiting for I/O
 * @param now     current time in milliseconds
 * @return the list of responses produced during this poll
 */
public List<ClientResponse> poll(long timeout, long now) {
    // Give the metadata updater a chance to send an update request first.
    long metadataTimeout = metadataUpdater.maybeUpdate(now);
    try {
        this.selector.poll(Utils.min(timeout, metadataTimeout, requestTimeoutMs));
    } catch (IOException e) {
        log.error("Unexpected error during I/O", e);
    }

    // process completed actions; re-read the clock since poll() may have blocked
    long updatedNow = this.time.milliseconds();
    List<ClientResponse> responses = new ArrayList<>();
    // Each handler inspects the selector's result lists and appends responses.
    handleCompletedSends(responses, updatedNow);    // finished sends that expect no response
    handleCompletedReceives(responses, updatedNow); // received responses (e.g. metadata)
    handleDisconnections(responses, updatedNow);    // connections that dropped
    handleConnections();                            // connections that completed
    handleTimedOutRequests(responses, updatedNow);  // requests past requestTimeoutMs

    // Finally invoke the callback registered on each request, if any.
    for (ClientResponse response : responses) {
        if (!response.request().hasCallback())
            continue;
        try {
            response.request().callback().onComplete(response);
        } catch (Exception e) {
            log.error("Uncaught error in request completion:", e);
        }
    }
    return responses;
}
}
下面看各种handle方法的处理:
handleCompletedSends处理已完成发送的请求队列(completedSends),如果发现不需要响应的请求,就从inFlightRequests中删除,并向responses中添加一个ClientResponse。
客户端发送请求后,handleCompletedSends中对于有响应的请求,并不会将ClientRequest从inFlightRequests中移除。(因为没收到响应),除非是客户端请求不需要响应,则这时候是可以将ClientRequest从中删除,添加时放到头部,删除时也是从头部删除。
public class NetworkClient implements KafkaClient {
/**
 * For each fully written send, if its request expects no response, remove it
 * from the in-flight queue and record a ClientResponse for it immediately.
 *
 * @param responses list to append produced responses to
 * @param now       current time in milliseconds
 */
private void handleCompletedSends(List<ClientResponse> responses, long now) {
    for (Send send : this.selector.completedSends()) {
        // The most recently sent request for this destination sits at the
        // head of the in-flight queue.
        ClientRequest request = this.inFlightRequests.lastSent(send.destination());
        if (request.expectResponse())
            continue; // its response is produced later by handleCompletedReceives
        // Fire-and-forget request: remove it from the head of the queue
        // and complete it right away with a null body.
        this.inFlightRequests.completeLastSent(send.destination());
        responses.add(new ClientResponse(request, now, false, null));
    }
}
}
handleCompletedReceives遍历completedReceives队列,
public class NetworkClient implements KafkaClient {
/**
 * Match each completed receive with its in-flight request, parse the body,
 * and either let the metadata updater consume it (metadata responses) or
 * emit a ClientResponse for the caller.
 *
 * @param responses list to append produced responses to
 * @param now       current time in milliseconds
 */
private void handleCompletedReceives(List<ClientResponse> responses, long now) {
    for (NetworkReceive receive : this.selector.completedReceives()) {
        // Node id the response came from.
        String source = receive.source();
        // The response has arrived, so its request leaves the in-flight
        // queue (which only holds requests still awaiting a response).
        ClientRequest req = inFlightRequests.completeNext(source);
        // Parse the payload and validate the response header against the
        // request header, producing a Struct.
        Struct body = parseResponse(receive.payload(), req.request().header());
        // Metadata responses are handled internally by the updater; every
        // other response is surfaced to the caller.
        boolean consumedByMetadataUpdater = metadataUpdater.maybeHandleCompletedReceive(req, now, body);
        if (!consumedByMetadataUpdater)
            responses.add(new ClientResponse(req, now, false, body));
    }
}
}
handleDisconnections处理disconnect列表
public class NetworkClient implements KafkaClient {
/**
 * Handle every node the selector reported as disconnected during this poll:
 * update its connection state and cancel its in-flight requests.
 *
 * @param responses list to append produced (disconnected) responses to
 * @param now       current time in milliseconds
 */
private void handleDisconnections(List<ClientResponse> responses, long now) {
    for (String node : this.selector.disconnected()) {
        log.debug("Node {} disconnected.", node);
        processDisconnection(responses, node, now);
    }
    // we got a disconnect so we should probably refresh our metadata and see if that broker is dead
    if (!this.selector.disconnected().isEmpty())
        metadataUpdater.requestUpdate();
}
/** Mark the node disconnected and cancel all of its in-flight requests. */
private void processDisconnection(List<ClientResponse> responses, String nodeId, long now) {
    // Transition the node's state to DISCONNECTED.
    connectionStates.disconnected(nodeId, now);
    for (ClientRequest request : this.inFlightRequests.clearAll(nodeId)) {
        log.trace("Cancelled request {} due to node {} being disconnected", request, nodeId);
        // Metadata requests are handled by the updater; all other requests
        // become disconnected ClientResponses so their callbacks can react.
        if (!metadataUpdater.maybeHandleDisconnection(request))
            responses.add(new ClientResponse(request, now, true, null));
    }
}
}
handleConnections把新建立连接的节点状态设置为CONNECTED;handleTimedOutRequests与handleDisconnections的处理大致相同,会调用this.selector.close(nodeId)关闭超时节点的连接。
在经过一系列的handle之后,poll方法中产生的全部ClientResponse已经都在responses中了,之后,遍历response调用每个ClientRequest中的回调,如果是异常响应则请求重发,正常就调用callback:
response.request().callback().onComplete(response);最终会调用创建ProduceRequest时注册的handleProduceResponse方法:
public class NetworkClient implements KafkaClient {
// NOTE(review): these two methods reference this.sensors, this.accumulator, this.retries,
// metadata and guaranteeMessageOrder, none of which are fields of NetworkClient — this code
// appears to belong to the producer's Sender class; the enclosing class label looks wrong.
/**
 * Callback invoked when a produce request completes. On disconnect, every batch is
 * completed with a retriable NETWORK_EXCEPTION; otherwise the ProduceResponse is
 * parsed and each partition's batch is completed with its per-partition result.
 */
private void handleProduceResponse(ClientResponse response, Map<TopicPartition, RecordBatch> batches, long now) {
int correlationId = response.request().request().header().correlationId();
// Connection dropped before a response arrived: complete every batch with a
// retriable NETWORK_EXCEPTION so completeBatch() can re-enqueue or fail them.
if (response.wasDisconnected()) {
log.trace("Cancelled request {} due to node {} being disconnected", response, response.request()
.request()
.destination());
for (RecordBatch batch : batches.values())
completeBatch(batch, Errors.NETWORK_EXCEPTION, -1L, Record.NO_TIMESTAMP, correlationId, now);
} else {
log.trace("Received produce response from node {} with correlation id {}",
response.request().request().destination(),
correlationId);
// if we have a response, parse it
if (response.hasResponse()) {
ProduceResponse produceResponse = new ProduceResponse(response.responseBody());
for (Map.Entry<TopicPartition, ProduceResponse.PartitionResponse> entry : produceResponse.responses().entrySet()) {
TopicPartition tp = entry.getKey();
ProduceResponse.PartitionResponse partResp = entry.getValue();
Errors error = Errors.forCode(partResp.errorCode);
RecordBatch batch = batches.get(tp);
// complete the batch for this partition with its individual error/offset/timestamp
completeBatch(batch, error, partResp.baseOffset, partResp.timestamp, correlationId, now);
}
this.sensors.recordLatency(response.request().request().destination(), response.requestLatencyMs());
this.sensors.recordThrottleTime(response.request().request().destination(),
produceResponse.getThrottleTime());
} else {
// No response body was expected (presumably acks == 0 — TODO confirm):
// treat every batch as successfully completed.
for (RecordBatch batch : batches.values())
completeBatch(batch, Errors.NONE, -1L, Record.NO_TIMESTAMP, correlationId, now);
}
}
}
/**
 * Finish a single batch: re-enqueue it for retry when the error is retriable and
 * retry attempts remain; otherwise fire its callbacks (with an exception on error)
 * and release its accumulator memory.
 */
private void completeBatch(RecordBatch batch, Errors error, long baseOffset, long timestamp, long correlationId, long now) {
if (error != Errors.NONE && canRetry(batch, error)) {
// retry
log.warn("Got error produce response with correlation id {} on topic-partition {}, retrying ({} attempts left). Error: {}",
correlationId,
batch.topicPartition,
this.retries - batch.attempts - 1,
error);
// Retriable batch: put it back into the accumulator to be sent again.
this.accumulator.reenqueue(batch, now);
this.sensors.recordRetries(batch.topicPartition.topic(), batch.recordCount);
} else {
// Not retriable (or no attempts left): complete exceptionally and free the memory.
RuntimeException exception;
if (error == Errors.TOPIC_AUTHORIZATION_FAILED)
exception = new TopicAuthorizationException(batch.topicPartition.topic());
else
exception = error.exception();
// done() fires the user callbacks registered with each record in the batch.
batch.done(baseOffset, timestamp, exception);
// Return the batch's buffer to the accumulator's pool.
this.accumulator.deallocate(batch);
if (error != Errors.NONE)
this.sensors.recordErrors(batch.topicPartition.topic(), batch.recordCount);
}
// Stale metadata (e.g. the partition leader changed) caused the error:
// request a metadata refresh.
if (error.exception() instanceof InvalidMetadataException)
metadata.requestUpdate();
// Unmute the completed partition.
if (guaranteeMessageOrder)
this.accumulator.unmutePartition(batch.topicPartition);
}
}