Selector.pollSelectionKeys
if (channel.ready() && key.isReadable() && !hasStagedReceive(channel)) {
NetworkReceive networkReceive;
//Read responses coming back from the remote end (on the broker side these
//are incoming requests). A NetworkReceive represents one complete message
//read off this channel.
//channel.read() returns one complete message per call and null when no
//further complete message is buffered; partial reads (TCP sticky/split
//packets) are handled inside channel.read() itself.
while ((networkReceive = channel.read()) != null)
addToStagedReceives(channel, networkReceive);
}
/**
 * Stage a completed receive for the given channel until poll() promotes it
 * to completedReceives. One channel corresponds to one network connection
 * (i.e. one broker), and its receives are queued in arrival order.
 *
 * @param channel the connection the message was read from
 * @param receive one complete message read off that connection
 */
private void addToStagedReceives(KafkaChannel channel, NetworkReceive receive) {
    // Single lookup instead of containsKey + put + get: fetch the deque and
    // lazily create it on first use for this channel.
    Deque<NetworkReceive> deque = stagedReceives.get(channel);
    if (deque == null) {
        deque = new ArrayDeque<NetworkReceive>();
        stagedReceives.put(channel, deque);
    }
    // Append the receive so per-connection ordering is preserved.
    deque.add(receive);
}
Selector.poll -> addToCompletedReceives(对stagedReceives里面的数据要进行处理)
/**
 * Promote staged receives to the completedReceives list consumed by callers
 * of poll(). At most ONE receive per un-muted channel is promoted per call,
 * which preserves per-connection ordering; a channel's entry is dropped from
 * stagedReceives once its queue drains.
 */
private void addToCompletedReceives() {
    if (this.stagedReceives.isEmpty())
        return;
    Iterator<Map.Entry<KafkaChannel, Deque<NetworkReceive>>> it = this.stagedReceives.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry<KafkaChannel, Deque<NetworkReceive>> entry = it.next();
        KafkaChannel channel = entry.getKey();
        // Muted channels keep their receives staged until they are unmuted.
        if (channel.isMute())
            continue;
        Deque<NetworkReceive> pending = entry.getValue();
        // Take exactly one receive (on the broker side these are requests).
        NetworkReceive receive = pending.poll();
        this.completedReceives.add(receive);
        this.sensors.recordBytesReceived(channel.id(), receive.payload().limit());
        // Nothing left staged for this channel: remove via the iterator to
        // avoid ConcurrentModificationException.
        if (pending.isEmpty())
            it.remove();
    }
}
=> NetworkClient.poll -> handleCompletedReceives
/**
 * Match each completed network receive back to its originating request and
 * turn the pair into a ClientResponse for the caller.
 *
 * @param responses output list the new ClientResponses are appended to
 * @param now       current time in milliseconds
 */
private void handleCompletedReceives(List<ClientResponse> responses, long now) {
    for (NetworkReceive receive : this.selector.completedReceives()) {
        // Broker id the response arrived from.
        String nodeId = receive.source();
        // Pop the oldest in-flight request for that broker; responses arrive
        // in request order on a given connection, so this is its request.
        // (The client tolerates a bounded number of in-flight requests per
        // connection — requests sent but not yet acknowledged.)
        ClientRequest request = inFlightRequests.completeNext(nodeId);
        // Decode the response payload using the header of the request it
        // answers (the header carries the API key/version needed to parse).
        Struct responseBody = parseResponse(receive.payload(), request.request().header());
        // Metadata responses are consumed internally by the MetadataUpdater;
        // everything else is surfaced to the caller as a ClientResponse.
        if (!metadataUpdater.maybeHandleCompletedReceive(request, now, responseBody)) {
            responses.add(new ClientResponse(request, now, false, responseBody));
        }
    }
}
// Dispatch callbacks for the completed responses collected above.
for (ClientResponse response : responses) {
if (response.request().hasCallback()) {
try {
/**
 * Invoke the callback that was attached to the request when it was
 * built (e.g. the Sender binds a RequestCompletionHandler that routes
 * to handleProduceResponse). This hands the response back to the
 * component that issued the request.
 */
response.request().callback().onComplete(response);
} catch (Exception e) {
log.error("Uncaught error in request completion:", e);
}
}
}
=> Sender.produceRequest
RequestCompletionHandler callback = new RequestCompletionHandler() {
public void onComplete(ClientResponse response) {
//Invoked by NetworkClient when the response for this produce request
//arrives; delegates to handleProduceResponse with the batches that
//were sent, keyed by partition.
handleProduceResponse(response, recordsByPartition, time.milliseconds());
}
};
-> handleProduceResponse
/**
 * Handle the broker's response to a produce request, completing every batch
 * that was part of it.
 *
 * @param response the raw client response (may indicate a disconnect)
 * @param batches  the batches sent in this request, keyed by partition
 * @param now      current time in milliseconds
 */
private void handleProduceResponse(ClientResponse response, Map<TopicPartition, RecordBatch> batches, long now) {
    int correlationId = response.request().request().header().correlationId();
    if (response.wasDisconnected()) {
        // Rare case: the broker connection dropped before a response came
        // back. Fail every batch in the request with a network error.
        log.trace("Cancelled request {} due to node {} being disconnected", response, response.request()
                                                                                              .request()
                                                                                              .destination());
        for (RecordBatch batch : batches.values())
            completeBatch(batch, Errors.NETWORK_EXCEPTION, -1L, Record.NO_TIMESTAMP, correlationId, now);
        return;
    }
    // Normal path: the connection is intact.
    log.trace("Received produce response from node {} with correlation id {}",
              response.request().request().destination(),
              correlationId);
    if (!response.hasResponse()) {
        // acks == 0: the broker sends no response at all, so complete every
        // batch immediately with no error and no assigned offset. (acks=0 is
        // rarely used in production; acks=1 waits for the leader, acks=-1/all
        // waits for the followers as well.)
        for (RecordBatch batch : batches.values())
            completeBatch(batch, Errors.NONE, -1L, Record.NO_TIMESTAMP, correlationId, now);
        return;
    }
    // Parse the body and walk the per-partition results.
    ProduceResponse produceResponse = new ProduceResponse(response.responseBody());
    for (Map.Entry<TopicPartition, ProduceResponse.PartitionResponse> entry : produceResponse.responses().entrySet()) {
        TopicPartition partition = entry.getKey();
        ProduceResponse.PartitionResponse partitionResponse = entry.getValue();
        // The broker reports success/failure per partition as an error code;
        // Errors.NONE means the partition's batch was appended successfully.
        Errors error = Errors.forCode(partitionResponse.errorCode);
        completeBatch(batches.get(partition), error, partitionResponse.baseOffset,
                partitionResponse.timestamp, correlationId, now);
    }
    this.sensors.recordLatency(response.request().request().destination(), response.requestLatencyMs());
    this.sensors.recordThrottleTime(response.request().request().destination(),
            produceResponse.getThrottleTime());
}
-> completeBatch
/**
 * Finish one batch: either re-enqueue it for retry (retriable error with
 * attempts remaining) or complete it, firing the user callbacks and
 * releasing its buffer back to the accumulator's pool.
 *
 * @param batch         the batch to complete
 * @param error         broker-reported error, or Errors.NONE on success
 * @param baseOffset    base offset assigned by the broker (-1 if none)
 * @param timestamp     broker timestamp, or Record.NO_TIMESTAMP
 * @param correlationId correlation id of the produce request (for logging)
 * @param now           current time in milliseconds
 */
private void completeBatch(RecordBatch batch, Errors error, long baseOffset, long timestamp, long correlationId, long now) {
    boolean retriable = error != Errors.NONE && canRetry(batch, error);
    if (retriable) {
        // Retriable failure: put the batch back into the accumulator's queue
        // so it is sent again on a later drain.
        log.warn("Got error produce response with correlation id {} on topic-partition {}, retrying ({} attempts left). Error: {}",
                correlationId,
                batch.topicPartition,
                this.retries - batch.attempts - 1,
                error);
        this.accumulator.reenqueue(batch, now);
        this.sensors.recordRetries(batch.topicPartition.topic(), batch.recordCount);
    } else {
        // Terminal outcome: either success, or a failure that cannot be
        // retried (non-retriable error, or retry budget exhausted).
        // Authorization failures get a dedicated exception carrying the topic.
        RuntimeException exception = (error == Errors.TOPIC_AUTHORIZATION_FAILED)
                ? new TopicAuthorizationException(batch.topicPartition.topic())
                : error.exception();
        // Deliver the result (and any exception) to the user callbacks bound
        // to this batch — this is the end of a message's send lifecycle.
        batch.done(baseOffset, timestamp, exception);
        // Return the batch's buffer to the pool.
        this.accumulator.deallocate(batch);
        if (error != Errors.NONE)
            this.sensors.recordErrors(batch.topicPartition.topic(), batch.recordCount);
    }
    // Stale-metadata errors trigger a metadata refresh regardless of outcome.
    if (error.exception() instanceof InvalidMetadataException) {
        if (error.exception() instanceof UnknownTopicOrPartitionException)
            log.warn("Received unknown topic or partition error in produce request on partition {}. The topic/partition may not exist or the user may not have Describe access to it", batch.topicPartition);
        metadata.requestUpdate();
    }
    // Unmute the completed partition.
    if (guaranteeMessageOrder)
        this.accumulator.unmutePartition(batch.topicPartition);
}
-> batch.done
/**
 * Complete this batch, invoking the user-supplied callback of every record
 * it contains. Exactly one of (metadata, exception) passed to each callback
 * is non-null: metadata on success, exception on failure.
 *
 * @param baseOffset base offset assigned by the broker (-1 if none)
 * @param timestamp  broker timestamp, or Record.NO_TIMESTAMP for CreateTime
 * @param exception  null on success, otherwise the failure to report
 */
public void done(long baseOffset, long timestamp, RuntimeException exception) {
    log.trace("Produced messages to topic-partition {} with base offset offset {} and error: {}.",
              topicPartition,
              baseOffset,
              exception);
    // Each Thunk pairs one appended record's FutureRecordMetadata with the
    // callback the user passed to send(). Indexed loop with size() re-read
    // each iteration, so the list may grow while callbacks run.
    for (int i = 0; i < this.thunks.size(); i++) {
        try {
            Thunk thunk = this.thunks.get(i);
            if (exception != null) {
                // Failure path: pass the exception through so the user's code
                // (e.g. catching a TimeoutException) can react per its own
                // business rules.
                thunk.callback.onCompletion(null, exception);
            } else {
                // Success path: build the per-record metadata. A server
                // timestamp of NO_TIMESTAMP means CreateTime is in use, so
                // fall back to the client-recorded timestamp; otherwise the
                // broker's LogAppendTime is used.
                RecordMetadata recordMetadata = new RecordMetadata(this.topicPartition, baseOffset, thunk.future.relativeOffset(),
                        timestamp == Record.NO_TIMESTAMP ? thunk.future.timestamp() : timestamp,
                        thunk.future.checksum(),
                        thunk.future.serializedKeySize(),
                        thunk.future.serializedValueSize());
                // Null exception tells the user code the send succeeded.
                thunk.callback.onCompletion(recordMetadata, null);
            }
        } catch (Exception e) {
            // A misbehaving user callback must not break the other callbacks.
            log.error("Error executing user-provided callback on message for topic-partition {}:", topicPartition, e);
        }
    }
    this.produceFuture.done(topicPartition, baseOffset, exception);
}
thunk.callback.onCompletion -> 调用用户自定义的回调函数
class DemoCallBack implements Callback {
// Time the send was initiated, used to report elapsed latency below.
private final long startTime;
// Key of the record that was sent, echoed in the log output.
private final int key;
// Payload of the record that was sent, echoed in the log output.
private final String message;
public DemoCallBack(long startTime, int key, String message) {
this.startTime = startTime;
this.key = key;
this.message = message;
}
/**
 * A callback method the user can implement to provide asynchronous handling of request completion. This method will
 * be called when the record sent to the server has been acknowledged. Exactly one of the arguments will be
 * non-null.
 *
 * @param metadata The metadata for the record that was sent (i.e. the partition and offset). Null if an error
 * occurred.
 * @param exception The exception thrown during processing of this record. Null if no error occurred.
 */
public void onCompletion(RecordMetadata metadata, Exception exception) {
long elapsedTime = System.currentTimeMillis() - startTime;
if(exception != null){
System.out.println("有异常");
//In production there is usually a fallback here (e.g. a backup delivery path).
}else{
System.out.println("说明没有异常信息,成功的!!");
}
if (metadata != null) {
System.out.println(
"message(" + key + ", " + message + ") sent to partition(" + metadata.partition() +
"), " +
"offset(" + metadata.offset() + ") in " + elapsedTime + " ms");
} else {
exception.printStackTrace();
}
}
回调函数处理完后,释放accumulator内存
Sender.completeBatch -> this.accumulator.deallocate(batch);
public void deallocate(RecordBatch batch) {
//Release a fully-processed batch: drop it from the set of batches still
//awaiting completion...
incomplete.remove(batch);
//...and hand its underlying buffer back to the BufferPool for reuse.
free.deallocate(batch.records.buffer(), batch.records.initialCapacity());
}
free.deallocate
/**
 * Return a buffer to the pool. Buffers of exactly the poolable (batch) size
 * are cleared and kept for reuse; any other size is simply counted back into
 * the available-memory budget and left for garbage collection. In either
 * case the first thread waiting for memory, if any, is woken up.
 *
 * @param buffer the buffer being returned
 * @param size   the number of bytes originally allocated for it
 */
public void deallocate(ByteBuffer buffer, int size) {
    lock.lock();
    try {
        boolean poolable = size == this.poolableSize && size == buffer.capacity();
        if (poolable) {
            // Exactly one standard batch (e.g. the configured 16K): clear it
            // and keep it in the free list for the next allocation.
            buffer.clear();
            this.free.add(buffer);
        } else {
            // Odd-sized buffer: don't pool it, just credit the bytes back to
            // the available-memory counter and let the GC reclaim the buffer.
            this.availableMemory += size;
        }
        // Memory was just freed either way, so wake the longest-waiting
        // allocator (if any) to let it retry its allocation.
        Condition waiter = this.waiters.peekFirst();
        if (waiter != null)
            waiter.signal();
    } finally {
        lock.unlock();
    }
}