先来看 poll 方法,继续往下执行:
// invoke callbacks
// Dispatch each completed response to the callback that was attached to its
// original outbound request when the request was built (see produceRequest
// in the sender thread, which installs handleProduceResponse as the callback).
for (ClientResponse response : responses) {
if (response.request().hasCallback()) {
try {
// Run the request's completion handler with the response it produced.
response.request().callback().onComplete(response);
} catch (Exception e) {
// A failing user/handler callback must not break the poll loop;
// log it and continue with the remaining responses.
log.error("Uncaught error in request completion:", e);
}
}
}
return responses;
}
到了这里就会有回调函数,我们去sender线程找一下
List<ClientRequest> requests = createProduceRequests(batches, now);
跟进来看一下这个方法的实现:
/**
 * Builds one produce request per destination broker node.
 *
 * @param collated record batches grouped by broker node id
 * @param now      current time in milliseconds, stamped onto each request
 * @return one {@link ClientRequest} for every node in {@code collated}
 */
private List<ClientRequest> createProduceRequests(Map<Integer, List<RecordBatch>> collated, long now) {
    List<ClientRequest> result = new ArrayList<ClientRequest>(collated.size());
    for (Map.Entry<Integer, List<RecordBatch>> perNode : collated.entrySet()) {
        int destination = perNode.getKey();
        List<RecordBatch> nodeBatches = perNode.getValue();
        result.add(produceRequest(now, destination, acks, requestTimeout, nodeBatches));
    }
    return result;
}
主要看produceRequest
/**
 * Wraps the given batches into a single {@link ClientRequest} aimed at one
 * broker, attaching a completion handler so that handleProduceResponse(...)
 * runs when the broker's response (or a disconnect) comes back.
 *
 * @param destination broker node id the request is sent to
 * @param acks        acknowledgement setting; a response is only expected when non-zero
 * @param timeout     request timeout forwarded to the broker
 * @param batches     record batches bound for this broker
 */
private ClientRequest produceRequest(long now, int destination, short acks, int timeout, List<RecordBatch> batches) {
    // Per-partition record buffers for the wire request, plus the original
    // batches keyed the same way so the response handler can complete them.
    Map<TopicPartition, ByteBuffer> buffersByPartition = new HashMap<TopicPartition, ByteBuffer>(batches.size());
    final Map<TopicPartition, RecordBatch> recordsByPartition = new HashMap<TopicPartition, RecordBatch>(batches.size());
    for (RecordBatch batch : batches) {
        buffersByPartition.put(batch.topicPartition, batch.records.buffer());
        recordsByPartition.put(batch.topicPartition, batch);
    }
    ProduceRequest request = new ProduceRequest(acks, timeout, buffersByPartition);
    RequestSend send = new RequestSend(Integer.toString(destination),
            this.client.nextRequestHeader(ApiKeys.PRODUCE),
            request.toStruct());
    // Completion handler invoked by the network client once a response for
    // this request arrives; it delegates to handleProduceResponse with the
    // batches captured above.
    RequestCompletionHandler callback = new RequestCompletionHandler() {
        public void onComplete(ClientResponse response) {
            handleProduceResponse(response, recordsByPartition, time.milliseconds());
        }
    };
    // acks == 0 is fire-and-forget: no response is expected from the broker.
    return new ClientRequest(now, acks != 0, send, callback);
}
这里发现果然在封装请求的时候绑定了一个回调函数,看一下细节handleProduceResponse
/**
 * Completion handler for a produce request: invoked by the network client
 * once the broker responds, or once the connection is detected as lost.
 *
 * @param response the client response; may represent a disconnect
 * @param batches  the record batches carried by this request, keyed by topic-partition
 * @param now      current time in milliseconds
 */
private void handleProduceResponse(ClientResponse response, Map<TopicPartition, RecordBatch> batches, long now) {
int correlationId = response.request().request().header().correlationId();
// Special case: the request was sent but the broker disconnected before
// responding — complete every batch with NETWORK_EXCEPTION (completeBatch
// may still re-enqueue them for retry).
if (response.wasDisconnected()) {
log.trace("Cancelled request {} due to node {} being disconnected", response, response.request()
.request()
.destination());
for (RecordBatch batch : batches.values())
completeBatch(batch, Errors.NETWORK_EXCEPTION, -1L, Record.NO_TIMESTAMP, correlationId, now);
} else {
// Normal case: the broker answered.
log.trace("Received produce response from node {} with correlation id {}",
response.request().request().destination(),
correlationId);
// if we have a response, parse it
// (a response body is present whenever acks != 0 — the common case)
if (response.hasResponse()) {
ProduceResponse produceResponse = new ProduceResponse(response.responseBody());
/**
 * Walk the per-partition results contained in the response.
 */
for (Map.Entry<TopicPartition, ProduceResponse.PartitionResponse> entry : produceResponse.responses().entrySet()) {
TopicPartition tp = entry.getKey();
ProduceResponse.PartitionResponse partResp = entry.getValue();
// Server-side failures come back as a per-partition error code;
// decode it into an Errors value (Errors.NONE on success).
Errors error = Errors.forCode(partResp.errorCode);
// Look up the batch we sent for this partition...
RecordBatch batch = batches.get(tp);
// ...and complete it with the broker's result (offset/timestamp/error).
completeBatch(batch, error, partResp.baseOffset, partResp.timestamp, correlationId, now);
}
this.sensors.recordLatency(response.request().request().destination(), response.requestLatencyMs());
this.sensors.recordThrottleTime(response.request().request().destination(),
produceResponse.getThrottleTime());
} else {
// this is the acks = 0 case, just complete all requests
// (no response body exists, so assume success with unknown offsets)
for (RecordBatch batch : batches.values())
completeBatch(batch, Errors.NONE, -1L, Record.NO_TIMESTAMP, correlationId, now);
}
}
}
/**
 * Completes a single record batch with the broker's (or network layer's)
 * result for its partition: either re-enqueues it for retry, or finishes it
 * by firing the per-record user callbacks and releasing its buffer.
 *
 * @param batch         the batch to complete
 * @param error         the error reported for this partition (NONE on success)
 * @param baseOffset    base offset assigned by the broker, or -1 when unknown
 * @param timestamp     broker timestamp, or Record.NO_TIMESTAMP when unavailable
 * @param correlationId correlation id of the originating request (for logging)
 * @param now           current time in milliseconds
 */
private void completeBatch(RecordBatch batch, Errors error, long baseOffset, long timestamp, long correlationId, long now) {
// The response carried an error AND the batch is still retriable:
// put it back into the accumulator instead of failing it.
if (error != Errors.NONE && canRetry(batch, error)) {
// retry
log.warn("Got error produce response with correlation id {} on topic-partition {}, retrying ({} attempts left). Error: {}",
correlationId,
batch.topicPartition,
this.retries - batch.attempts - 1,
error);
this.accumulator.reenqueue(batch, now);
this.sensors.recordRetries(batch.topicPartition.topic(), batch.recordCount);
} else {
// Otherwise: success, a non-retriable error, or the retry budget is spent.
RuntimeException exception;
// Map an authorization failure onto a dedicated exception type that
// carries the topic name for the user.
if (error == Errors.TOPIC_AUTHORIZATION_FAILED)
exception = new TopicAuthorizationException(batch.topicPartition.topic());
else
exception = error.exception();
// tell the user the result of their request
// Core step: fires the per-record user callbacks, passing the
// exception through (null when the batch succeeded).
batch.done(baseOffset, timestamp, exception);
this.accumulator.deallocate(batch);
if (error != Errors.NONE)
this.sensors.recordErrors(batch.topicPartition.topic(), batch.recordCount);
}
// Metadata-related errors mean our view of the cluster may be stale;
// request a metadata refresh regardless of whether the batch was retried.
if (error.exception() instanceof InvalidMetadataException) {
if (error.exception() instanceof UnknownTopicOrPartitionException)
log.warn("Received unknown topic or partition error in produce request on partition {}. The " +
"topic/partition may not exist or the user may not have Describe access to it", batch.topicPartition);
metadata.requestUpdate();
}
// Unmute the completed partition.
// (only relevant when in-order delivery is being enforced)
if (guaranteeMessageOrder)
this.accumulator.unmutePartition(batch.topicPartition);
}
再来看这里面的核心代码
/**
 * Completes this batch: invokes the user-supplied callback for every record
 * in the batch (one Thunk was queued per record at send() time), then marks
 * the batch's future as done.
 *
 * @param baseOffset base offset assigned by the broker, or -1 on failure
 * @param timestamp  broker timestamp, or Record.NO_TIMESTAMP when the broker
 *                   kept the client-side create time
 * @param exception  null on success; otherwise the error handed to every callback
 */
public void done(long baseOffset, long timestamp, RuntimeException exception) {
    // Fix: original message read "with base offset offset {}" (duplicated word).
    log.trace("Produced messages to topic-partition {} with base offset {} and error: {}.",
            topicPartition,
            baseOffset,
            exception);
    // execute callbacks
    // Run each record's user callback exactly once; one callback throwing
    // must not prevent the remaining callbacks from running.
    for (int i = 0; i < this.thunks.size(); i++) {
        try {
            Thunk thunk = this.thunks.get(i);
            if (exception == null) {
                // If the timestamp returned by server is NoTimestamp, that means CreateTime is used. Otherwise LogAppendTime is used.
                RecordMetadata metadata = new RecordMetadata(this.topicPartition, baseOffset, thunk.future.relativeOffset(),
                        timestamp == Record.NO_TIMESTAMP ? thunk.future.timestamp() : timestamp,
                        thunk.future.checksum(),
                        thunk.future.serializedKeySize(),
                        thunk.future.serializedValueSize());
                // Success: deliver the metadata to the callback the user
                // registered on send(), with a null exception.
                thunk.callback.onCompletion(metadata, null);
            } else {
                // Failure: deliver the exception so the application can
                // decide how to react (log, re-send, alert, ...).
                thunk.callback.onCompletion(null, exception);
            }
        } catch (Exception e) {
            log.error("Error executing user-provided callback on message for topic-partition {}:", topicPartition, e);
        }
    }
    this.produceFuture.done(topicPartition, baseOffset, exception);
}
到此我们就看到了响应是如何被处理的