它的主要作用相当于一个队列(缓冲区):先将record缓存到MemoryRecords中,之后再批量发送到服务器
一 核心字段
int batchSize: 批量大小
CompressionType compression: 压缩类型
long lingerMs: 延迟时间
long retryBackoffMs: 重试退避时间,即RecordBatch发送失败后,再次重试之前需要等待的时间
BufferPool free: ByteBuffer缓冲池,只要大小满足条件,就可以复用缓冲池里的ByteBuffer
ConcurrentMap<TopicPartition,Deque<RecordBatch>> batches: TopicPartition和RecordBatch队列的映射关系
IncompleteRecordBatches incomplete: 尚未完成(处于未完成状态)的RecordBatch集合
int drainIndex: 使用drain方法导出RecordBatch的时候,为了防止饥饿,drainIndex就记录上次发送停止的位置,下次继续从此位置开始
二 重要方法
2.1 append 添加record到accumulator,并返回这个添加结果,添加结果主要包含元数据
/**
 * Appends a record to the accumulator and returns the result of the append.
 *
 * <p>The record is first offered to the last batch queued for {@code tp}. If that
 * fails, a buffer is requested from the pool, the append is retried under the deque
 * lock, and only if it still fails is a new batch created on the freshly allocated
 * buffer and queued.
 *
 * @param tp             partition the record is destined for
 * @param timestamp      timestamp handed to the batch together with the record
 * @param key            record key
 * @param value          record value
 * @param callback       callback handed to the batch together with the record
 * @param maxTimeToBlock forwarded to {@code free.allocate} as its second argument
 * @return the append result; its last constructor argument is {@code true} only when
 *         this call created a new batch
 * @throws IllegalStateException if {@code closed} is set when either deque lock is taken
 * @throws InterruptedException  declared by the signature; presumably raised while
 *                               waiting in {@code free.allocate} (verify against BufferPool)
 */
public RecordAppendResult append(TopicPartition tp, long timestamp, byte[] key, byte[] value,
                                 Callback callback, long maxTimeToBlock) throws InterruptedException {
    // Count this in-flight append; the counter is decremented in the finally block.
    appendsInProgress.incrementAndGet();
    // Non-null only while this call still owns a pooled buffer that has not been
    // handed to a queued batch; the finally block returns it to the pool.
    ByteBuffer buffer = null;
    try {
        // Fetch the deque for this partition, creating it if it does not exist yet.
        Deque<RecordBatch> dq = getOrCreateDeque(tp);
        // The deque itself is not thread-safe, so every access is done under its lock.
        synchronized (dq) {
            if (closed)
                throw new IllegalStateException("Cannot send after the producer is closed.");
            // Fast path: try to fit the record into the last queued batch.
            RecordAppendResult appendResult = tryAppend(timestamp, key, value, callback, dq);
            if (appendResult != null)
                return appendResult;
        }

        // No batch could take the record: allocate room for a new one. The lock is
        // not held here, so other threads can keep using the deque in the meantime.
        int size = Math.max(this.batchSize, Records.LOG_OVERHEAD + Record.recordSize(key, value));
        log.trace("Allocating a new {} byte message buffer for topic {} partition {}", size, tp.topic(), tp.partition());
        buffer = free.allocate(size, maxTimeToBlock);

        synchronized (dq) {
            // The closed flag is re-checked after re-acquiring the lock. Throwing here
            // used to leak the buffer; it is now released in the finally block.
            if (closed)
                throw new IllegalStateException("Cannot send after the producer is closed.");
            // Retry: the deque may have changed while the lock was released.
            RecordAppendResult appendResult = tryAppend(timestamp, key, value, callback, dq);
            if (appendResult != null) {
                // The new buffer is not needed; the finally block gives it back to the pool.
                return appendResult;
            }
            // Still no room: build a new batch on top of the freshly allocated buffer.
            MemoryRecords records = MemoryRecords.emptyRecords(buffer, compression, this.batchSize);
            RecordBatch batch = new RecordBatch(tp, records, time.milliseconds());
            FutureRecordMetadata future = Utils.notNull(batch.tryAppend(timestamp, key, value, callback, time.milliseconds()));
            // Queue the new batch at the tail and track it in the incomplete set.
            dq.addLast(batch);
            incomplete.add(batch);
            // Ownership of the buffer has moved to the queued batch; do not release it here.
            buffer = null;
            return new RecordAppendResult(future, dq.size() > 1 || batch.records.isFull(), true);
        }
    } finally {
        if (buffer != null)
            free.deallocate(buffer);
        appendsInProgress.decrementAndGet();
    }
}
2.2 tryAppend 尝试向MemoryRecords中添加
/**
 * Tries to append a record to the last batch of the given deque.
 *
 * @param timestamp timestamp forwarded to the batch
 * @param key       record key
 * @param value     record value
 * @param callback  callback forwarded to the batch
 * @param deque     deque whose tail batch is tried; callers in this file hold its lock
 * @return the append result, or {@code null} when the deque is empty or the tail batch
 *         did not accept the record (in which case that batch's records are closed)
 */
private RecordAppendResult tryAppend(long timestamp, byte[] key, byte[] value, Callback callback, Deque<RecordBatch> deque) {
    // Look at the tail batch without removing it from the deque.
    RecordBatch tail = deque.peekLast();
    if (tail == null)
        return null;

    FutureRecordMetadata appended = tail.tryAppend(timestamp, key, value, callback, time.milliseconds());
    if (appended == null) {
        // The tail batch refused the record: close its records and report failure.
        tail.records.close();
        return null;
    }

    // Reported as full when more than one batch is queued or the tail says it is full.
    boolean batchIsFull = deque.size() > 1 || tail.records.isFull();
    return new RecordAppendResult(appended, batchIsFull, false);
}
2.3 abortBatches
/**
 * Aborts every batch currently tracked in {@code incomplete}: each one is closed,
 * removed from its partition deque, completed with an {@link IllegalStateException},
 * and then passed to {@code deallocate}.
 */
private void abortBatches() {
    // Walk the batches that have not completed yet.
    for (RecordBatch pending : incomplete.all()) {
        // Deque that belongs to the batch's partition.
        Deque<RecordBatch> queue = getDeque(pending.topicPartition);
        // Close and unlink under the deque lock, before the batch is completed.
        synchronized (queue) {
            pending.records.close();
            queue.remove(pending);
        }
        // Complete the batch with an error; done() presumably runs the callbacks of
        // the records in it (verify against RecordBatch).
        IllegalStateException cause = new IllegalStateException("Producer is closed forcefully.");
        pending.done(-1L, Record.NO_TIMESTAMP, cause);
        // Presumably removes the batch from `incomplete` and returns its buffer to
        // the pool (verify against deallocate).
        deallocate(pending);
    }
}
2.4 ready 主要用于判断各分区队列中的RecordBatch是否满足发送条件,一旦满足,就把该分区leader副本所在的节点加入可发送节点集合(readyNodes)
/**
 * Works out which nodes have at least one partition whose first batch can be sent.
 *
 * <p>A first batch is sendable when any of these holds: more than one batch is queued
 * or the batch is full, it has waited at least the applicable wait time, threads are
 * queued on the buffer pool, the accumulator is closed, or a flush is in progress.
 * A sendable batch that is still backing off after a retry is not reported as ready.
 *
 * @param cluster cluster view used to look up the leader of each partition
 * @param nowMs   current time in milliseconds
 * @return the ready nodes, the smallest remaining wait among the batches that are not
 *         ready yet ({@code Long.MAX_VALUE} if there are none), and the topics that
 *         have queued batches but no known leader
 */
public ReadyCheckResult ready(Cluster cluster, long nowMs) {
    Set<Node> sendableNodes = new HashSet<>();
    Set<String> topicsWithoutLeader = new HashSet<>();
    long nextCheckDelayMs = Long.MAX_VALUE;
    // True when at least one thread is queued on the buffer pool.
    boolean exhausted = this.free.queued() > 0;

    for (Map.Entry<TopicPartition, Deque<RecordBatch>> entry : this.batches.entrySet()) {
        TopicPartition tp = entry.getKey();
        Deque<RecordBatch> queue = entry.getValue();
        Node leader = cluster.leaderFor(tp);

        synchronized (queue) {
            // Batches are waiting but the leader is unknown: remember the topic.
            if (leader == null && !queue.isEmpty()) {
                topicsWithoutLeader.add(tp.topic());
                continue;
            }
            // Nothing to decide if the node is already ready or the partition is muted.
            if (sendableNodes.contains(leader) || muted.contains(tp))
                continue;

            RecordBatch head = queue.peekFirst();
            if (head == null)
                continue;

            // A batch that has been attempted before must wait out the retry backoff.
            boolean backingOff = head.attempts > 0 && head.lastAttemptMs + retryBackoffMs > nowMs;
            long waitedMs = nowMs - head.lastAttemptMs;
            long requiredWaitMs = backingOff ? retryBackoffMs : lingerMs;
            boolean full = queue.size() > 1 || head.records.isFull();
            boolean expired = waitedMs >= requiredWaitMs;
            // Any single condition is enough to make the batch sendable.
            boolean sendable = full || expired || exhausted || closed || flushInProgress();

            if (backingOff || !sendable) {
                // Not ready yet: keep the shortest remaining wait seen so far.
                long remainingMs = Math.max(requiredWaitMs - waitedMs, 0);
                nextCheckDelayMs = Math.min(remainingMs, nextCheckDelayMs);
            } else {
                sendableNodes.add(leader);
            }
        }
    }
    return new ReadyCheckResult(sendableNodes, nextCheckDelayMs, topicsWithoutLeader);
}
2.5 drain 由sender线程调用:根据ready方法得到的readyNodes集合,将TopicPartition -> RecordBatch的映射转换成NodeId -> RecordBatch集合的映射
/**
 * Drains batches for the given nodes, regrouping them from per-partition deques into
 * per-node lists.
 *
 * @param cluster cluster view used to list the partitions of each node
 * @param nodes   nodes to drain for
 * @param maxSize size limit in bytes for the batches collected for one node; the first
 *                batch collected for a node is taken even if it alone exceeds the limit
 * @param now     current time in milliseconds
 * @return a map from node id to the batches drained for that node; an empty map when
 *         {@code nodes} is empty
 */
public Map<Integer, List<RecordBatch>> drain(Cluster cluster, Set<Node> nodes, int maxSize, long now) {
    if (nodes.isEmpty())
        return Collections.emptyMap();

    Map<Integer, List<RecordBatch>> batches = new HashMap<>();
    for (Node node : nodes) {
        // Record which batches go to which node id.
        batches.put(node.id(), drainBatchesForOneNode(cluster, node, maxSize, now));
    }
    return batches;
}

/** Collects at most one batch per partition of {@code node}, up to {@code maxSize} bytes. */
private List<RecordBatch> drainBatchesForOneNode(Cluster cluster, Node node, int maxSize, long now) {
    int size = 0;
    List<PartitionInfo> parts = cluster.partitionsForNode(node.id());
    List<RecordBatch> ready = new ArrayList<>();
    // Without partitions there is nothing to drain, and the modulo below would
    // divide by zero. (The null check is defensive; whether partitionsForNode can
    // return null is not visible here.)
    if (parts == null || parts.isEmpty())
        return ready;

    // drainIndex remembers where the previous drain stopped, so the scan does not
    // always begin at partition 0 and starve the partitions further down the list.
    int start = drainIndex = drainIndex % parts.size();
    do {
        PartitionInfo part = parts.get(drainIndex);
        TopicPartition tp = new TopicPartition(part.topic(), part.partition());
        // Skip partitions listed in `muted` (per the original comment, partitions
        // that have in-flight batches).
        if (!muted.contains(tp)) {
            Deque<RecordBatch> deque = getDeque(tp);
            if (deque != null) {
                synchronized (deque) {
                    RecordBatch first = deque.peekFirst();
                    if (first != null) {
                        // A batch that was attempted before is skipped until its backoff elapses.
                        boolean backoff = first.attempts > 0 && first.lastAttemptMs + retryBackoffMs > now;
                        if (!backoff) {
                            if (size + first.records.sizeInBytes() > maxSize && !ready.isEmpty()) {
                                // The size limit is reached: stop scanning. drainIndex is left
                                // on this partition, so the next scan starts from it.
                                break;
                            } else {
                                // Take exactly one batch from this partition.
                                RecordBatch batch = deque.pollFirst();
                                // Close the records before measuring and handing them out.
                                batch.records.close();
                                size += batch.records.sizeInBytes();
                                ready.add(batch);
                                batch.drainedMs = now;
                            }
                        }
                    }
                }
            }
        }
        // Move on to the next partition, wrapping around at the end of the list.
        this.drainIndex = (this.drainIndex + 1) % parts.size();
    } while (start != drainIndex);
    return ready;
}