RecordAccumulator分析

它的主要作用相当于一个队列(缓冲区):先把record缓存到MemoryRecords中,之后再批量发送到服务器

一 核心字段

int batchSize: 批量大小

CompressionType compression: 压缩类型

long lingerMs: 延迟时间

long retryBackoffMs: 重试退避时间,即发送失败后再次重试之前需要等待的时间

BufferPool free: ByteBuffer缓冲池,只要大小满足条件,就可以复用缓冲池里的ByteBuffer

ConcurrentMap<TopicPartition,Deque<RecordBatch>> batches: TopicPartition和RecordBatch队列的映射关系

IncompleteRecordBatches incomplete: 尚未完成(处于未完成状态)的RecordBatch集合

int drainIndex: 使用drain方法导出RecordBatch的时候,为了防止饥饿,drainIndex就记录上次发送停止的位置,下次继续从此位置开始

 

二 重要方法

2.1 append 添加record到accumulator,并返回这个添加结果,添加结果主要包含元数据

public RecordAppendResult append(TopicPartition tp, long timestamp, byte[] key, byte[] value,
    Callback callback, long maxTimeToBlock) throws InterruptedException{
    appendsInProgress.incrementAndGet();
    try {
        // 获取一个双向队列,没有则创建
       
Deque<RecordBatch> dq = getOrCreateDeque(tp);
        // 由于队列并不是线程安全的所以需要同步
       
synchronized (dq) {
            if (closed)
                throw new IllegalStateException("Cannotsend after the producer is closed.");
            // 试图向双向队列添加数据
           
RecordAppendResultappendResult = tryAppend(timestamp, key, value, callback, dq);
            // 如果返回结果不为空则返回
           
if (appendResult != null)
                return appendResult;
        }

        int size = Math.max(this.batchSize, Records.LOG_OVERHEAD + Record.recordSize(key, value));
        log.trace("Allocating a new {} bytemessage buffer for topic {} partition {}", size, tp.topic(), tp.partition());
        // 追加失败,则从BufferPool申请新的空间
       
ByteBuffer
buffer = free.allocate(size, maxTimeToBlock);
        // 再次尝试添加
       
synchronized (dq) {
            // 需要检测生产者是否已经关闭
           
if (closed)
                throw new IllegalStateException("Cannotsend after the producer is closed.");
            // 试图添加recordMemoryRecord,如果添加失败,内存可能满了,重置position等和关闭buffer防止继续写入
           
RecordAppendResultappendResult = tryAppend(timestamp, key, value, callback, dq);
            // 如果添加成功
           
if (appendResult != null) {
                // 释放申请的空间,并且返回结果
                
free.deallocate(buffer);
                return appendResult;
            }
            // 添加仍然不成功,可能是MemoryRecord已经满了,内存不够了,就会创建一个新的MemoryRecord
           
MemoryRecords records = MemoryRecords.emptyRecords(buffer, compression, this.batchSize);
            // 创建RecordBatch,并且追加records
           
RecordBatch batch = new RecordBatch(tp, records, time.milliseconds());
            // 再次试图添加
           
FutureRecordMetadatafuture = Utils.notNull(batch.tryAppend(timestamp, key, value, callback, time.milliseconds()));
            // 把刚才创建的RecordBatch添加到双向队列末尾
           
dq.addLast(batch);
            // 把这个新创建的ReocrdBatch追加到incomplete集合
           
incomplete.add(batch);
            // 返回RecordAppendResult
           
return new RecordAppendResult(future, dq.size() > 1 || batch.records.isFull(), true);
        }
    } finally {
        appendsInProgress.decrementAndGet();
    }
}

 

2.2 tryAppend 尝试向MemoryRecords中添加

private RecordAppendResult tryAppend(long timestamp, byte[] key, byte[] value, Callback callback, Deque<RecordBatch> deque) {
    // 取出双向队列最后一个元素,但是不会删除它
    RecordBatch last = deque.peekLast();
    // 试图将取出来的最后一个元素添加
    if (last != null) {
        // 调用RecordBatch.truAppend方法,添加到MemoryRecordsbuffer
        FutureRecordMetadata future = last.tryAppend(timestamp, key, value, callback, time.milliseconds());
        // 表示MemoryRecord已经放不下了,然后flipposition置为0,当前buffer不可写,返回
        if (future == null)
            last.records.close();
        else // 直接返回添加结果
            return new RecordAppendResult(future, deque.size() > 1 || last.records.isFull(), false);
    }
    return null;
}

 

2.3 abortBatches

private void abortBatches() {
    // 遍历已经处于完成状态的RecordBatch
    for (RecordBatch batch : incomplete.all()) {
        // 根据TopicPartition获取对应的Deque<RecordBatch>队列
        Deque<RecordBatch> dq = getDeque(batch.topicPartition);
        // 在中断之前关闭队列
        synchronized (dq) {
            // 关闭MemoryRecord不再添加
            batch.records.close();
            // 从队列移除这个RecordBatch
            dq.remove(batch);
        }
        // 执行RecordBatch中所有消息的回调,并且标记这个生产者请求已经完成
        batch.done(-1L, Record.NO_TIMESTAMP, new IllegalStateException("Producer is closed forcefully."));
        // incomplete集合中移除当前batch,并且释放分配的ByteBuffer
        deallocate(batch);
    }
}

 

2.4 ready 判断各分区队首的RecordBatch是否满足发送条件,一旦满足,就把该分区leader副本所在的节点加入可发送节点集合(readyNodes)

public ReadyCheckResult ready(Cluster cluster, long nowMs) {
    // 用来保存向哪些Node节点发送信息
    Set<Node> readyNodes = new HashSet<>();
    // 下一次需要调用ready方法的时间间隔
    long nextReadyCheckDelayMs = Long.MAX_VALUE;
    // 根据Metadata元数据中找不到leader副本的topic的集合
    Set<String> unknownLeaderTopics = new HashSet<>();
    // 是否有线程在阻塞等待BufferPool释放空间
    boolean exhausted = this.free.queued() > 0;
    // 遍历每一个分区和RecordBatch队列映射集合
    for (Map.Entry<TopicPartition, Deque<RecordBatch>> entry : this.batches.entrySet()) {
        // 获取TopicPartition
        TopicPartition part = entry.getKey();
        // 获取RecordBatch队列
        Deque<RecordBatch> deque = entry.getValue();
        // 根据分区找到leader副本位于哪一个节点
        Node leader = cluster.leaderFor(part);
        synchronized (deque) {
            // leader副本为空但是队列不为空
            if (leader == null && !deque.isEmpty()) {
                // 添加该分区的topic到未知leadertopic集合
                unknownLeaderTopics.add(part.topic());
            }
            // 如果leader节点没有准备好且muted不包含这个分区
            else if (!readyNodes.contains(leader) && !muted.contains(part)) {
                // 获取队首的RecordBatch
                RecordBatch batch = deque.peekFirst();
                if (batch != null) {
                    boolean backingOff = batch.attempts > 0 && batch.lastAttemptMs + retryBackoffMs > nowMs;
                    long waitedTimeMs = nowMs - batch.lastAttemptMs;
                    long timeToWaitMs = backingOff ? retryBackoffMs : lingerMs;
                    long timeLeftMs = Math.max(timeToWaitMs - waitedTimeMs, 0);
                    boolean full = deque.size() > 1 || batch.records.isFull();
                    boolean expired = waitedTimeMs >= timeToWaitMs;
                    // 查看是否满足发送条件,满足其一即可
                    boolean sendable = full || expired || exhausted || closed || flushInProgress();
                    if (sendable && !backingOff) {
                        // 如果可以发送且没有重新尝试发送,添加到可以发送节点
                        readyNodes.add(leader);
                    } else {
                        // 更新下一次需要调用ready方法的时间间隔
                        nextReadyCheckDelayMs = Math.min(timeLeftMs, nextReadyCheckDelayMs);
                    }
                }
            }
        }
    }
    // 返回ReadyCheckResult
    return new ReadyCheckResult(readyNodes, nextReadyCheckDelayMs, unknownLeaderTopics);
}

 

 

2.5 drain 根据ready方法得到的readyNodes集合,由sender线程调用,将TopicPartition -> RecordBatch的映射转换成NodeId -> RecordBatch集合的映射

public Map<Integer, List<RecordBatch>> drain(Cluster cluster, Set<Node> nodes, int maxSize, long now) {
    // 判断节点集合是否为空
    if (nodes.isEmpty())
        return Collections.emptyMap();
    // 创建一个存储节点idRecordBatch列表的映射
    Map<Integer, List<RecordBatch>> batches = new HashMap<>();
    // 遍历集合
    for (Node node : nodes) {
        int size = 0;
        // 获取该node的所有分区信息
        List<PartitionInfo> parts = cluster.partitionsForNode(node.id());
        // 用于保存要发送的RecordBatch的列表
        List<RecordBatch> ready = new ArrayList<>();
        // drainIndex记录上次发送停止的位置,下一次继续从此位置开始发送,若一直从索引0的队列开始发送,可能会出现一直只发送前几个
        // 分区的消息的情况,造成其他分区饥饿
        // 计算开始位置
        int start = drainIndex = drainIndex % parts.size();
        do {
            PartitionInfo part = parts.get(drainIndex);
            TopicPartition tp = new TopicPartition(part.topic(), part.partition());
            // Only proceed if the partition has no in-flight batches.
            if (!muted.contains(tp)) {
                // 更具TopicPartition获取队列
                Deque<RecordBatch> deque = getDeque(new TopicPartition(part.topic(), part.partition()));
                if (deque != null) {
                    synchronized (deque) {
                        // 取出队列第一个元素
                        RecordBatch first = deque.peekFirst();
                        // 第一个元素不为空
                        if (first != null) {
                            // 判断是否是重新发送
                            boolean backoff = first.attempts > 0 && first.lastAttemptMs + retryBackoffMs > now;
                            if (!backoff) {
                                if (size + first.records.sizeInBytes() > maxSize && !ready.isEmpty()) {
                                    // 数据量已满结束循环
                                    break;
                                } else {
                                    // 从队列中获取一个RecordBatch,并将这个RecordBatch放到ready集合
                                    // 每一个TopicPartition只取一个RecordBatch
                                    RecordBatch batch = deque.pollFirst();
                                    // 关闭Compressor,并将MemoryRecord放掉ready集合中
                                    batch.records.close();
                                    size += batch.records.sizeInBytes();
                                    ready.add(batch);
                                    batch.drainedMs = now;
                                }
                            }
                        }
                    }
                }
            }
            // 更新drainIndex
            this.drainIndex = (this.drainIndex + 1) % parts.size();
        } while (start != drainIndex);
        // 记录node id RecordBatch的对应关系
        batches.put(node.id(), ready);
    }
    return batches;
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

莫言静好、

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值