kafka消费者（八）：Fetcher

最新推荐文章于 2024-07-22 14:48:22 发布

其实系一个须刨

最新推荐文章于 2024-07-22 14:48:22 发布

阅读量1.4k

点赞数

分类专栏： Kafka-0.10

本文链接：https://blog.csdn.net/lianggx3/article/details/98389947

版权

Kafka-0.10 专栏收录该内容

73 篇文章 12 订阅

订阅专栏

上次我们介绍了offset操作的原理，下面介绍消费者如何从服务端获取消息。Fetcher类的主要功能是发送Fetcher请求，获取指定消息集合，处理FetchResponse，更新消息位置。

public class Fetcher<K, V> {
    //client负责网络通信
    private final ConsumerNetworkClient client;
    private final Time time;
    //在服务端收到FetchRequest之后并不是立即响应，而是当可返回的消息数据累计到至少minBytes个字节时才响应。这样每个FetchResponse中就包含了多个消息，提供效率。
    private final int minBytes;
    //等待FetchResponse的最长时间
    private final int maxWaitMs;
    //每次fetch操作的最大字节数
    private final int fetchSize;
    //重试等待时间
    private final long retryBackoffMs;
    //每次获取record的最大数量
    private final int maxPollRecords;
    private final boolean checkCrcs;
    //记录kakfa集群的元数据
    private final Metadata metadata;
    private final FetchManagerMetrics sensors;
    //记录每个TopicPartition的消费情况
    private final SubscriptionState subscriptions;
    //每个FetchResponse首先会转换成completedFetches对象进入此缓存队列，此时没有解析消息
    private final List<CompletedFetch> completedFetches;
    private final Deserializer<K> keyDeserializer;
    private final Deserializer<V> valueDeserializer;
    //PartitionRecrods类型，保存了CompleteFetch解析后的结果集合，它有三个主要字段
    /*  
        private long fetchOffset;                       //records中第一个消息offset
        private TopicPartition partition;               //对应的TopicPartition
        private List<ConsumerRecord<K, V>> records;     //消息集合
    */
    private PartitionRecords<K, V> nextInLineRecords = null;
}

创建fetchRequest请求如下：

private Map<Node, FetchRequest> createFetchRequests() {
    // 获metadata元数据
    Cluster cluster = metadata.fetch();
    Map<Node, Map<TopicPartition, FetchRequest.PartitionData>> fetchable = new HashMap<>();
    //fetchablePartitions就是按照以下条件过滤
    //1. 分配给当前消费者的分区，即SubscriptionState.assign集合中有对应记录的分区
    //2. 分区未被标记为暂停，且对应的TopicPartitionState.position不为空
    //3. nextInLineRecords中没有来自此分区的消息
    //4. completedFetches中没有来自此分区的消息
    for (TopicPartition partition : fetchablePartitions()) {
        //查找分区leader所在副本的node
        Node node = cluster.leaderFor(partition);
        if (node == null) {
            //找不到leader就重新请求metadata
            metadata.requestUpdate();
        } // 如果这个节点还有unsent集合或者InFlightRquest中的对应请求队列不为空，就不对此Node发送FetchRequest请求
        else if (this.client.pendingRequestCount(node) == 0) {
            
            Map<TopicPartition, FetchRequest.PartitionData> fetch = fetchable.get(node);
            if (fetch == null) {
                fetch = new HashMap<>();
                fetchable.put(node, fetch);
            }
            //通过SubcriptionState查找每个分区对应的position，并封装成PartitionData对象
            long position = this.subscriptions.position(partition);
            fetch.put(partition, new FetchRequest.PartitionData(position, this.fetchSize));
            log.trace("Added fetch request for partition {} at offset {}", partition, position);
        }
    }
    // 对上面的fetchable集合进行转换，将发送同一个Node节点的所有TopicPartition的position信息封装成一个FetchRequest对象。
    Map<Node, FetchRequest> requests = new HashMap<>();
    for (Map.Entry<Node, Map<TopicPartition, FetchRequest.PartitionData>> entry : fetchable.entrySet()) {
        Node node = entry.getKey();
        FetchRequest fetch = new FetchRequest(this.maxWaitMs, this.minBytes, entry.getValue());
        requests.put(node, fetch);
    }
    return requests;
}

封装完后就是发送。.

public void sendFetches() {
for (Map.Entry<Node, FetchRequest> fetchEntry: createFetchRequests().entrySet()) {
final FetchRequest request = fetchEntry.getValue();
//把发往每个Node的FetchRequest都缓存到unsent队列上
client.send(fetchEntry.getKey(), ApiKeys.FETCH, request)
//添加Listener，这也是处理FetchResponse的入口
.addListener(new RequestFutureListener<ClientResponse>() {
@Override
public void onSuccess(ClientResponse resp) {
FetchResponse response = new FetchResponse(resp.responseBody());
Set<TopicPartition> partitions = new HashSet<>(response.responseData().keySet());
FetchResponseMetricAggregator metricAggregator = new FetchResponseMetricAggregator(sensors, partitions);

for (Map.Entry<TopicPartition, FetchResponse.PartitionData> entry : response.responseData().entrySet()) {
//遍历响应中的数据
TopicPartition partition = entry.getKey();
long fetchOffset = request.fetchData().get(partition).offset;
//注意，这里是FetchResponse.PartitionData类型
FetchResponse.PartitionData fetchData = entry.getValue();
//创建completedFetch，缓存到completedFetch队列中
completedFetches.add(new CompletedFetch(partition, fetchOffset, fetchData, metricAggregator));
}

sensors.fetchLatency.record(resp.requestLatencyMs());
sensors.fetchThrottleTimeSensor.record(response.getThrottleTime());
}

@Override
public void onFailure(RuntimeException e) {
log.debug("Fetch failed", e);
}
});
}
}

放在completeFetches队列中的消息还是未解析的FetchResponse.PartitionData对象。把fetchedRecords()方法中会把这些消息进行解析，然后把Record集合返回，同时再修改对应TopicPartitionState的position，为下次fetch做好准备。

/**
 * Return the fetched records, empty the record buffer and update the consumed position.
 *
 * NOTE: returning empty records guarantees the consumed position are NOT updated.
 */
public Map<TopicPartition, List<ConsumerRecord<K, V>>> fetchedRecords() {
    if (this.subscriptions.partitionAssignmentNeeded()) {
        //需要进行rebalance则返回空集合
        return Collections.emptyMap();
    } else {
        //按照TopicPartition分类
        Map<TopicPartition, List<ConsumerRecord<K, V>>> drained = new HashMap<>();
        //一次最多取出maxPollRecords条消息
        int recordsRemaining = maxPollRecords;
        //completedFetches集合的迭代器
        Iterator<CompletedFetch> completedFetchesIterator = completedFetches.iterator();

        while (recordsRemaining > 0) {//遍历completedFetches集合
            if (nextInLineRecords == null || nextInLineRecords.isEmpty()) {
                if (!completedFetchesIterator.hasNext())
                    break;

                CompletedFetch completion = completedFetchesIterator.next();
                completedFetchesIterator.remove();
                //解析一个completedFetches得到一个PartitionRecords对象
                nextInLineRecords = parseFetchedData(completion);
            } else {
                //nextInLineRecords中的消息添加到drained中
                recordsRemaining -= append(drained, nextInLineRecords, recordsRemaining);
            }
        }
        //将结果集合返回
        return drained;
    }
}
/**
 * The callback for fetch completion
   解析CompleteFetch
 */
private PartitionRecords<K, V> parseFetchedData(CompletedFetch completedFetch) {
    TopicPartition tp = completedFetch.partition;
    FetchResponse.PartitionData partition = completedFetch.partitionData;
    long fetchOffset = completedFetch.fetchedOffset;
    int bytes = 0;
    int recordsCount = 0;
    PartitionRecords<K, V> parsedRecords = null;

    try {
        if (!subscriptions.isFetchable(tp)) {
            // this can happen when a rebalance happened or a partition consumption paused
            // while fetch is still in-flight
            log.debug("Ignoring fetched records for partition {} since it is no longer fetchable", tp);
        } else if (partition.errorCode == Errors.NONE.code()) {
            // we are interested in this fetch only if the beginning offset matches the
            // current consumed position
            Long position = subscriptions.position(tp);
            if (position == null || position != fetchOffset) {
                //拉取下来的消息要与目标拉取的offset相匹配。
                log.debug("Discarding stale fetch response for partition {} since its offset {} does not match " +
                        "the expected offset {}", tp, fetchOffset, position);
                return null;
            }

            ByteBuffer buffer = partition.recordSet;
            //创建MemoryRecords，其中ByteBuffer来自FecthResponse
            MemoryRecords records = MemoryRecords.readableRecords(buffer);
            List<ConsumerRecord<K, V>> parsed = new ArrayList<>();
            boolean skippedRecords = false;
            //遍历MemoryRecords集合获取Record集合
            for (LogEntry logEntry : records) {
                // Skip the messages earlier than current position.
                // 跳过早于position的消息
                if (logEntry.offset() >= position) {
                    parsed.add(parseRecord(tp, logEntry));
                    bytes += logEntry.size();
                } else {
                    skippedRecords = true;
                }
            }

            recordsCount = parsed.size();
            this.sensors.recordTopicFetchMetrics(tp.topic(), bytes, recordsCount);

            if (!parsed.isEmpty()) {
                log.trace("Adding fetched record for partition {} with offset {} to buffered record list", tp, position);
                //把解析后端Record集合封装成PartitionRecords
                parsedRecords = new PartitionRecords<>(fetchOffset, tp, parsed);
                ConsumerRecord<K, V> record = parsed.get(parsed.size() - 1);
                this.sensors.recordsFetchLag.record(partition.highWatermark - record.offset());
            } else if (buffer.limit() > 0 && !skippedRecords) {
                // we did not read a single message from a non-empty buffer
                // because that message's size is larger than fetch size, in this case
                // record this exception
                Map<TopicPartition, Long> recordTooLargePartitions = Collections.singletonMap(tp, fetchOffset);
                throw new RecordTooLargeException("There are some messages at [Partition=Offset]: "
                        + recordTooLargePartitions
                        + " whose size is larger than the fetch size "
                        + this.fetchSize
                        + " and hence cannot be ever returned."
                        + " Increase the fetch size on the client (using max.partition.fetch.bytes),"
                        + " or decrease the maximum message size the broker will allow (using message.max.bytes).",
                        recordTooLargePartitions);
            }
        } else if (partition.errorCode == Errors.NOT_LEADER_FOR_PARTITION.code()
                || partition.errorCode == Errors.UNKNOWN_TOPIC_OR_PARTITION.code()) {
            this.metadata.requestUpdate();
        } else if (partition.errorCode == Errors.OFFSET_OUT_OF_RANGE.code()) {
            if (fetchOffset != subscriptions.position(tp)) {
                log.debug("Discarding stale fetch response for partition {} since the fetched offset {}" +
                        "does not match the current offset {}", tp, fetchOffset, subscriptions.position(tp));
            } else if (subscriptions.hasDefaultOffsetResetPolicy()) {
                log.info("Fetch offset {} is out of range for partition {}, resetting offset", fetchOffset, tp);
                subscriptions.needOffsetReset(tp);
            } else {
                throw new OffsetOutOfRangeException(Collections.singletonMap(tp, fetchOffset));
            }
        } else if (partition.errorCode == Errors.TOPIC_AUTHORIZATION_FAILED.code()) {
            log.warn("Not authorized to read from topic {}.", tp.topic());
            throw new TopicAuthorizationException(Collections.singleton(tp.topic()));
        } else if (partition.errorCode == Errors.UNKNOWN.code()) {
            log.warn("Unknown error fetching data for topic-partition {}", tp);
        } else {
            throw new IllegalStateException("Unexpected error code " + partition.errorCode + " while fetching data");
        }
    } finally {
        completedFetch.metricAggregator.record(tp, bytes, recordsCount);
    }
    return parsedRecords;
}

在fetchedRecords()中，把消息添加到drained集合中时，还更新TopicPartitionState的position字段，下面是Fetcher.append()方法：

private int append(Map<TopicPartition, List<ConsumerRecord<K, V>>> drained,
                   PartitionRecords<K, V> partitionRecords,
                   int maxRecords) {
    if (partitionRecords.isEmpty())
        return 0;

    if (!subscriptions.isAssigned(partitionRecords.partition)) {
        // this can happen when a rebalance happened before fetched records are returned to the consumer's poll call
        // 可能有rebalance
        log.debug("Not returning fetched records for partition {} since it is no longer assigned", partitionRecords.partition);
    } else {
        // note that the consumed position should always be available as long as the partition is still assigned
        long position = subscriptions.position(partitionRecords.partition);
        if (!subscriptions.isFetchable(partitionRecords.partition)) {
            // this can happen when a partition is paused before fetched records are returned to the consumer's poll call
            log.debug("Not returning fetched records for assigned partition {} since it is no longer fetchable", partitionRecords.partition);
        } else if (partitionRecords.fetchOffset == position) {
            // we are ensured to have at least one record since we already checked for emptiness
            //获得消息集合，最多maxRecords个消息
            List<ConsumerRecord<K, V>> partRecords = partitionRecords.take(maxRecords);
            //最后一个消息的offset
            long nextOffset = partRecords.get(partRecords.size() - 1).offset() + 1;

            log.trace("Returning fetched records at offset {} for assigned partition {} and update " +
                    "position to {}", position, partitionRecords.partition, nextOffset);

            List<ConsumerRecord<K, V>> records = drained.get(partitionRecords.partition);
            if (records == null) {
                records = partRecords;
                drained.put(partitionRecords.partition, records);
            } else {
                records.addAll(partRecords);
            }
            //更新SubcriptionState中对应TopicPartitionState的position字段
            subscriptions.position(partitionRecords.partition, nextOffset);
            return partRecords.size();
        } else {
            // these records aren't next in line based on the last consumed position, ignore them
            // they must be from an obsolete request
            log.debug("Ignoring fetched records for {} at offset {} since the current position is {}",
                    partitionRecords.partition, partitionRecords.fetchOffset, position);
        }
    }
    partitionRecords.discard();
    return 0;
}

更新position
在updateFetchPositions中实现了TopicPartitionState.position字段的功能

/**
 * Update the fetch positions for the provided partitions.
 * @param partitions the partitions to update positions for
 * @throws NoOffsetForPartitionException If no offset is stored for a given partition and no reset policy is available
 */
public void updateFetchPositions(Set<TopicPartition> partitions) {
    // reset the fetch position to the committed position
    for (TopicPartition tp : partitions) {
        //检测position是否为空，如果非空则表示正常，不需要重置操作
        if (!subscriptions.isAssigned(tp) || subscriptions.isFetchable(tp))
            continue;

        // TODO: If there are several offsets to reset, we could submit offset requests in parallel
        if (subscriptions.isOffsetResetNeeded(tp)) {
            //按照指定策略更新position的值
            resetOffset(tp);
        } else if (subscriptions.committed(tp) == null) {
            // there's no committed position, so we need to reset with the default strategy
            //commit字段为空，则使用默认策略更新
            subscriptions.needOffsetReset(tp);
            resetOffset(tp);
        } else {
            //commited字段不是空的，就把position字段设置为commited的值
            long committed = subscriptions.committed(tp).offset();
            log.debug("Resetting offset for partition {} to the committed offset {}", tp, committed);
            subscriptions.seek(tp, committed);
        }
    }
}

private void resetOffset(TopicPartition partition) {
    //根据配置的重置策略选择timestamp
    OffsetResetStrategy strategy = subscriptions.resetStrategy(partition);
    final long timestamp;
    if (strategy == OffsetResetStrategy.EARLIEST)
        timestamp = ListOffsetRequest.EARLIEST_TIMESTAMP; //-2
    else if (strategy == OffsetResetStrategy.LATEST)
        timestamp = ListOffsetRequest.LATEST_TIMESTAMP;//-1
    else
        throw new NoOffsetForPartitionException(partition);

    log.debug("Resetting offset for partition {} to {} offset.", partition, strategy.name().toLowerCase(Locale.ROOT));
    //向分区的leader副本所在的Node发送一个OffsesRequest请求，阻塞等待响应
    //listOffset方法实现了对offsetRequest的发送和OffsetResponse的处理。
    long offset = listOffset(partition, timestamp);

    // we might lose the assignment while fetching the offset, so check it is still active
    //更新position
    if (subscriptions.isAssigned(partition))
        this.subscriptions.seek(partition, offset);
}