概述
Kafka 从 0.10.1 版本开始,consumer 端支持根据时间戳查询对应的 offset,然后 seek 到该 offset 处重新开始消费。
consumer端根据指定时间戳找到offset开始消费的代码示例如下:
// Example: rewind every assigned partition to the first offset at or after a given timestamp.
consumer.subscribe(topicA);
consumer.poll(100); // subscribe to the topic and poll as usual
Set<TopicPartition> assignments = consumer.assignment(); // partitions currently assigned to this consumer
Map<TopicPartition, Long> query = new HashMap<>(); // argument for offsetsForTimes: partition -> timestamp to look up
for (TopicPartition topicPartition : assignments) {
    System.out.println(topicPartition);
    query.put(topicPartition, 1550804131000L);
}
Map<TopicPartition, OffsetAndTimestamp> result = consumer.offsetsForTimes(query);
for (Map.Entry<TopicPartition, OffsetAndTimestamp> entry : result.entrySet()) {
    System.out.println(entry);
    OffsetAndTimestamp found = entry.getValue();
    // offsetsForTimes maps a partition to null when it has no message with a timestamp at or after
    // the target, so skip such partitions instead of failing with a NullPointerException.
    if (found == null)
        continue;
    consumer.seek(entry.getKey(), found.offset()); // seek each partition to the offset that was looked up
}
下面通过源码分析offsetsForTimes的过程。
consumer端
Consumer#offsetsForTimes方法
/**
 * Resolve offsets by timestamp for the given partitions. For each partition the result is the earliest
 * offset whose timestamp is greater than or equal to the timestamp requested for that partition.
 *
 * The call blocks, and the partitions do not have to be assigned to this consumer. When the message format
 * version of a partition is older than 0.10.0 (messages carry no timestamps), {@code null} is returned for
 * that partition.
 *
 * @param timestampsToSearch the timestamp to look up, keyed by partition.
 * @param timeout the longest time to wait for the offsets to be retrieved
 *
 * @return for every partition, the timestamp and offset of the first message whose timestamp is greater than
 *         or equal to the target timestamp; {@code null} for a partition that has no such message.
 * @throws org.apache.kafka.common.errors.AuthenticationException if authentication fails. See the exception for more details
 * @throws org.apache.kafka.common.errors.AuthorizationException if not authorized to the topic(s). See the exception for more details
 * @throws IllegalArgumentException if the target timestamp is negative
 * @throws org.apache.kafka.common.errors.TimeoutException if the offset metadata could not be fetched before
 *         expiration of the passed timeout
 * @throws org.apache.kafka.common.errors.UnsupportedVersionException if the broker does not support looking up
 *         the offsets by timestamp
 */
@Override
public Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes(Map<TopicPartition, Long> timestampsToSearch, Duration timeout) {
    acquireAndEnsureOpen();
    try {
        // Validate every target before issuing any lookup. Negative values are rejected, which explicitly
        // excludes the earliest and latest offset lookups so that the timestamp in the returned
        // OffsetAndTimestamp is always positive.
        for (Map.Entry<TopicPartition, Long> lookup : timestampsToSearch.entrySet()) {
            final Long targetTime = lookup.getValue();
            if (targetTime >= 0)
                continue;
            throw new IllegalArgumentException("The target time for partition " + lookup.getKey() + " is " +
                    targetTime + ". The target time cannot be negative.");
        }
        // The fetcher performs the actual lookup, bounded by a timer derived from the caller's timeout.
        return fetcher.offsetsByTimes(timestampsToSearch, time.timer(timeout));
    } finally {
        // Always paired with acquireAndEnsureOpen() above, whether the lookup succeeded or threw.
        release();
    }
}
Fetcher#offsetsByTimes方法
/**
 * Look up offsets by timestamp and return one entry per requested partition.
 *
 * Every key of {@code timestampsToSearch} appears in the result. Partitions for which the lookup produced
 * no offset keep a {@code null} value; the others map to the fetched offset, timestamp and leader epoch.
 *
 * @param timestampsToSearch the timestamp to look up, keyed by partition
 * @param timer bounds how long the underlying lookup may take
 * @return a map from each requested partition to its {@link OffsetAndTimestamp}, or to {@code null}
 */
public Map<TopicPartition, OffsetAndTimestamp> offsetsByTimes(Map<TopicPartition, Long> timestampsToSearch,
                                                               Timer timer) {
    // Timestamps are required (third argument is true).
    ListOffsetResult lookup = fetchOffsetsByTimes(timestampsToSearch, timer, true);

    // Seed the answer with null for every requested partition so that none is missing from the result.
    HashMap<TopicPartition, OffsetAndTimestamp> resolved = new HashMap<>(timestampsToSearch.size());
    for (TopicPartition requested : timestampsToSearch.keySet())
        resolved.put(requested, null);

    // Overwrite the placeholders for partitions that actually returned data. The timestamp will not be
    // null here since we are guaranteed to work with a v1 (or later) ListOffset request.
    lookup.fetchedOffsets.forEach((partition, data) ->
            resolved.put(partition, new OffsetAndTimestamp(data.offset, data.timestamp, data.leaderEpoch)));

    return resolved;
}
Fetcher#fetchOffsetsByTimes方法
/**
 * Repeatedly send ListOffsets requests until every partition has been answered, a non-retriable error
 * occurs, or the timer runs out.
 *
 * @param timestampsToSearch the timestamp to look up, keyed by partition
 * @param timer bounds the total time spent polling, waiting for metadata and backing off
 * @param requireTimestamps forwarded unchanged to {@code sendListOffsetsRequests}
 * @return the accumulated result; returned immediately (and empty) when {@code timestampsToSearch} is empty
 * @throws TimeoutException if the timer expires, or a request is still not done after polling
 */
private ListOffsetResult fetchOffsetsByTimes(Map<TopicPartition, Long> timestampsToSearch,
                                             Timer timer,
                                             boolean requireTimestamps) {
    ListOffsetResult accumulated = new ListOffsetResult();
    if (timestampsToSearch.isEmpty())
        return accumulated;

    // Work on a copy: partitions are dropped from it as they get answered, the caller's map is untouched.
    Map<TopicPartition, Long> pendingLookups = new HashMap<>(timestampsToSearch);
    do {
        RequestFuture<ListOffsetResult> attempt = sendListOffsetsRequests(pendingLookups, requireTimestamps);
        client.poll(attempt, timer);

        // Still not done after polling with the timer: leave the loop and report a timeout.
        if (!attempt.isDone())
            break;

        if (!attempt.succeeded()) {
            // Retriable failures fall through to the back-off below; anything else is rethrown.
            if (!attempt.isRetriable())
                throw attempt.exception();
        } else {
            ListOffsetResult partial = attempt.value();
            accumulated.fetchedOffsets.putAll(partial.fetchedOffsets);
            if (partial.partitionsToRetry.isEmpty())
                return accumulated;
            // Only the partitions flagged for retry are asked about in the next round.
            pendingLookups.keySet().retainAll(partial.partitionsToRetry);
        }

        // Before retrying, either wait for a requested metadata update or sleep for the retry back-off.
        if (!metadata.updateRequested())
            timer.sleep(retryBackoffMs);
        else
            client.awaitMetadataUpdate(timer);
    } while (timer.notExpired());

    throw new TimeoutException("Failed to get offsets by times in " + timer.elapsedMs() + "ms");
}
服务端
KafkaApis#handleListOffsetRequest方法
/**
 * Handle a ListOffset request: pick the handler matching the request's API version, then send the
 * per-partition results back through `sendResponseMaybeThrottle`.
 *
 * Version 0 goes to `handleListOffsetRequestV0`; every other version goes to
 * `handleListOffsetRequestV1AndAbove`.
 *
 * @param request the incoming request
 */
def handleListOffsetRequest(request: RequestChannel.Request): Unit = {
  val version = request.header.apiVersion()

  val mergedResponseMap =
    if (version == 0) handleListOffsetRequestV0(request)
    else handleListOffsetRequestV1AndAbove(request)

  // The response is built lazily from the throttle time supplied by sendResponseMaybeThrottle.
  sendResponseMaybeThrottle(request, requestThrottleMs => new ListOffsetResponse(requestThrottleMs, mergedResponseMap.asJava))
}
KafkaApis#handleListOffsetRequestV1AndAbove方法
/**
 * Build the per-partition responses for a ListOffset request of version 1 or above.
 *
 * Each requested partition ends up with exactly one entry:
 *  - partitions the session may not Describe get TOPIC_AUTHORIZATION_FAILED;
 *  - partitions listed more than once in the request get INVALID_REQUEST;
 *  - all others are looked up through `replicaManager.fetchOffsetForTimestamp`, and any exception
 *    thrown by that lookup is translated into the matching error code.
 *
 * @param request the incoming request, whose body is a ListOffsetRequest
 * @return response data keyed by partition, covering authorized and unauthorized partitions alike
 */
private def handleListOffsetRequestV1AndAbove(request : RequestChannel.Request): Map[TopicPartition, ListOffsetResponse.PartitionData] = {
  val correlationId = request.header.correlationId
  val clientId = request.header.clientId
  val offsetRequest = request.body[ListOffsetRequest]

  // Error-only response entry: unknown timestamp, unknown offset, no leader epoch.
  def failure(error: Errors): ListOffsetResponse.PartitionData =
    new ListOffsetResponse.PartitionData(error,
      ListOffsetResponse.UNKNOWN_TIMESTAMP,
      ListOffsetResponse.UNKNOWN_OFFSET,
      Optional.empty())

  // Split the requested partitions by whether the session may Describe their topic.
  val (authorizedRequestInfo, unauthorizedRequestInfo) = offsetRequest.partitionTimestamps.asScala.partition { entry =>
    authorize(request.session, Describe, Resource(Topic, entry._1.topic, LITERAL))
  }

  val unauthorizedResponseStatus = unauthorizedRequestInfo.mapValues(_ => failure(Errors.TOPIC_AUTHORIZATION_FAILED))

  val responseMap = authorizedRequestInfo.map {
    case (topicPartition, _) if offsetRequest.duplicatePartitions.contains(topicPartition) =>
      debug(s"OffsetRequest with correlation id $correlationId from client $clientId on partition $topicPartition " +
          s"failed because the partition is duplicated in the request.")
      topicPartition -> failure(Errors.INVALID_REQUEST)

    case (topicPartition, partitionData) =>
      try {
        // Only the debugging replica id is allowed to read from a non-leader.
        val fetchOnlyFromLeader = offsetRequest.replicaId != ListOffsetRequest.DEBUGGING_REPLICA_ID
        // The isolation level is only passed on for requests carrying the consumer replica id.
        val isolationLevelOpt =
          if (offsetRequest.replicaId != ListOffsetRequest.CONSUMER_REPLICA_ID) None
          else Some(offsetRequest.isolationLevel)

        // Delegate the actual lookup to ReplicaManager.fetchOffsetForTimestamp.
        val found = replicaManager.fetchOffsetForTimestamp(topicPartition,
          partitionData.timestamp,
          isolationLevelOpt,
          partitionData.currentLeaderEpoch,
          fetchOnlyFromLeader)

        topicPartition -> new ListOffsetResponse.PartitionData(Errors.NONE, found.timestamp, found.offset,
          Optional.empty())
      } catch {
        // NOTE: These exceptions are special cased since these error messages are typically transient or the
        // client would have received a clear exception, so logging the entire stack trace adds no value.
        case e @ (_ : UnknownTopicOrPartitionException |
                  _ : NotLeaderForPartitionException |
                  _ : UnknownLeaderEpochException |
                  _ : FencedLeaderEpochException |
                  _ : KafkaStorageException |
                  _ : UnsupportedForMessageFormatException) =>
          debug(s"Offset request with correlation id $correlationId from client $clientId on " +
              s"partition $topicPartition failed due to ${e.getMessage}")
          topicPartition -> failure(Errors.forException(e))

        // Anything else is logged at error level with its stack trace, then reported the same way.
        case e: Throwable =>
          error("Error while responding to offset request", e)
          topicPartition -> failure(Errors.forException(e))
      }
  }

  responseMap ++ unauthorizedResponseStatus
}
ReplicaManager#fetchOffsetForTimestamp方法
/**
 * Resolve the partition for `topicPartition` and delegate the timestamp lookup to it.
 *
 * `getPartitionOrException` is called with `expectLeader = fetchOnlyFromLeader`; the remaining arguments
 * are handed to the partition's own `fetchOffsetForTimestamp` unchanged.
 *
 * @param topicPartition the partition to search
 * @param timestamp the target timestamp
 * @param isolationLevel optional isolation level of the requester
 * @param currentLeaderEpoch the leader epoch supplied by the requester, if any
 * @param fetchOnlyFromLeader whether the lookup must be served by the leader
 * @return the timestamp and offset produced by the partition
 */
def fetchOffsetForTimestamp(topicPartition: TopicPartition,
                            timestamp: Long,
                            isolationLevel: Option[IsolationLevel],
                            currentLeaderEpoch: Optional[Integer],
                            fetchOnlyFromLeader: Boolean): TimestampOffset =
  getPartitionOrException(topicPartition, expectLeader = fetchOnlyFromLeader)
    .fetchOffsetForTimestamp(timestamp, isolationLevel, currentLeaderEpoch, fetchOnlyFromLeader)
Partition#fetchOffsetForTimestamp方法
/**
 * Look up the offset for `timestamp` in this partition's local log, under the leader/ISR read lock.
 *
 * The isolation level decides the upper bound of what may be returned: the last stable offset for
 * READ_COMMITTED, the high watermark for READ_UNCOMMITTED, and the log end offset when no isolation
 * level is given.
 *
 *  - A LATEST lookup returns that bound directly, without a timestamp.
 *  - Otherwise the log is searched; a hit is returned only if the lookup is for EARLIEST or the hit lies
 *    strictly below the bound. In every other case, including when the log manager has no log for this
 *    partition, `TimestampOffset.Unknown` is returned.
 *
 * @param timestamp the target timestamp, or one of the EARLIEST/LATEST sentinels
 * @param isolationLevel optional isolation level of the requester
 * @param currentLeaderEpoch the leader epoch supplied by the requester, if any
 * @param fetchOnlyFromLeader whether the lookup must be served by the leader
 * @return the timestamp and offset found, or `TimestampOffset.Unknown`
 */
def fetchOffsetForTimestamp(timestamp: Long,
                            isolationLevel: Option[IsolationLevel],
                            currentLeaderEpoch: Optional[Integer],
                            fetchOnlyFromLeader: Boolean): TimestampOffset = inReadLock(leaderIsrUpdateLock) {
  // Resolving the local replica also decides whether to only fetch from the leader.
  val localReplica = localReplicaWithEpochOrException(currentLeaderEpoch, fetchOnlyFromLeader)

  val visibleEndOffset = isolationLevel match {
    case None => localReplica.logEndOffset.messageOffset
    case Some(IsolationLevel.READ_UNCOMMITTED) => localReplica.highWatermark.messageOffset
    case Some(IsolationLevel.READ_COMMITTED) => localReplica.lastStableOffset.messageOffset
  }

  if (timestamp != ListOffsetRequest.LATEST_TIMESTAMP) {
    val earliestRequested = timestamp == ListOffsetRequest.EARLIEST_TIMESTAMP
    logManager.getLog(topicPartition)
      // Delegate the search to Log#fetchOffsetsByTimestamp.
      .flatMap(_.fetchOffsetsByTimestamp(timestamp))
      // Hide offsets at or beyond the visible end, unless the earliest offset was asked for.
      .filter(candidate => earliestRequested || candidate.offset < visibleEndOffset)
      .getOrElse(TimestampOffset.Unknown)
  } else {
    TimestampOffset(RecordBatch.NO_TIMESTAMP, visibleEndOffset)
  }
}
Log#fetchOffsetsByTimestamp方法
/**
 * Get an offset based on the given timestamp.
 * The offset returned is the offset of the first message whose timestamp is greater than or equal to the
 * given timestamp.
 *
 * Special targets: EARLIEST_TIMESTAMP yields the log start offset and LATEST_TIMESTAMP yields the log end
 * offset, both without a timestamp.
 *
 * If no segment contains a message with a large enough timestamp, None is returned.
 *
 * `NOTE:` OffsetRequest V0 does not use this method, the behavior of OffsetRequest V0 remains the same as
 * before, i.e. it only gives back the timestamp based on the last modification time of the log segments.
 *
 * @param targetTimestamp The given timestamp for offset fetching.
 * @return The offset of the first message whose timestamp is greater than or equal to the given timestamp.
 *         None if no such message is found.
 * @throws UnsupportedForMessageFormatException if a concrete timestamp is searched for while the message
 *         format version of the log is older than KAFKA_0_10_0_IV0
 */
def fetchOffsetsByTimestamp(targetTimestamp: Long): Option[TimestampOffset] = {
  maybeHandleIOException(s"Error while fetching offset by timestamp for $topicPartition in dir ${dir.getParent}") {
    debug(s"Searching offset for timestamp $targetTimestamp")

    if (config.messageFormatVersion < KAFKA_0_10_0_IV0 &&
        targetTimestamp != ListOffsetRequest.EARLIEST_TIMESTAMP &&
        targetTimestamp != ListOffsetRequest.LATEST_TIMESTAMP)
      throw new UnsupportedForMessageFormatException(s"Cannot search offsets based on timestamp because message format version " +
          s"for partition $topicPartition is ${config.messageFormatVersion} which is earlier than the minimum " +
          s"required version $KAFKA_0_10_0_IV0")

    // The result is the value of this if/else chain rather than an early `return`: a `return` inside this
    // by-name block would be a nonlocal return, implemented by throwing through maybeHandleIOException.
    // For the earliest and latest, we do not need to return the timestamp.
    if (targetTimestamp == ListOffsetRequest.EARLIEST_TIMESTAMP)
      Some(TimestampOffset(RecordBatch.NO_TIMESTAMP, logStartOffset))
    else if (targetTimestamp == ListOffsetRequest.LATEST_TIMESTAMP)
      Some(TimestampOffset(RecordBatch.NO_TIMESTAMP, logEndOffset))
    else {
      // Snapshot the segments so the search runs over a stable view. `toBuffer` is faster than most
      // alternatives and is safe to use with concurrent collections unlike `toArray`.
      val segmentsCopy = logSegments.toBuffer
      // Search the first segment whose largest timestamp is not smaller than the target, if there is one.
      segmentsCopy
        .find(_.largestTimestamp >= targetTimestamp)
        .flatMap(_.findOffsetByTimestamp(targetTimestamp, logStartOffset))
    }
  }
}
LogSegment#findOffsetByTimestamp方法
/**
 * Search this segment for a message by timestamp, starting no earlier than a given offset.
 *
 * The result is an option of TimestampOffset, determined by the following ordered list of rules:
 *
 * - If all the messages in the segment have smaller offsets, return None
 * - If all the messages in the segment have smaller timestamps, return None
 * - If all the messages in the segment have larger timestamps, or no message in the segment has a timestamp,
 *   the returned offset will be max(the base offset of the segment, startingOffset) and the timestamp will be
 *   Message.NoTimestamp.
 * - Otherwise, return an option of TimestampOffset. The offset is the offset of the first message whose
 *   timestamp is greater than or equal to the target timestamp and whose offset is greater than or equal to
 *   the startingOffset.
 *
 * This method only returns None when 1) all messages' offset < startingOffset or 2) the log is not empty but
 * we did not see any message when scanning the log from the indexed position. The latter could happen if the
 * log is truncated after we get the indexed position but before we scan the log from there. In this case we
 * simply return None and the caller will need to check on the truncated log and maybe retry or even do the
 * search on another log segment.
 *
 * @param timestamp The timestamp to search for.
 * @param startingOffset The starting offset to search; defaults to the segment's base offset.
 * @return the timestamp and offset of the first message that meets the requirements. None will be returned
 *         if there is no such message.
 */
def findOffsetByTimestamp(timestamp: Long, startingOffset: Long = baseOffset): Option[TimestampOffset] = {
  // Offset of the time index entry with a timestamp less than or equal to the target timestamp.
  val indexedOffset = timeIndex.lookup(timestamp).offset
  // Never start scanning before the caller's starting offset.
  val scanFromOffset = math.max(indexedOffset, startingOffset)
  val position = offsetIndex.lookup(scanFromOffset).position

  // searchForTimestamp may hand back null, hence the Option wrapper.
  for (hit <- Option(log.searchForTimestamp(timestamp, position, startingOffset)))
    yield TimestampOffset(hit.timestamp, hit.offset)
}