Kafka LogSegment append Method: A Source Code Walkthrough

A source-level analysis of LogSegment.

How Kafka logs are organized on disk:

[Figure: on-disk layout of a Kafka log]

Flow of the LogSegment append method:

[Figure: LogSegment append flow chart]

Step 1: Check whether the current log segment is empty

val physicalPosition = log.sizeInBytes() // current size of this segment in bytes, i.e. the physical write position
if (physicalPosition == 0) // empty segment: remember the timestamp used later to decide when to roll the segment
  rollingBasedTimestamp = Some(largestTimestamp) // largestTimestamp: Long, the largest timestamp in this batch of messages
        
// Method in FileRecords
private final AtomicInteger size;
@Override
public int sizeInBytes() {
    return size.get();
}

// AtomicInteger.get(), which size.get() above resolves to
/**
 * Gets the current value.
 *
 * @return the current value
 */
public final int get() {
    return value;
}

// The timestamp we used for time based log rolling and for ensuring max compaction delay
// volatile for LogCleaner to see the update
@volatile private var rollingBasedTimestamp: Option[Long] = None

/** Class `Some[A]` represents existing values of type
 *  `A`.
 */
@SerialVersionUID(1234815782226070388L) // value computed by serialver for 2.11.2, annotation added in 2.11.4
final case class Some[+A](value: A) extends Option[A] {
  def get: A = value
}
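
Why record rollingBasedTimestamp only when the segment is empty? The largest timestamp of the first batch marks the point in time at which the segment started accumulating data, and the time-based rolling check later measures elapsed time against it. The sketch below is a simplified, standalone rendering of that check (the real method in LogSegment reads the fields directly; the parameters here are added only for illustration):

def timeWaitedForRoll(now: Long, messageTimestamp: Long,
                      created: Long, rollingBasedTimestamp: Option[Long]): Long =
  rollingBasedTimestamp match {
    case Some(t) if t >= 0 => messageTimestamp - t // time span covered by the data already in this segment
    case _                 => now - created        // no data yet: fall back to the segment's creation time
  }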

Step 2: Ensure the largest offset passed in is valid

ensureOffsetInRange(largestOffset)

private def ensureOffsetInRange(offset: Long): Unit = {
    if (!canConvertToRelativeOffset(offset))
      throw new LogSegmentOffsetOverflowException(this, offset)
}

/**
 * Checks that the argument offset can be represented as an integer offset relative to the baseOffset.
 */
def canConvertToRelativeOffset(offset: Long): Boolean = {
  offsetIndex.canAppendOffset(offset)
}

def offsetIndex: OffsetIndex = lazyOffsetIndex.get

/**
 * Check if a particular offset is valid to be appended to this index.
 * @param offset The offset to check
 * @return true if this offset is valid to be appended to this index; false otherwise
 */
def canAppendOffset(offset: Long): Boolean = {
  toRelative(offset).isDefined
}
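
The actual range check sits in toRelative. Roughly (taken from AbstractIndex; details may differ between Kafka versions), it computes the distance from the segment's base offset and accepts it only if it fits in an Int, because index entries store offsets as 4-byte values relative to baseOffset:

private def toRelative(offset: Long): Option[Int] = {
  val relativeOffset = offset - baseOffset
  if (relativeOffset < 0 || relativeOffset > Int.MaxValue)
    None
  else
    Some(relativeOffset.toInt)
}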

Step 3: Call FileRecords.append to do the actual write

/**
     * Append a set of records to the file. This method is not thread-safe and must be
     * protected with a lock.
     *
     * @param records The records to append
     * @return the number of bytes written to the underlying file
     */
    public int append(MemoryRecords records) throws IOException {
        // reject the batch if appending it would push the segment size (size.get()) past Integer.MAX_VALUE
        if (records.sizeInBytes() > Integer.MAX_VALUE - size.get())
            throw new IllegalArgumentException("Append of size " + records.sizeInBytes() +
                    " bytes is too large for segment with current file position at " + size.get());

        int written = records.writeFullyTo(channel);
        size.getAndAdd(written);
        return written;
    }
    
    
    /**
     * Write all records to the given channel (including partial records).
     * @param channel The channel to write to
     * @return The number of bytes written
     * @throws IOException For any IO errors writing to the channel
     */
    public int writeFullyTo(GatheringByteChannel channel) throws IOException {
        buffer.mark();
        int written = 0;
        while (written < sizeInBytes())
            written += channel.write(buffer);
        buffer.reset();
        return written;
    }

    /**
     * AtomicInteger.getAndAdd
     * Atomically adds the given value to the current value.
     *
     * @param delta the value to add
     * @return the previous value
     */
    public final int getAndAdd(int delta) {
        return unsafe.getAndAddInt(this, valueOffset, delta);
    }

    /**
     * AtomicInteger.get
     * Gets the current value.
     *
     * @return the current value
     */
    public final int get() {
        return value;
    }
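
To see FileRecords.append in isolation, here is a minimal sketch that appends one in-memory batch to a throwaway segment file. It assumes a 2.x-era kafka-clients jar on the classpath; the MemoryRecords factory signatures vary somewhat between versions:

import java.io.File
import org.apache.kafka.common.record.{CompressionType, FileRecords, MemoryRecords, SimpleRecord}

object FileRecordsAppendDemo {
  def main(args: Array[String]): Unit = {
    // a throwaway segment file; real segments are named by their base offset, e.g. 00000000000000000000.log
    val file = File.createTempFile("demo-segment", ".log")
    val fileRecords = FileRecords.open(file)

    // build an in-memory batch of one record and append it to the file
    val batch = MemoryRecords.withRecords(CompressionType.NONE,
      new SimpleRecord(System.currentTimeMillis(), "key".getBytes, "value".getBytes))
    val written = fileRecords.append(batch) // writes into the page cache via the underlying FileChannel

    println(s"appended $written bytes, segment size is now ${fileRecords.sizeInBytes()} bytes")
    fileRecords.flush() // force the data onto disk
    fileRecords.close()
  }
}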


Step 4: Update the current max timestamp and the offset of the message that carries it

 // Update the in memory max timestamp and corresponding offset.
 if (largestTimestamp > maxTimestampSoFar) {
     maxTimestampSoFar = largestTimestamp
     offsetOfMaxTimestampSoFar = shallowOffsetOfMaxTimestamp
 }
      
  /* The maximum timestamp we see so far */
  @volatile private var _maxTimestampSoFar: Option[Long] = None
  def maxTimestampSoFar_=(timestamp: Long): Unit = _maxTimestampSoFar = Some(timestamp)
  def maxTimestampSoFar: Long = {
    if (_maxTimestampSoFar.isEmpty)
      _maxTimestampSoFar = Some(timeIndex.lastEntry.timestamp)
    _maxTimestampSoFar.get
  }

 @volatile private var _offsetOfMaxTimestampSoFar: Option[Long] = None
  def offsetOfMaxTimestampSoFar_=(offset: Long): Unit = _offsetOfMaxTimestampSoFar = Some(offset)
  def offsetOfMaxTimestampSoFar: Long = {
    if (_offsetOfMaxTimestampSoFar.isEmpty)
      _offsetOfMaxTimestampSoFar = Some(timeIndex.lastEntry.offset)
    _offsetOfMaxTimestampSoFar.get
  }
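
A quick note on the Scala syntax above: defining maxTimestampSoFar_= gives the class a custom setter, which is why step 4 can simply write maxTimestampSoFar = largestTimestamp even though the backing field is an Option. A minimal, Kafka-unrelated illustration of the same pattern:

class LazyMax {
  @volatile private var _value: Option[Long] = None
  def value_=(v: Long): Unit = _value = Some(v)     // enables `obj.value = 42L`
  def value: Long = _value.getOrElse(Long.MinValue) // default when nothing has been recorded yet
}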

Step 5: Update the index entries and the count of bytes written since the last index entry

if (bytesSinceLastIndexEntry > indexIntervalBytes) {
    offsetIndex.append(largestOffset, physicalPosition)
    timeIndex.maybeAppend(maxTimestampSoFar, offsetOfMaxTimestampSoFar)
    bytesSinceLastIndexEntry = 0
}
bytesSinceLastIndexEntry += records.sizeInBytes

/* the number of bytes since we last added an entry in the offset index */
private var bytesSinceLastIndexEntry = 0
// write an index entry for roughly every indexIntervalBytes bytes of log data
val indexIntervalBytes: Int,
// the offset index and time index are loaded lazily
def offsetIndex: OffsetIndex = lazyOffsetIndex.get
def timeIndex: TimeIndex = lazyTimeIndex.get
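
The index is deliberately sparse. The small standalone simulation below (not Kafka code; 4096 is the default of the log.index.interval.bytes broker config) mimics step 5's accumulation logic to show how few index entries a stream of batches actually produces:

object SparseIndexDemo {
  def main(args: Array[String]): Unit = {
    val indexIntervalBytes = 4096 // default log.index.interval.bytes
    var bytesSinceLastIndexEntry = 0
    var indexEntries = 0
    val batchSizes = Seq(1200, 800, 2500, 3000, 600, 4100, 900) // hypothetical batch sizes in bytes
    for (size <- batchSizes) {
      if (bytesSinceLastIndexEntry > indexIntervalBytes) {
        indexEntries += 1 // stands in for offsetIndex.append / timeIndex.maybeAppend
        bytesSinceLastIndexEntry = 0
      }
      bytesSinceLastIndexEntry += size
    }
    println(s"appended ${batchSizes.sum} bytes in ${batchSizes.size} batches, added $indexEntries index entries")
  }
}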

Full source of LogSegment.append

/**
   * Append the given messages starting with the given offset. Add
   * an entry to the index if needed.
   *
   * It is assumed this method is being called from within a lock.
   *
   * @param largestOffset The last offset in the message set
   * @param largestTimestamp The largest timestamp in the message set.
   * @param shallowOffsetOfMaxTimestamp The offset of the message that has the largest timestamp in the messages to append.
   * @param records The log entries to append.
   * @return the physical position in the file of the appended records
   * @throws LogSegmentOffsetOverflowException if the largest offset causes index offset overflow
   */
  @nonthreadsafe
  def append(largestOffset: Long, // the largest offset in this batch of messages
             largestTimestamp: Long, // the largest timestamp in this batch of messages
             shallowOffsetOfMaxTimestamp: Long, // the offset of the message carrying the largest timestamp in this batch
             records: MemoryRecords): Unit = {
    if (records.sizeInBytes > 0) {
      trace(s"Inserting ${records.sizeInBytes} bytes at end offset $largestOffset at position ${log.sizeInBytes} " +
            s"with largest timestamp $largestTimestamp at shallow offset $shallowOffsetOfMaxTimestamp")
      /**
        * Step 1:
        * Check whether the current log segment is empty.
        * `log` (a FileRecords) is the object that actually holds the messages on disk.
        */
      val physicalPosition = log.sizeInBytes() // current size of this segment in bytes, i.e. the physical write position
      if (physicalPosition == 0) // empty segment: remember the timestamp used later for time-based rolling
        rollingBasedTimestamp = Some(largestTimestamp)

      /**
        * Step 2:
        * Make sure the largest offset passed in is valid: its distance from the segment's
        * base offset must fit in an Int, i.e. largestOffset - baseOffset is in [0, Int.MaxValue].
        *
        * largestOffset: the largest offset in this batch of messages
        */
      ensureOffsetInRange(largestOffset)

      // append the messages
      /**
        * Step 3:
        * Call FileRecords.append to perform the actual write.
        * FileRecords writes the in-memory message batch into the operating system's page cache.
        */
      val appendedBytes = log.append(records)
      trace(s"Appended $appendedBytes to ${log.file} at end offset $largestOffset")
      // Update the in memory max timestamp and corresponding offset.
      /**
        * Step 4:
        * Update the current max timestamp and the offset of the message that carries it.
        * Every log segment keeps track of the largest timestamp it has seen and that message's offset.
        *
        * The max timestamp is the basis for the broker's time-based log retention (deletion),
        * and its offset feeds the time index, which maps timestamps to message offsets.
        */
      if (largestTimestamp > maxTimestampSoFar) {
        maxTimestampSoFar = largestTimestamp
        offsetOfMaxTimestampSoFar = shallowOffsetOfMaxTimestamp
      }
      // append an entry to the index (if needed)
      /**
        * Step 5:
        * Update the index entries and the count of bytes written since the last index entry.
        * The segment adds an index entry roughly every indexIntervalBytes (4 KB by default) of log data.
        * Once the accumulated bytes exceed that threshold, append adds a new entry to the offset index
        * and the time index, then resets the counter so accumulation starts over.
        */
      if (bytesSinceLastIndexEntry > indexIntervalBytes) {
        offsetIndex.append(largestOffset, physicalPosition)
        timeIndex.maybeAppend(maxTimestampSoFar, offsetOfMaxTimestampSoFar)
        bytesSinceLastIndexEntry = 0
      }
      bytesSinceLastIndexEntry += records.sizeInBytes
    }
  }
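
For context on where the parameters come from, the caller is Log.append (UnifiedLog in newer releases): after validating the batch and assigning offsets, it hands everything to the active segment, roughly along these lines (a sketch based on 2.x Log.scala; field names may differ in other versions):

segment.append(largestOffset = appendInfo.lastOffset,
  largestTimestamp = appendInfo.maxTimestamp,
  shallowOffsetOfMaxTimestamp = appendInfo.offsetOfMaxTimestamp,
  records = validRecords)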