ByteBufferMessageSet底层使用ByteBuffer保存数据,它主要提供以下三种功能:
# 将Message Set按照指定的压缩类型进行压缩,此功能主要用于构建ByteBufferMessageSet对象,通过create方法完成
# 提供迭代器,实现深层迭代和浅层迭代2种方式
# 提供消息验证和offset分配功能
create方法
/**
 * Serializes `messages` into a single ByteBuffer, optionally wrapping them in one compressed
 * wrapper message.
 *
 *  - Empty input: returns `MessageSet.Empty.buffer`.
 *  - `NoCompressionCodec`: every message is written to a freshly allocated buffer together with
 *    the next offset handed out by `offsetAssigner`.
 *  - Any other codec: the messages are streamed through a compressing output stream into a
 *    `MessageWriter`, which is then written as one wrapper message whose offset is the offset
 *    assigned to the last inner message.
 *
 * @param offsetAssigner          source of the offsets assigned to each message
 * @param compressionCodec        codec used for the wrapper message; `NoCompressionCodec` disables wrapping
 * @param wrapperMessageTimestamp timestamp for the wrapper message; when `None`, the value returned by
 *                                `MessageSet.magicAndLargestTimestamp(messages)` is used
 * @param timestampType           timestamp type passed to the wrapper message writer
 * @param messages                messages to serialize
 * @return the buffer containing the serialized messages, rewound to position 0
 * @throws IllegalArgumentException if, in the compressed path, a message's magic value differs
 *                                  from the magic value chosen for the wrapper message
 */
private def create(offsetAssigner: OffsetAssigner, compressionCodec: CompressionCodec, wrapperMessageTimestamp: Option[Long],
                   timestampType: TimestampType, messages: Message*): ByteBuffer = {
  if (messages.isEmpty)
    // Nothing to write: hand back the shared empty buffer.
    MessageSet.Empty.buffer
  else if (compressionCodec == NoCompressionCodec) {
    // Uncompressed: write each message with its own newly assigned offset.
    val buffer = ByteBuffer.allocate(MessageSet.messageSetSize(messages))
    for (message <- messages) writeMessage(buffer, message, offsetAssigner.nextAbsoluteOffset())
    buffer.rewind()
    buffer
  } else {
    // Magic value and timestamp of the wrapper message: an explicit timestamp is paired with
    // the first message's magic, otherwise both come from the message set.
    val magicAndTimestamp = wrapperMessageTimestamp match {
      case Some(ts) => MagicAndTimestamp(messages.head.magic, ts)
      case None => MessageSet.magicAndLargestTimestamp(messages)
    }
    // Tracks the most recently assigned offset; after the loop it is the last inner offset.
    var offset = -1L
    // The writer's size argument is half the uncompressed size, clamped to [1024, 65536].
    // NOTE(review): presumably an initial buffer/segment size - confirm against MessageWriter.
    val messageWriter = new MessageWriter(math.min(math.max(MessageSet.messageSetSize(messages) / 2, 1024), 1 << 16))
    messageWriter.write(codec = compressionCodec, timestamp = magicAndTimestamp.timestamp, timestampType = timestampType, magicValue = magicAndTimestamp.magic) { outputStream =>
      // Wrap the writer's stream in a compressing stream for the requested codec.
      val output = new DataOutputStream(CompressionFactory(compressionCodec, magicAndTimestamp.magic, outputStream))
      try {
        for (message <- messages) {
          offset = offsetAssigner.nextAbsoluteOffset()
          // All inner messages must share the wrapper's magic value.
          if (message.magic != magicAndTimestamp.magic)
            throw new IllegalArgumentException("Messages in the message set must have same magic value")
          // Magic above V0 stores the inner offset derived by the assigner; V0 stores the
          // assigned offset itself.
          if (magicAndTimestamp.magic > Message.MagicValue_V0)
            output.writeLong(offsetAssigner.toInnerOffset(offset))
          else
            output.writeLong(offset)
          // Size field followed by the raw message bytes.
          output.writeInt(message.size)
          output.write(message.buffer.array, message.buffer.arrayOffset, message.buffer.limit)
        }
      } finally {
        // Always close so the compressing stream is flushed/released even on failure.
        output.close()
      }
    }
    val buffer = ByteBuffer.allocate(messageWriter.size + MessageSet.LogOverhead)
    // Write the wrapper message; its offset is the offset of the last inner message.
    writeMessage(buffer, messageWriter, offset)
    buffer.rewind()
    buffer
  }
}
validateMessagesAndAssignOffsets 该方法主要用于验证消息和分配offset
需要验证哪些东西呢?
# 检查Magic Value
# 检查时间戳与时间戳类型
# 对于开启日志压缩(compacted)的topic,需要检查消息是否有key
# 可以重新设定时间戳类型和时间戳
# 进行offset分配
# 如果消息压缩类型与broker指定压缩类型不一致,需要重新压缩
/**
 * Validates the messages of this set and assigns offsets taken from `offsetCounter`.
 *
 * When neither the source nor the target codec compresses, the work is delegated to one of two
 * helpers depending on whether every wrapper message already has `messageFormatVersion` as its
 * magic value. Otherwise the inner messages are iterated, validated and collected, and the set
 * is either rebuilt as a new ByteBufferMessageSet or patched in place in the existing buffer.
 *
 * @param offsetCounter             counter the assigned offsets are drawn from; it is advanced by this call
 * @param now                       current time, used for timestamp validation and as the log-append timestamp
 * @param sourceCodec               compression codec of the incoming messages
 * @param targetCodec               compression codec the resulting message set must use
 * @param compactedTopic            passed to the message key validation
 * @param messageFormatVersion      magic value the resulting messages must have
 * @param messageTimestampType      timestamp type (CREATE_TIME or LOG_APPEND_TIME) to apply
 * @param messageTimestampDiffMaxMs passed to the timestamp validation
 * @return the validated message set together with the max timestamp, the offset of the max
 *         timestamp, and whether the message size may have changed
 * @throws InvalidMessageException if the source is compressed and an inner message itself
 *                                 carries a compression codec
 */
private[kafka] def validateMessagesAndAssignOffsets(offsetCounter: LongRef,
now: Long,
sourceCodec: CompressionCodec,
targetCodec: CompressionCodec,
compactedTopic: Boolean = false,
messageFormatVersion: Byte = Message.CurrentMagicValue,
messageTimestampType: TimestampType,
messageTimestampDiffMaxMs: Long): ValidationAndOffsetAssignResult = {
// Neither the source nor the target uses compression
if (sourceCodec == NoCompressionCodec && targetCodec == NoCompressionCodec) {
// Check whether every wrapper message already has the requested magic value
if (!isMagicValueInAllWrapperMessages(messageFormatVersion))
// Magic values differ, so the messages have to be converted to one format.
// NOTE(review): the helper is not shown here; presumably it builds a new ByteBufferMessageSet
// (sizes may change) while assigning offsets and updating CRC32/timestamps - confirm.
convertNonCompressedMessages(offsetCounter, compactedTopic, now, messageTimestampType, messageTimestampDiffMaxMs,
messageFormatVersion)
else
// Uncompressed messages that already share the requested magic value: validate and assign in place.
// NOTE(review): presumably sizes stay unchanged and only offsets/CRC32/timestamps are touched - confirm.
validateNonCompressedMessagesAndAssignOffsetInPlace(offsetCounter, now, compactedTopic, messageTimestampType,
messageTimestampDiffMaxMs)
} else { // At least one of the source/target codecs compresses
// The current buffer cannot be reused (no in-place assignment) when:
// 1. the source and target compression codecs differ, so re-compression is needed
// 2. the magic value to use is 0, because offsets must be overwritten with absolute offsets
// 3. the magic value to use is above 0, but some inner message fields (e.g. offsets) must be rewritten
// 4. message format conversion is needed
// Situations 1 and 2 are decided up front; 3 and 4 are detected while iterating below
var inPlaceAssignment = sourceCodec == targetCodec && messageFormatVersion > Message.MagicValue_V0
var maxTimestamp = Message.NoTimestamp
var offsetOfMaxTimestamp = -1L
val expectedInnerOffset = new LongRef(0)
val validatedMessages = new mutable.ArrayBuffer[Message]
// Non-shallow iteration (isShallow = false) over the messages of this set
this.internalIterator(isShallow = false, ensureMatchingMagic = true).foreach { messageAndOffset =>
val message = messageAndOffset.message
validateMessageKey(message, compactedTopic)// validate the message key
if (message.magic > Message.MagicValue_V0 && messageFormatVersion > Message.MagicValue_V0) {
// No in place assignment situation 3
// Validate the timestamp
validateTimestamp(message, now, messageTimestampType, messageTimestampDiffMaxMs)
// Inner offsets are expected to count up from 0; any mismatch means they must be rewritten
if (messageAndOffset.offset != expectedInnerOffset.getAndIncrement())
inPlaceAssignment = false
if (message.timestamp > maxTimestamp) {
maxTimestamp = message.timestamp
// expectedInnerOffset was already incremented above, hence the "- 1"
offsetOfMaxTimestamp = offsetCounter.value + expectedInnerOffset.value - 1
}
}
if (sourceCodec != NoCompressionCodec && message.compressionCodec != NoCompressionCodec)
throw new InvalidMessageException("Compressed outer message should not have an inner message with a " +
s"compression attribute set: $message")
// No in place assignment situation 4
if (message.magic != messageFormatVersion)
inPlaceAssignment = false
// Collect the message, converted to the requested format version
validatedMessages += message.toFormatVersion(messageFormatVersion)
}
// The current buffer cannot be reused
if (!inPlaceAssignment) {
// Cannot do in place assignment.
// Pick the timestamp for the new set and the offset reported for it
val (largestTimestampOfMessageSet, offsetOfMaxTimestampInMessageSet) = {
if (messageFormatVersion == Message.MagicValue_V0)
(Some(Message.NoTimestamp), -1L)
else if (messageTimestampType == TimestampType.CREATE_TIME)
(Some(maxTimestamp), {if (targetCodec == NoCompressionCodec) offsetOfMaxTimestamp else offsetCounter.value + validatedMessages.length - 1})
else // Log append time
(Some(now), {if (targetCodec == NoCompressionCodec) offsetCounter.value else offsetCounter.value + validatedMessages.length - 1})
}
// Build a new ByteBufferMessageSet from the validated messages using the target codec
ValidationAndOffsetAssignResult(validatedMessages = new ByteBufferMessageSet(compressionCodec = targetCodec,
offsetCounter = offsetCounter,
wrapperMessageTimestamp = largestTimestampOfMessageSet,
timestampType = messageTimestampType,
messages = validatedMessages: _*),
maxTimestamp = largestTimestampOfMessageSet.get,
offsetOfMaxTimestamp = offsetOfMaxTimestampInMessageSet,
messageSizeMaybeChanged = true)
} else {// Reuse the current buffer in place, which avoids re-compressing the messages
// Advance the counter by the number of inner messages and store the last assigned offset
// at position 0 of the buffer (the wrapper message's offset)
buffer.putLong(0, offsetCounter.addAndGet(validatedMessages.size) - 1)
// validate the messages
validatedMessages.foreach(_.ensureValid())
var crcUpdateNeeded = true
val timestampOffset = MessageSet.LogOverhead + Message.TimestampOffset
val attributeOffset = MessageSet.LogOverhead + Message.AttributesOffset
// Remember the wrapper's current timestamp and attributes before overwriting them
val timestamp = buffer.getLong(timestampOffset)
val attributes = buffer.get(attributeOffset)
// Update the wrapper message's timestamp to the largest inner timestamp
buffer.putLong(timestampOffset, maxTimestamp)
if (messageTimestampType == TimestampType.CREATE_TIME && timestamp == maxTimestamp)
// We don't need to recompute crc if the timestamp is not updated.
crcUpdateNeeded = false
else if (messageTimestampType == TimestampType.LOG_APPEND_TIME) {
// Set timestamp type and timestamp
buffer.putLong(timestampOffset, now)
buffer.put(attributeOffset, messageTimestampType.updateAttributes(attributes))
}
if (crcUpdateNeeded) {
// need to recompute the crc value
// Slice the buffer from just past the log overhead so the wrapper message can compute its checksum
buffer.position(MessageSet.LogOverhead)
val wrapperMessage = new Message(buffer.slice())
Utils.writeUnsignedInt(buffer, MessageSet.LogOverhead + Message.CrcOffset, wrapperMessage.computeChecksum)
}
buffer.rewind()
// For compressed messages, report the timestamp and offset now stored in the wrapper message
ValidationAndOffsetAssignResult(validatedMessages = this,
maxTimestamp = buffer.getLong(timestampOffset),
offsetOfMaxTimestamp = buffer.getLong(0),
messageSizeMaybeChanged = false)
}
}
}