During LogManager initialization, besides the three scheduled tasks introduced earlier, the related recovery work and Log loading are also performed, as shown below:
private def loadLogs(): Unit = {
  info("Loading logs.")
  // Thread pools created for each log directory
  val threadPools = mutable.ArrayBuffer.empty[ExecutorService]
  val jobs = mutable.Map.empty[File, Seq[Future[_]]]
  for (dir <- this.logDirs) {
    // For each log directory, create a thread pool with the configured number of I/O threads
    val pool = Executors.newFixedThreadPool(ioThreads)
    threadPools.append(pool)
    val cleanShutdownFile = new File(dir, Log.CleanShutdownFile)
    // Check whether the broker shut down cleanly last time
    if (cleanShutdownFile.exists) {
      debug(
        "Found clean shutdown file. " +
        "Skipping recovery for all logs in data directory: " +
        dir.getAbsolutePath)
    } else {
      // Update brokerState
      brokerState.newState(RecoveringFromUncleanShutdown)
    }
    // Read the recovery-point-offset-checkpoint file in each log directory to build
    // the TopicAndPartition -> recoveryPoint mapping
    var recoveryPoints = Map[TopicAndPartition, Long]()
    try { // Load the recovery points
      recoveryPoints = this.recoveryPointCheckpoints(dir).read
    } catch {
      case e: Exception => {
        warn("Error occurred while reading recovery-point-offset-checkpoint file of directory " + dir, e)
        warn("Resetting the recovery checkpoint to 0")
      }
    }
    val jobsForDir = for {
      // Iterate over the entries in the log directory, keeping only subdirectories
      dirContent <- Option(dir.listFiles).toList
      logDir <- dirContent if logDir.isDirectory
    } yield { // Create a Runnable task for each Log directory
      CoreUtils.runnable {
        debug("Loading log '" + logDir.getName + "'")
        // Parse the topic name and partition id from the directory name
        val topicPartition = Log.parseTopicPartitionName(logDir)
        // Look up the configuration for this Log
        val config = topicConfigs.getOrElse(topicPartition.topic, defaultConfig)
        // Look up the recoveryPoint for this Log
        val logRecoveryPoint = recoveryPoints.getOrElse(topicPartition, 0L)
        // Create the Log object
        val current = new Log(logDir, config, logRecoveryPoint, scheduler, time)
        // Register the Log object in the logs map
        val previous = this.logs.put(topicPartition, current)
        if (previous != null) {
          throw new IllegalArgumentException(
            "Duplicate log directories found: %s, %s!".format(
              current.dir.getAbsolutePath, previous.dir.getAbsolutePath))
        }
      }
    }
    // Submit all tasks in jobsForDir to the thread pool
    jobs(cleanShutdownFile) = jobsForDir.map(pool.submit).toSeq
  }
  try {
    // Wait for all jobs to finish
    for ((cleanShutdownFile, dirJobs) <- jobs) {
      dirJobs.foreach(_.get)
      // Delete the clean shutdown file
      cleanShutdownFile.delete()
    }
  } catch {
    case e: ExecutionException => {
      error("There was an error in one of the threads during logs loading: " + e.getCause)
      throw e.getCause
    }
  } finally {
    threadPools.foreach(_.shutdown()) // Shut down all thread pools
  }
  info("Logs loading complete.")
}
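For reference, the recovery-point-offset-checkpoint file read above is a small plain-text file: the first line is a version number, the second line is the number of entries, and each remaining line is a "topic partition offset" triple. A minimal standalone sketch of a parser for that layout (the readCheckpoint name is ours, not Kafka's) could look like this:
import scala.io.Source

// Minimal sketch: parse a recovery-point-offset-checkpoint file into a
// (topic, partition) -> recoveryPoint map. Layout (version 0): version line,
// entry-count line, then one "topic partition offset" triple per line.
def readCheckpoint(path: String): Map[(String, Int), Long] = {
  val source = Source.fromFile(path)
  try {
    val lines = source.getLines().filter(_.nonEmpty).toList
    require(lines.head.trim.toInt == 0, "unexpected checkpoint version")
    val expected = lines(1).trim.toInt
    val entries = lines.drop(2).map { line =>
      val Array(topic, partition, offset) = line.split("\\s+")
      (topic, partition.toInt) -> offset.toLong
    }
    require(entries.size == expected, "checkpoint entry count mismatch")
    entries.toMap
  } finally {
    source.close()
  }
}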
loadLogs() only creates the Log objects and registers them in LogManager.logs for management; beyond that, the Log constructor also invokes Log.loadSegments(), which proceeds in the following steps:
private def loadSegments() {
  // create the log directory if it doesn't exist
  dir.mkdirs()
  var swapFiles = Set[File]()
  // Step 1: delete .deleted and .cleaned files, and collect .swap files
  for (file <- dir.listFiles if file.isFile) {
    if (!file.canRead)
      throw new IOException("Could not read file " + file)
    val filename = file.getName
    if (filename.endsWith(DeletedFileSuffix) || filename.endsWith(CleanedFileSuffix)) {
      // Delete .deleted and .cleaned files. A .cleaned file means the broker crashed
      // during log compaction; its state is indeterminate, so it cannot be recovered.
      file.delete()
    } else if (filename.endsWith(SwapFileSuffix)) {
      // A .swap file means compaction had already finished and the crash happened
      // during the swap, so it can be recovered.
      val baseName = new File(CoreUtils.replaceSuffix(file.getPath, SwapFileSuffix, ""))
      // If stripping the .swap suffix reveals an index file, just delete it; it will be rebuilt
      if (baseName.getPath.endsWith(IndexFileSuffix)) {
        file.delete()
      } else if (baseName.getPath.endsWith(LogFileSuffix)) {
        // If it is a log file, it must be recovered: delete its index and keep the swap file
        val index = new File(CoreUtils.replaceSuffix(baseName.getPath, LogFileSuffix, IndexFileSuffix))
        index.delete()
        swapFiles += file
      }
    }
  }
  // Step 2: load all log files and index files. An index file without a matching log
  // file is deleted; a log file without a matching index file gets its index rebuilt.
  for (file <- dir.listFiles if file.isFile) {
    val filename = file.getName
    if (filename.endsWith(IndexFileSuffix)) { // Handle index files
      val logFile = new File(file.getAbsolutePath.replace(IndexFileSuffix, LogFileSuffix))
      if (!logFile.exists) {
        warn("Found an orphaned index file, %s, with no corresponding log file.".format(file.getAbsolutePath))
        // Delete the index file since it has no corresponding log file
        file.delete()
      }
    } else if (filename.endsWith(LogFileSuffix)) {
      // Handle log files
      val start = filename.substring(0, filename.length - LogFileSuffix.length).toLong
      val indexFile = Log.indexFilename(dir, start)
      // Create the LogSegment
      val segment = new LogSegment(dir = dir,
                                   startOffset = start,
                                   indexIntervalBytes = config.indexInterval,
                                   maxIndexSize = config.maxIndexSize,
                                   rollJitterMs = config.randomSegmentJitter,
                                   time = time,
                                   fileAlreadyExists = true)
      // Check whether the index file exists
      if (indexFile.exists()) {
        try {
          // Check the integrity of the index file
          segment.index.sanityCheck()
        } catch {
          case e: java.lang.IllegalArgumentException =>
            warn("Found a corrupted index file, %s, deleting and rebuilding index...".format(indexFile.getAbsolutePath))
            indexFile.delete()
            segment.recover(config.maxMessageSize)
        }
      } else {
        error("Could not find index file corresponding to log file %s, rebuilding index...".format(segment.log.file.getAbsolutePath))
        // Rebuild the index file since it is missing
        segment.recover(config.maxMessageSize)
      }
      segments.put(start, segment)
    }
  }
  // Step 3: process the .swap files collected in Step 1
  for (swapFile <- swapFiles) {
    val logFile = new File(CoreUtils.replaceSuffix(swapFile.getPath, SwapFileSuffix, ""))
    val fileName = logFile.getName
    // Derive the baseOffset from the log file name
    val startOffset = fileName.substring(0, fileName.length - LogFileSuffix.length).toLong
    val indexFile = new File(CoreUtils.replaceSuffix(logFile.getPath, LogFileSuffix, IndexFileSuffix) + SwapFileSuffix)
    val index = new OffsetIndex(indexFile, baseOffset = startOffset, maxIndexSize = config.maxIndexSize)
    // Create the LogSegment
    val swapSegment = new LogSegment(new FileMessageSet(file = swapFile),
                                     index = index,
                                     baseOffset = startOffset,
                                     indexIntervalBytes = config.indexInterval,
                                     rollJitterMs = config.randomSegmentJitter,
                                     time = time)
    info("Found log file %s from interrupted swap operation, repairing.".format(swapFile.getPath))
    // Rebuild the index file and validate the log file
    swapSegment.recover(config.maxMessageSize)
    // Find the old LogSegments (from before the compaction) that swapSegment replaces
    val oldSegments = logSegments(swapSegment.baseOffset, swapSegment.nextOffset)
    // Log.replaceSegments() adds the swapSegment to the segments skip list for management
    replaceSegments(swapSegment, oldSegments.toSeq, isRecoveredSwapFile = true)
  }
  // Step 4: an empty Log needs an activeSegment created, so that the Log always has
  // at least one LogSegment; a non-empty Log needs to be recovered.
  if (logSegments.size == 0) {
    // no existing segments, create a new mutable segment beginning at offset 0
    segments.put(0L, new LogSegment(dir = dir,
                                    startOffset = 0,
                                    indexIntervalBytes = config.indexInterval,
                                    maxIndexSize = config.maxIndexSize,
                                    rollJitterMs = config.randomSegmentJitter,
                                    time = time,
                                    fileAlreadyExists = false,
                                    initFileSize = this.initFileSize(),
                                    preallocate = config.preallocate))
  } else {
    recoverLog()
    // reset the index size of the currently active log segment to allow more entries
    activeSegment.index.resize(config.maxIndexSize)
  }
}
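Both Step 2 and Step 3 above recover a segment's baseOffset from its file name. Kafka names segment files by zero-padding the base offset to 20 digits, so the mapping is trivially reversible; a small illustrative sketch (helper names are ours):
// Sketch of the segment file naming convention: the base offset, zero-padded
// to 20 digits, plus the ".log" (or ".index") suffix.
def logFilename(baseOffset: Long): String = f"$baseOffset%020d.log"

def baseOffsetOf(filename: String): Long =
  filename.stripSuffix(".log").toLong

// logFilename(42L)                         == "00000000000000000042.log"
// baseOffsetOf("00000000000000000042.log") == 42L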
recoverLog() mainly deals with the corrupted messages left behind when the broker did not shut down cleanly.
private def recoverLog() {
  // If the broker shut down cleanly last time, no recovery is needed; just advance the recoveryPoint
  if (hasCleanShutdownFile) {
    this.recoveryPoint = activeSegment.nextOffset
    return
  }
  // The last shutdown was unclean, so recovery is required.
  // Get all unflushed LogSegments, i.e., every LogSegment from the recoveryPoint onwards.
  val unflushed = logSegments(this.recoveryPoint, Long.MaxValue).iterator
  while (unflushed.hasNext) {
    val curr = unflushed.next
    info("Recovering unflushed segment %d in log %s.".format(curr.baseOffset, name))
    val truncatedBytes =
      try {
        // LogSegment.recover() rebuilds the index file and validates the log file;
        // anything that fails validation is truncated.
        curr.recover(config.maxMessageSize)
      } catch {
        case e: InvalidOffsetException =>
          val startOffset = curr.baseOffset
          warn("Found invalid offset during recovery for log " + dir.getName + ". Deleting the corrupt segment and " +
            "creating an empty one with starting offset " + startOffset)
          curr.truncateTo(startOffset)
      }
    if (truncatedBytes > 0) {
      // The LogSegment contained messages that failed validation; every segment after it is discarded
      warn("Corruption found in segment %d of log %s, truncating to offset %d.".format(curr.baseOffset, name, curr.nextOffset))
      unflushed.foreach(deleteSegment)
    }
  }
}
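The essence of LogSegment.recover() is a sequential scan that validates each message and truncates at the first corrupt one, returning the number of bytes removed. A simplified, hypothetical model of that idea (Record and recoverBytes are our illustrative names, not Kafka types):
// Hypothetical model of scan-and-truncate recovery: keep records while they
// validate, and report how many bytes would be truncated past the first
// corrupt record. A non-zero result is what triggers the "corruption found"
// branch in recoverLog() above.
case class Record(offset: Long, sizeInBytes: Int, checksumOk: Boolean)

def recoverBytes(records: Seq[Record]): (Seq[Record], Int) = {
  val valid = records.takeWhile(_.checksumOk)
  val truncatedBytes = records.drop(valid.size).map(_.sizeInBytes).sum
  (valid, truncatedBytes)
}

// recoverBytes(Seq(Record(0, 100, true), Record(1, 80, false), Record(2, 90, true)))
// == (Seq(Record(0, 100, true)), 170)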
LogManager also provides the createLog(), deleteLog(), and getLog() methods.
def createLog(topicAndPartition: TopicAndPartition, config: LogConfig): Log = {
  logCreationOrDeletionLock synchronized {
    var log = logs.get(topicAndPartition)
    // check if the log has already been created in another thread
    if (log != null)
      return log
    // Choose the log directory containing the fewest Logs
    val dataDir = nextLogDir()
    // Create the directory for this Log
    val dir = new File(dataDir, topicAndPartition.topic + "-" + topicAndPartition.partition)
    dir.mkdirs()
    // Create the Log object, which also creates its activeSegment
    log = new Log(dir,
                  config,
                  recoveryPoint = 0L,
                  scheduler,
                  time)
    logs.put(topicAndPartition, log)
    info("Created log for partition [%s,%d] in %s with properties {%s}."
      .format(topicAndPartition.topic,
              topicAndPartition.partition,
              dataDir.getAbsolutePath,
              {import JavaConversions._; config.originals.mkString(", ")}))
    log
  }
}
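The partition directory created here is named "&lt;topic&gt;-&lt;partition&gt;", which is exactly what Log.parseTopicPartitionName() parses back during loadLogs(). Since topic names may themselves contain '-', the partition id sits after the last '-'; an illustrative round trip (helper names are ours, not Kafka's):
// Illustrative round trip between a partition and its directory name.
def logDirName(topic: String, partition: Int): String = s"$topic-$partition"

def parseTopicPartition(dirName: String): (String, Int) = {
  val idx = dirName.lastIndexOf('-') // topic names may contain '-' themselves
  (dirName.substring(0, idx), dirName.substring(idx + 1).toInt)
}

// logDirName("access-log", 3)         == "access-log-3"
// parseTopicPartition("access-log-3") == ("access-log", 3)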
private def nextLogDir(): File = {
  if (logDirs.size == 1) { // Only one log directory is configured
    logDirs(0)
  } else {
    // Multiple log directories are configured; count the Logs in each
    val logCounts = allLogs.groupBy(_.dir.getParent).mapValues(_.size)
    val zeros = logDirs.map(dir => (dir.getPath, 0)).toMap
    var dirCounts = (zeros ++ logCounts).toBuffer
    // Choose the log directory containing the fewest Logs
    val leastLoaded = dirCounts.sortBy(_._2).head
    new File(leastLoaded._1)
  }
}
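The merge zeros ++ logCounts is what guarantees that a data directory holding no Logs still appears in the candidate list (with count 0) and therefore wins the sort. A self-contained sketch of this selection, using made-up directory paths:
// Standalone illustration of the least-loaded selection in nextLogDir(),
// with hypothetical directory paths and counts.
val logDirs = Seq("/data/kafka-1", "/data/kafka-2", "/data/kafka-3")
val logCounts = Map("/data/kafka-1" -> 5, "/data/kafka-3" -> 2) // kafka-2 holds no Logs yet
val zeros = logDirs.map(_ -> 0).toMap
// zeros ++ logCounts == Map(kafka-1 -> 5, kafka-2 -> 0, kafka-3 -> 2)
val leastLoaded = (zeros ++ logCounts).toSeq.sortBy(_._2).head._1
// leastLoaded == "/data/kafka-2"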
/**
* Delete a log.
*/
def deleteLog(topicAndPartition: TopicAndPartition) {
  var removedLog: Log = null
  logCreationOrDeletionLock synchronized {
    removedLog = logs.remove(topicAndPartition)
  }
  if (removedLog != null) {
    // Abort any cleaning (compaction) in progress for this partition
    if (cleaner != null) {
      cleaner.abortCleaning(topicAndPartition)
      // Update the cleaner-offset-checkpoint file
      cleaner.updateCheckpoints(removedLog.dir.getParentFile)
    }
    removedLog.delete() // Delete the associated log files and index files
    info("Deleted log for partition [%s,%d] in %s."
      .format(topicAndPartition.topic,
              topicAndPartition.partition,
              removedLog.dir.getAbsolutePath))
  }
}