kafka日志存储(七):LogManager初始化

在LogManager初始化的过程中,除了启动前文介绍的三个定时任务之外,还会完成相关的恢复操作和Log加载。代码如下:
 

  /**
   * Load every log found under the configured log directories.
   *
   * For each data directory a dedicated fixed-size thread pool is created and one
   * task is submitted per sub-directory; each task builds a `Log` and registers it
   * in `logs`. The method then blocks until every task has finished, deletes the
   * clean-shutdown marker of each directory, and finally shuts all pools down.
   *
   * If a task fails, the cause of the `ExecutionException` is logged and rethrown.
   * A task fails with `IllegalArgumentException` when two directories map to the
   * same topic-partition.
   */
  private def loadLogs(): Unit = {
    info("Loading logs.")
    // Every pool created below is remembered here so the finally-block can shut it down.
    val threadPools = mutable.ArrayBuffer.empty[ExecutorService]
    // Submitted load tasks, keyed by the clean-shutdown marker file of their data directory.
    val jobs = mutable.Map.empty[File, Seq[Future[_]]]

    this.logDirs.foreach { dir =>
      // One pool of `ioThreads` threads per data directory.
      val pool = Executors.newFixedThreadPool(ioThreads)
      threadPools.append(pool)

      // The marker file tells us whether the broker was shut down cleanly last time.
      val cleanShutdownFile = new File(dir, Log.CleanShutdownFile)
      if (!cleanShutdownFile.exists) {
        // No marker: flag the broker as recovering from an unclean shutdown.
        brokerState.newState(RecoveringFromUncleanShutdown)
      } else {
        debug(
          "Found clean shutdown file. " +
          "Skipping recovery for all logs in data directory: " +
          dir.getAbsolutePath)
      }

      // Recovery points of this directory, read from its checkpoint file.
      // If reading fails the map stays empty, so every log falls back to 0 below.
      val recoveryPoints: Map[TopicAndPartition, Long] =
        try {
          this.recoveryPointCheckpoints(dir).read
        } catch {
          case e: Exception =>
            warn("Error occured while reading recovery-point-offset-checkpoint file of directory " + dir, e)
            warn("Resetting the recovery checkpoint to 0")
            Map[TopicAndPartition, Long]()
        }

      // Only sub-directories are considered; plain files are ignored.
      // Option(...) guards against listFiles returning null.
      val loadTasks = Option(dir.listFiles).toList
        .flatMap(_.toList)
        .filter(_.isDirectory)
        .map { logDir =>
          // One runnable per log directory.
          CoreUtils.runnable {
            debug("Loading log '" + logDir.getName + "'")
            // Topic name and partition number are parsed from the directory.
            val topicPartition = Log.parseTopicPartitionName(logDir)
            // Topic-specific configuration if present, otherwise the default one.
            val config = topicConfigs.getOrElse(topicPartition.topic, defaultConfig)
            // Checkpointed recovery point, or 0 when none is known.
            val logRecoveryPoint = recoveryPoints.getOrElse(topicPartition, 0L)
            val current = new Log(logDir, config, logRecoveryPoint, scheduler, time)
            // Register the log; a non-null previous value means a duplicate directory.
            val previous = this.logs.put(topicPartition, current)

            if (previous != null) {
              throw new IllegalArgumentException(
                "Duplicate log directories found: %s, %s!".format(
                current.dir.getAbsolutePath, previous.dir.getAbsolutePath))
            }
          }
        }

      // Hand all tasks of this directory to its pool.
      jobs.update(cleanShutdownFile, loadTasks.map(pool.submit).toSeq)
    }

    try {
      jobs.foreach { case (cleanShutdownFile, dirJobs) =>
        // Block until every task of this directory has completed.
        dirJobs.foreach(_.get)
        // The marker is removed once the directory has been loaded.
        cleanShutdownFile.delete()
      }
    } catch {
      case e: ExecutionException =>
        error("There was an error in one of the threads during logs loading: " + e.getCause)
        throw e.getCause
    } finally {
      // Shut the pools down whether loading succeeded or not.
      threadPools.foreach(_.shutdown())
    }

    info("Logs loading complete.")
  }

loadLogs()为每个分区目录创建Log对象,并放入LogManager.logs中进行管理。除此之外,Log对象在初始化时还会调用Log.loadSegments()方法,步骤如下:
 

  /**
   * Load the segments of this log from the files in `dir`.
   *
   * The work is done in four steps:
   *  1. Delete leftover `.deleted` / `.cleaned` files and collect `.swap` log files
   *     left behind by an interrupted swap operation.
   *  2. Create a `LogSegment` for every log file, deleting index files that have no
   *     log file and recovering segments whose index is missing or fails its sanity check.
   *  3. Recover every collected `.swap` file and splice it into `segments`.
   *  4. If no segment exists, create an empty one at offset 0; otherwise run
   *     `recoverLog()` and resize the active segment's index.
   *
   * @throws IOException if a file in the log directory is not readable
   */
  private def loadSegments(): Unit = {
    // create the log directory if it doesn't exist
    dir.mkdirs()
    var swapFiles = Set[File]()
    // Step 1: remove .deleted/.cleaned files and pick up .swap files.
    for(file <- dir.listFiles if file.isFile) {
      if(!file.canRead)
        throw new IOException("Could not read file " + file)
      val filename = file.getName
      if(filename.endsWith(DeletedFileSuffix) || filename.endsWith(CleanedFileSuffix)) {
        // A .cleaned file means the broker went down in the middle of log compaction;
        // its state is unknown, so it cannot be used for recovery and is removed.
        file.delete()
      } else if(filename.endsWith(SwapFileSuffix)) {
        // A .swap file means compaction had finished and the broker went down during
        // the swap itself, so the file can be recovered.
        val baseName = new File(CoreUtils.replaceSuffix(file.getPath, SwapFileSuffix, ""))
        // An index .swap file is simply deleted; the index is rebuilt later.
        if(baseName.getPath.endsWith(IndexFileSuffix)) {
          file.delete()
        } else if(baseName.getPath.endsWith(LogFileSuffix)){
          // A log .swap file is kept for recovery; its stale index is dropped first.
          val index = new File(CoreUtils.replaceSuffix(baseName.getPath, LogFileSuffix, IndexFileSuffix))
          index.delete()
          swapFiles += file
        }
      }
    }
    // Step 2: load all log and index files. An index file without a matching log file
    // is deleted; a log file without a usable index file gets its segment recovered.
    for(file <- dir.listFiles if file.isFile) {
      val filename = file.getName
      if(filename.endsWith(IndexFileSuffix)) {
        // Swap only the trailing suffix. String.replace would also rewrite any earlier
        // occurrence of the index suffix in the absolute path (for example inside the
        // partition directory name), producing a non-existent log path and causing a
        // valid index file to be deleted as orphaned.
        val logFile = new File(CoreUtils.replaceSuffix(file.getAbsolutePath, IndexFileSuffix, LogFileSuffix))
        if(!logFile.exists) {
          warn("Found an orphaned index file, %s, with no corresponding log file.".format(file.getAbsolutePath))
          // No log file belongs to this index file, so remove it.
          file.delete()
        }
      } else if(filename.endsWith(LogFileSuffix)) {
        // The file name without its suffix is the base offset of the segment.
        val start = filename.substring(0, filename.length - LogFileSuffix.length).toLong
        val indexFile = Log.indexFilename(dir, start)
        val segment = new LogSegment(dir = dir,
                                     startOffset = start,
                                     indexIntervalBytes = config.indexInterval,
                                     maxIndexSize = config.maxIndexSize,
                                     rollJitterMs = config.randomSegmentJitter,
                                     time = time,
                                     fileAlreadyExists = true)
        if(indexFile.exists()) {
          try {
              // Verify the integrity of the existing index file.
              segment.index.sanityCheck()
          } catch {
            case e: java.lang.IllegalArgumentException =>
              warn("Found a corrupted index file, %s, deleting and rebuilding index...".format(indexFile.getAbsolutePath))
              indexFile.delete()
              segment.recover(config.maxMessageSize)
          }
        }
        else {
          error("Could not find index file corresponding to log file %s, rebuilding index...".format(segment.log.file.getAbsolutePath))
          // No index file at all: recover the segment to rebuild it.
          segment.recover(config.maxMessageSize)
        }
        segments.put(start, segment)
      }
    }

    // Step 3: finish the swap operations that were interrupted.
    for (swapFile <- swapFiles) {
      val logFile = new File(CoreUtils.replaceSuffix(swapFile.getPath, SwapFileSuffix, ""))
      val fileName = logFile.getName
      // The base offset is encoded in the log file name.
      val startOffset = fileName.substring(0, fileName.length - LogFileSuffix.length).toLong
      val indexFile = new File(CoreUtils.replaceSuffix(logFile.getPath, LogFileSuffix, IndexFileSuffix) + SwapFileSuffix)
      val index =  new OffsetIndex(indexFile, baseOffset = startOffset, maxIndexSize = config.maxIndexSize)
      val swapSegment = new LogSegment(new FileMessageSet(file = swapFile),
                                       index = index,
                                       baseOffset = startOffset,
                                       indexIntervalBytes = config.indexInterval,
                                       rollJitterMs = config.randomSegmentJitter,
                                       time = time)
      info("Found log file %s from interrupted swap operation, repairing.".format(swapFile.getPath))
      // Recover the swapped segment (rebuilds its index and validates the log file,
      // per the original author's notes).
      swapSegment.recover(config.maxMessageSize)
      // The pre-compaction segments covered by the offset range of the swap segment.
      val oldSegments = logSegments(swapSegment.baseOffset, swapSegment.nextOffset)
      // replaceSegments puts swapSegment into `segments` in place of the old ones.
      replaceSegments(swapSegment, oldSegments.toSeq, isRecoveredSwapFile = true)
    }
    // Step 4: an empty log needs an active segment so that the log always holds at
    // least one segment; a non-empty log needs to be recovered.
    if(logSegments.size == 0) {
      // no existing segments, create a new mutable segment beginning at offset 0
      segments.put(0L, new LogSegment(dir = dir,
                                     startOffset = 0,
                                     indexIntervalBytes = config.indexInterval,
                                     maxIndexSize = config.maxIndexSize,
                                     rollJitterMs = config.randomSegmentJitter,
                                     time = time,
                                     fileAlreadyExists = false,
                                     initFileSize = this.initFileSize(),
                                     preallocate = config.preallocate))
    } else {
      recoverLog()
      // reset the index size of the currently active log segment to allow more entries
      activeSegment.index.resize(config.maxIndexSize)
    }
  }

recoverLog主要处理Broker非正常关闭时导致的消息异常。
 

  /**
   * Recover the segments of this log after startup.
   *
   * When the clean-shutdown marker is present, nothing is recovered and the recovery
   * point is simply moved to the next offset of the active segment. Otherwise every
   * segment from the current recovery point onwards is recovered in order. As soon as
   * one segment reports truncated bytes, all segments after it are deleted.
   */
  private def recoverLog() {
    if(!hasCleanShutdownFile) {
      // The previous shutdown was NOT clean, so the log has to be recovered.
      // Walk over the unflushed segments, i.e. all segments from recoveryPoint onwards.
      val pending = logSegments(this.recoveryPoint, Long.MaxValue).iterator
      while(pending.hasNext) {
        val segment = pending.next
        info("Recovering unflushed segment %d in log %s.".format(segment.baseOffset, name))
        val truncatedBytes =
          try {
            // recover() reports how many bytes were cut off the segment.
            segment.recover(config.maxMessageSize)
          } catch {
            case e: InvalidOffsetException =>
              // An invalid offset empties the segment back to its base offset.
              val startOffset = segment.baseOffset
              warn("Found invalid offset during recovery for log " + dir.getName +". Deleting the corrupt segment and " +
                   "creating an empty one with starting offset " + startOffset)
              segment.truncateTo(startOffset)
          }
        if(truncatedBytes > 0) {
          // Part of this segment was invalid: drop every remaining segment. This drains
          // the iterator, which also ends the enclosing while loop.
          warn("Corruption found in segment %d of log %s, truncating to offset %d.".format(segment.baseOffset, name, segment.nextOffset))
          pending.foreach(deleteSegment)
        }
      }
    } else {
      // Clean shutdown: no recovery needed, only advance the recovery point.
      this.recoveryPoint = activeSegment.nextOffset
    }
  }

LogManager还提供了createLog()、deleteLog()和getLog()等方法。
 

  /**
   * Return the log of the given partition, creating it first if it does not exist.
   *
   * The lookup and the creation both happen while holding `logCreationOrDeletionLock`.
   *
   * @param topicAndPartition the partition whose log is requested
   * @param config the configuration used when a new log has to be created
   * @return the already registered log, or the newly created one
   */
  def createLog(topicAndPartition: TopicAndPartition, config: LogConfig): Log = {
    logCreationOrDeletionLock synchronized {
      // An existing entry (possibly created by another thread) is returned untouched.
      Option(logs.get(topicAndPartition)).getOrElse {
        // Pick the data directory that currently holds the fewest logs.
        val dataDir = nextLogDir()
        // The log lives in a "<topic>-<partition>" directory below it.
        val dir = new File(dataDir, topicAndPartition.topic + "-" + topicAndPartition.partition)
        dir.mkdirs()
        // Build the Log object (per the original notes this also creates its active segment).
        val created = new Log(dir,
                              config,
                              recoveryPoint = 0L,
                              scheduler,
                              time)
        logs.put(topicAndPartition, created)
        info("Created log for partition [%s,%d] in %s with properties {%s}."
             .format(topicAndPartition.topic,
                     topicAndPartition.partition,
                     dataDir.getAbsolutePath,
                     {import JavaConversions._; config.originals.mkString(", ")}))
        created
      }
    }
  }
  /**
   * Choose the data directory in which the next log should be created.
   *
   * With exactly one configured directory that directory is returned. Otherwise the
   * number of logs per parent directory is counted (directories without any log
   * count as 0) and the directory with the smallest count is returned.
   */
  private def nextLogDir(): File = {
    if(logDirs.size != 1) {
      // Number of existing logs per parent directory.
      val existingCounts = allLogs.groupBy(_.dir.getParent).mapValues(_.size)
      // Start every configured directory at 0 so that empty directories are included.
      val emptyCounts = logDirs.map(dir => (dir.getPath, 0)).toMap
      val dirCounts = (emptyCounts ++ existingCounts).toBuffer

      // The first entry after sorting by count is the least loaded directory.
      val (leastLoadedPath, _) = dirCounts.sortBy(_._2).head
      new File(leastLoadedPath)
    } else {
      // Only one log directory is configured.
      logDirs(0)
    }
  }
  
  /**
   * Delete the log of the given partition, if one is registered.
   *
   * The log is removed from `logs` under `logCreationOrDeletionLock`. The remaining
   * work happens outside the lock: cleaning is aborted when a cleaner is configured,
   * then the log itself is deleted.
   *
   * @param topicAndPartition the partition whose log should be deleted
   */
  def deleteLog(topicAndPartition: TopicAndPartition) {
    val removedLog = logCreationOrDeletionLock synchronized {
      logs.remove(topicAndPartition)
    }
    // Nothing to do when no log was registered for this partition.
    Option(removedLog).foreach { log =>
      Option(cleaner).foreach { activeCleaner =>
        // Stop cleaning this partition.
        activeCleaner.abortCleaning(topicAndPartition)
        // Update the cleaner checkpoint for the parent data directory.
        activeCleaner.updateCheckpoints(log.dir.getParentFile)
      }
      // Delete the log (its log files and index files, per the original notes).
      log.delete()
      info("Deleted log for partition [%s,%d] in %s."
           .format(topicAndPartition.topic,
                   topicAndPartition.partition,
                   log.dir.getAbsolutePath))
    }
  }

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值