Spark-BlockManagerMaster、BlockManagerMasterEndpoint 源码解析

abstract class BlockId

这个类是 BlockId 的抽象基类,一共有 10 个实现类,每个实现类表示一种具体类型的 BlockId。

/**
 * Abstract base of all block identifiers. Each concrete subclass (ten are listed in this
 * file) represents one kind of block and supplies the block's `name`.
 */
sealed abstract class BlockId {
  /** A globally unique identifier for this Block. Can be used for ser/de. */
  def name: String

  // Convenience helpers for the most frequently tested block kinds.

  /** This id as an [[RDDBlockId]] when it is one, otherwise `None`. */
  def asRDDId: Option[RDDBlockId] = this match {
    case rdd: RDDBlockId => Some(rdd)
    case _ => None
  }

  /** True when this id is an [[RDDBlockId]]. */
  def isRDD: Boolean = this match {
    case _: RDDBlockId => true
    case _ => false
  }

  /** True when this id is a [[ShuffleBlockId]]. */
  def isShuffle: Boolean = this match {
    case _: ShuffleBlockId => true
    case _ => false
  }

  /** True when this id is a [[BroadcastBlockId]]. */
  def isBroadcast: Boolean = this match {
    case _: BroadcastBlockId => true
    case _ => false
  }

  // The textual form of a block id is simply its name.
  override def toString: String = name
}

object BlockId

这是 abstract class BlockId 的伴生对象,里面定义了各类 BlockId 名称对应的正则表达式,以及根据名称构造对应 BlockId 的 apply 方法。

/**
 * Companion of [[BlockId]]: holds one regular expression per block-name format and a
 * factory that parses a block name back into the matching [[BlockId]] subclass.
 */
object BlockId {
  val RDD = "rdd_([0-9]+)_([0-9]+)".r
  val SHUFFLE = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)".r
  // The dot before the file suffix is escaped so that it matches a literal '.' only;
  // unescaped it would match any character (e.g. "shuffle_1_2_3xdata").
  val SHUFFLE_DATA = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)\\.data".r
  val SHUFFLE_INDEX = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)\\.index".r
  val BROADCAST = "broadcast_([0-9]+)([_A-Za-z0-9]*)".r
  val TASKRESULT = "taskresult_([0-9]+)".r
  val STREAM = "input-([0-9]+)-([0-9]+)".r
  val TEMP_LOCAL = "temp_local_([-A-Fa-f0-9]+)".r
  val TEMP_SHUFFLE = "temp_shuffle_([-A-Fa-f0-9]+)".r
  val TEST = "test_(.*)".r

  /**
   * Parses a block name into the corresponding [[BlockId]] implementation.
   *
   * A regex extractor only matches when the whole string matches, so the order of the
   * cases below does not make "shuffle_1_2_3.data" match the plain SHUFFLE pattern.
   *
   * @param name the block name, in one of the formats produced by the `name` methods
   * @return the block id whose `name` format matches
   * @throws UnrecognizedBlockId if the name matches none of the known formats
   */
  def apply(name: String): BlockId = name match {
    case RDD(rddId, splitIndex) =>
      RDDBlockId(rddId.toInt, splitIndex.toInt)
    case SHUFFLE(shuffleId, mapId, reduceId) =>
      ShuffleBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt)
    case SHUFFLE_DATA(shuffleId, mapId, reduceId) =>
      ShuffleDataBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt)
    case SHUFFLE_INDEX(shuffleId, mapId, reduceId) =>
      ShuffleIndexBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt)
    case BROADCAST(broadcastId, field) =>
      // The second group captures the optional "_field" suffix including its underscore.
      BroadcastBlockId(broadcastId.toLong, field.stripPrefix("_"))
    case TASKRESULT(taskId) =>
      TaskResultBlockId(taskId.toLong)
    case STREAM(streamId, uniqueId) =>
      StreamBlockId(streamId.toInt, uniqueId.toLong)
    case TEMP_LOCAL(uuid) =>
      TempLocalBlockId(UUID.fromString(uuid))
    case TEMP_SHUFFLE(uuid) =>
      TempShuffleBlockId(UUID.fromString(uuid))
    case TEST(value) =>
      TestBlockId(value)
    case _ =>
      throw new UnrecognizedBlockId(name)
  }
}

RDDBlockId

/** Block id built from an RDD id and a split index; named `rdd_<rddId>_<splitIndex>`. */
case class RDDBlockId(rddId: Int, splitIndex: Int) extends BlockId {
  override def name: String = s"rdd_${rddId}_${splitIndex}"
}

ShuffleBlockId

/** Block id for shuffle output; named `shuffle_<shuffleId>_<mapId>_<reduceId>`. */
case class ShuffleBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId {
  override def name: String = s"shuffle_${shuffleId}_${mapId}_${reduceId}"
}

ShuffleDataBlockId

/** Block id named `shuffle_<shuffleId>_<mapId>_<reduceId>.data`. */
case class ShuffleDataBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId {
  override def name: String = s"shuffle_${shuffleId}_${mapId}_${reduceId}.data"
}

ShuffleIndexBlockId

/** Block id named `shuffle_<shuffleId>_<mapId>_<reduceId>.index`. */
case class ShuffleIndexBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId {
  override def name: String = s"shuffle_${shuffleId}_${mapId}_${reduceId}.index"
}

BroadcastBlockId

/**
 * Block id for broadcast data; named `broadcast_<broadcastId>`, followed by `_<field>`
 * when `field` is not the empty string.
 */
case class BroadcastBlockId(broadcastId: Long, field: String = "") extends BlockId {
  override def name: String = {
    // Only a non-empty field contributes a suffix.
    val suffix = if (field != "") s"_${field}" else ""
    s"broadcast_${broadcastId}${suffix}"
  }
}

TaskResultBlockId

/** Block id named `taskresult_<taskId>`. */
case class TaskResultBlockId(taskId: Long) extends BlockId {
  override def name: String = s"taskresult_${taskId}"
}

StreamBlockId

/** Block id named `input-<streamId>-<uniqueId>`. */
case class StreamBlockId(streamId: Int, uniqueId: Long) extends BlockId {
  override def name: String = s"input-${streamId}-${uniqueId}"
}

TempLocalBlockId

/** Block id named `temp_local_<uuid>`. */
private[spark] case class TempLocalBlockId(id: UUID) extends BlockId {
  override def name: String = s"temp_local_${id}"
}

TempShuffleBlockId

/** Block id named `temp_shuffle_<uuid>`. */
private[spark] case class TempShuffleBlockId(id: UUID) extends BlockId {
  override def name: String = s"temp_shuffle_${id}"
}

TestBlockId

/** Block id named `test_<id>`. */
private[spark] case class TestBlockId(id: String) extends BlockId {
  override def name: String = s"test_${id}"
}

BlockManagerId

这个类用来唯一标识一个 BlockManager(通常对应一个 executor 或 driver),因此类里面包含 executorId、主机名(hostname)、端口(port)以及可选的拓扑信息。

/**
 * Identity of a block manager: executor id, host, port and optional topology information.
 *
 * The fields are `var`s because instances are populated in place by `readExternal` during
 * deserialization. Instances are created through the companion object (the constructor is
 * private).
 */
class BlockManagerId private (
    private var executorId_ : String,
    private var host_ : String, // a hostname rather than an IP (per the original author's note)
    private var port_ : Int,
    private var topologyInfo_ : Option[String])
  extends Externalizable {

  // No-arg constructor used only for deserialization; readExternal fills in the fields.
  private def this() = this(null, null, 0, None)

  // Validate on construction, except for the placeholder built by the no-arg constructor.
  if (host_ != null) {
    Utils.checkHost(host_)
    assert(port_ > 0)
  }

  def executorId: String = executorId_

  def host: String = host_

  def port: Int = port_

  def topologyInfo: Option[String] = topologyInfo_

  /** Returns "host:port" after re-validating both parts. */
  def hostPort: String = {
    // DEBUG code
    Utils.checkHost(host)
    assert(port > 0)
    s"${host}:${port}"
  }

  /** True when the executor id is one of the two driver identifiers defined on SparkContext. */
  def isDriver: Boolean = {
    val id = executorId
    id == SparkContext.DRIVER_IDENTIFIER || id == SparkContext.LEGACY_DRIVER_IDENTIFIER
  }

  /**
   * Serializes this id: executor id, host, port, then a presence flag followed by the
   * topology string only when it is defined.
   */
  override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
    out.writeUTF(executorId_)
    out.writeUTF(host_)
    out.writeInt(port_)
    topologyInfo_ match {
      case Some(info) =>
        out.writeBoolean(true)
        out.writeUTF(info)
      case None =>
        out.writeBoolean(false)
    }
  }

  /** Deserializes the fields in exactly the order written by `writeExternal`. */
  override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
    executorId_ = in.readUTF()
    host_ = in.readUTF()
    port_ = in.readInt()
    // The topology string is present on the wire only when the flag is true.
    topologyInfo_ = if (in.readBoolean()) Option(in.readUTF()) else None
  }

  // After deserialization, substitute the instance held in the companion's cache.
  @throws(classOf[IOException])
  private def readResolve(): Object = BlockManagerId.getCachedBlockManagerId(this)

  override def toString: String = s"BlockManagerId($executorId, $host, $port, $topologyInfo)"

  override def hashCode: Int = {
    val withHost = executorId.hashCode * 41 + host.hashCode
    val withPort = withHost * 41 + port
    withPort * 41 + topologyInfo.hashCode
  }

  override def equals(that: Any): Boolean = that match {
    case other: BlockManagerId =>
      executorId == other.executorId &&
        port == other.port &&
        host == other.host &&
        topologyInfo == other.topologyInfo
    case _ => false
  }
}

object BlockManagerId

BlockManagerId class 的伴生对象。

/** Companion of the BlockManagerId class: factories plus a cache of instances. */
private[spark] object BlockManagerId {

  /**
   * Returns a [[org.apache.spark.storage.BlockManagerId]] for the given configuration.
   * The freshly built id is passed through the cache, so an equal cached instance is
   * returned when one exists.
   *
   * @param execId ID of the executor.
   * @param host Host name of the block manager.
   * @param port Port of the block manager.
   * @param topologyInfo topology information for the blockmanager, if available
   *                     This can be network topology information for use while choosing peers
   *                     while replicating data blocks. More information available here:
   *                     [[org.apache.spark.storage.TopologyMapper]]
   * @return A new [[org.apache.spark.storage.BlockManagerId]].
   */
  def apply(
      execId: String,
      host: String,
      port: Int,
      topologyInfo: Option[String] = None): BlockManagerId = {
    val candidate = new BlockManagerId(execId, host, port, topologyInfo)
    getCachedBlockManagerId(candidate)
  }

  /** Reads an id from `in` and returns the cached instance equal to it. */
  def apply(in: ObjectInput): BlockManagerId = {
    val decoded = new BlockManagerId()
    decoded.readExternal(in)
    getCachedBlockManagerId(decoded)
  }

  /**
   * The max cache size is hardcoded to 10000, since the size of a BlockManagerId
   * object is about 48B, the total memory cost should be below 1MB which is feasible.
   */
  val blockManagerIdCache = CacheBuilder.newBuilder()
    .maximumSize(10000)
    .build(new CacheLoader[BlockManagerId, BlockManagerId]() {
      // On a cache miss the looked-up id itself becomes the cached value.
      override def load(id: BlockManagerId): BlockManagerId = id
    })

  /** Looks `id` up in the cache, loading it on a miss. */
  def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = blockManagerIdCache.get(id)
}

BlockStatus class & object

/** Storage level of a block together with its size in memory and on disk. */
case class BlockStatus(storageLevel: StorageLevel, memSize: Long, diskSize: Long) {
  /** True when the combined memory and disk size is greater than zero. */
  def isCached: Boolean = {
    val totalSize = memSize + diskSize
    totalSize > 0
  }
}

@DeveloperApi
object BlockStatus {
  /** A status with `StorageLevel.NONE` and zero memory and disk size. */
  def empty: BlockStatus = BlockStatus(StorageLevel.NONE, 0L, 0L)
}

BlockManagerInfo

这个类的主要功能是记录某个 BlockManager(executor)上有哪些 block,因此类里面既有标识该 BlockManager 的 BlockManagerId,也有保存 block 状态的 _blocks 和记录已缓存 block 的 _cachedBlocks。

/**
 * Bookkeeping for one block manager: which blocks it has reported, their status, and an
 * estimate of its remaining memory.
 */
private[spark] class BlockManagerInfo(
    val blockManagerId: BlockManagerId, // identifies the block manager (executor id, host, port)
    timeMs: Long,
    val maxOnHeapMem: Long, // maximum on-heap memory
    val maxOffHeapMem: Long, // maximum off-heap memory
    val slaveEndpoint: RpcEndpointRef) // RPC reference to the block manager's own endpoint
  extends Logging {

  val maxMem = maxOnHeapMem + maxOffHeapMem // on-heap plus off-heap

  private var _lastSeenMs: Long = timeMs // time of the last update
  private var _remainingMem: Long = maxMem // starts at maxMem; adjusted as blocks are added/removed

  // Mapping from block id to its status.
  private val _blocks = new JHashMap[BlockId, BlockStatus]

  // Cached blocks held by this BlockManager. This does not include broadcast blocks.
  private val _cachedBlocks = new mutable.HashSet[BlockId]

  // Status of the given block, or None when the block is not tracked here.
  def getStatus(blockId: BlockId): Option[BlockStatus] = Option(_blocks.get(blockId))

  // Records "now" as the last time this block manager was seen.
  def updateLastSeenMs() {
    _lastSeenMs = System.currentTimeMillis()
  }

  // Applies a status report for one block: adds or updates it when the new storage level is
  // valid, and drops it when the level is invalid and the block was known.
  def updateBlockInfo(
      blockId: BlockId,
      storageLevel: StorageLevel,
      memSize: Long,
      diskSize: Long) {

    updateLastSeenMs() // any report also counts as a sign of life

    val blockExists = _blocks.containsKey(blockId) // is the block already tracked?
    var originalMemSize: Long = 0
    var originalDiskSize: Long = 0
    var originalLevel: StorageLevel = StorageLevel.NONE

    if (blockExists) { // remember the previous status before it is overwritten
      // The block exists on the slave already.
      val blockStatus: BlockStatus = _blocks.get(blockId)
      originalLevel = blockStatus.storageLevel
      originalMemSize = blockStatus.memSize
      originalDiskSize = blockStatus.diskSize

      if (originalLevel.useMemory) {
        _remainingMem += originalMemSize // give back the memory the old entry was charged for
      }
    }

    if (storageLevel.isValid) {
      /* isValid means it is either stored in-memory or on-disk.
       * The memSize here indicates the data size in or dropped from memory,
       * externalBlockStoreSize here indicates the data size in or dropped from externalBlockStore,
       * and the diskSize here indicates the data size in or dropped to disk.
       * They can be both larger than 0, when a block is dropped from memory to disk.
       * Therefore, a safe way to set BlockStatus is to set its info in accurate modes. */
      // NOTE(review): when the level uses both memory and disk, the disk branch below
      // replaces the status stored by the memory branch (memSize becomes 0) while
      // _remainingMem has already been reduced by memSize -- confirm this is intended.
      var blockStatus: BlockStatus = null
      if (storageLevel.useMemory) { // the block is reported as held in memory
        blockStatus = BlockStatus(storageLevel, memSize = memSize, diskSize = 0)
        _blocks.put(blockId, blockStatus)
        _remainingMem -= memSize // charge the new in-memory size
        if (blockExists) {
          logInfo(s"Updated $blockId in memory on ${blockManagerId.hostPort}" +
            s" (current size: ${Utils.bytesToString(memSize)}," +
            s" original size: ${Utils.bytesToString(originalMemSize)}," +
            s" free: ${Utils.bytesToString(_remainingMem)})")
        } else {
          logInfo(s"Added $blockId in memory on ${blockManagerId.hostPort}" +
            s" (size: ${Utils.bytesToString(memSize)}," +
            s" free: ${Utils.bytesToString(_remainingMem)})")
        }
      }
      if (storageLevel.useDisk) {
        blockStatus = BlockStatus(storageLevel, memSize = 0, diskSize = diskSize)
        _blocks.put(blockId, blockStatus)
        if (blockExists) {
          logInfo(s"Updated $blockId on disk on ${blockManagerId.hostPort}" +
            s" (current size: ${Utils.bytesToString(diskSize)}," +
            s" original size: ${Utils.bytesToString(originalDiskSize)})")
        } else {
          logInfo(s"Added $blockId on disk on ${blockManagerId.hostPort}" +
            s" (size: ${Utils.bytesToString(diskSize)})")
        }
      }
      // Broadcast blocks are deliberately kept out of _cachedBlocks.
      if (!blockId.isBroadcast && blockStatus.isCached) {
        _cachedBlocks += blockId
      }
    }
    else if (blockExists) { // invalid level for a known block: stop tracking it
      // If isValid is not true, drop the block.
      _blocks.remove(blockId)
      _cachedBlocks -= blockId
      if (originalLevel.useMemory) { // log only
        logInfo(s"Removed $blockId on ${blockManagerId.hostPort} in memory" +
          s" (size: ${Utils.bytesToString(originalMemSize)}," +
          s" free: ${Utils.bytesToString(_remainingMem)})")
      }
      if (originalLevel.useDisk) { // log only
        logInfo(s"Removed $blockId on ${blockManagerId.hostPort} on disk" +
          s" (size: ${Utils.bytesToString(originalDiskSize)})")
      }
    }
  }

  // Stops tracking the given block and gives back the memory recorded for it.
  def removeBlock(blockId: BlockId) {
    if (_blocks.containsKey(blockId)) {
      _remainingMem += _blocks.get(blockId).memSize
      _blocks.remove(blockId)
    }
    _cachedBlocks -= blockId
  }
  // Remaining memory as tracked by this object.
  def remainingMem: Long = _remainingMem
  // Time of the last update.
  def lastSeenMs: Long = _lastSeenMs
  // All tracked blocks; this is the internal map itself, not a copy.
  def blocks: JHashMap[BlockId, BlockStatus] = _blocks

  // This does not include broadcast blocks.
  def cachedBlocks: collection.Set[BlockId] = _cachedBlocks

  override def toString: String = "BlockManagerInfo " + timeMs + " " + _remainingMem

  def clear() {
    _blocks.clear() // empties the block map only; _cachedBlocks and _remainingMem are untouched
  }
}

BlockManagerMasterEndpoint

这是一个 endPoint,并且只会在 driver 端存在。它用来管理全局的 block 信息,负责响应 driver 和 executor 发来的、与 block 及 BlockManager 管理相关的请求。
BlockManagerMasterEndpoint 的 receiveAndReply 是所有请求的总入口。这个 endPoint 主要和 BlockManagerMaster 对象交互,而 BlockManagerMaster 对象在 driver 和 executor 上都会存在。

//这是一个 endPoint,并且只会在 driver 端 存在
private[spark]
class BlockManagerMasterEndpoint(
    override val rpcEnv: RpcEnv,//在 driver 上 是 endPoint的直接使用,在 executor上则是 driver的 ref
    val isLocal: Boolean,
    conf: SparkConf,
    listenerBus: LiveListenerBus)
  extends ThreadSafeRpcEndpoint with Logging {

  // Mapping from block manager id to the block manager's information.
  // Each BlockManagerInfo in turn tracks the blocks reported by that block manager.
  private val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]

  // Mapping from executor ID to block manager ID.
  private val blockManagerIdByExecutor = new mutable.HashMap[String, BlockManagerId]

  // Mapping from block id to the set of block managers that have the block.
  private val blockLocations = new JHashMap[BlockId, mutable.HashSet[BlockManagerId]]

  // Thread pool (and the execution context built on it) used for the Futures created below.
  private val askThreadPool = ThreadUtils.newDaemonCachedThreadPool("block-manager-ask-thread-pool")
  private implicit val askExecutionContext = ExecutionContext.fromExecutorService(askThreadPool)

  /**
   * TopologyMapper instance created reflectively. The class name is read from
   * "spark.storage.replication.topologyMapper" (default: DefaultTopologyMapper) and the
   * class is instantiated through its constructor taking a SparkConf.
   */
  private val topologyMapper = {
    val topologyMapperClassName = conf.get(
      "spark.storage.replication.topologyMapper", classOf[DefaultTopologyMapper].getName)
    val clazz = Utils.classForName(topologyMapperClassName)
    val mapper =
      clazz.getConstructor(classOf[SparkConf]).newInstance(conf).asInstanceOf[TopologyMapper]
    logInfo(s"Using $topologyMapperClassName for getting topology information")
    mapper
  }

  // Whether to ask a surviving holder to re-replicate blocks when a block manager is removed
  // (see removeBlockManager); defaults to false.
  val proactivelyReplicate = conf.get("spark.storage.replication.proactive", "false").toBoolean

  logInfo("BlockManagerMasterEndpoint up")

  /**
   * Single entry point for all requests sent to this endpoint. Each message is dispatched
   * to the matching private helper and the helper's result is sent back via `context`.
   */
  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
    // Register a block manager; the reply is its id with topology information filled in.
    case RegisterBlockManager(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint) =>
      context.reply(register(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint))

    // Apply a block status report (this may change the block's storage level).
    case _updateBlockInfo @
        UpdateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size) =>
      val isSuccess = updateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size)
      context.reply(isSuccess)
      // Publish the update only when it was accepted: a rejected update changed nothing
      // here, so listeners must not be told that the block was updated.
      if (isSuccess) {
        listenerBus.post(SparkListenerBlockUpdated(BlockUpdatedInfo(_updateBlockInfo)))
      }

    // Block managers that hold the given block.
    case GetLocations(blockId) =>
      context.reply(getLocations(blockId))

    // Locations of the block together with its status.
    case GetLocationsAndStatus(blockId) =>
      context.reply(getLocationsAndStatus(blockId))

    // Locations for several blocks at once, in the order the ids were given.
    case GetLocationsMultipleBlockIds(blockIds) =>
      context.reply(getLocationsMultipleBlockIds(blockIds))

    // All registered block managers except the driver and the requester itself.
    case GetPeers(blockManagerId) =>
      context.reply(getPeers(blockManagerId))

    // The slave endpoint reference registered for the given executor.
    case GetExecutorEndpointRef(executorId) =>
      context.reply(getExecutorEndpointRef(executorId))

    // Max and remaining memory per block manager.
    case GetMemoryStatus =>
      context.reply(memoryStatus)

    // One StorageStatus per block manager.
    case GetStorageStatus =>
      context.reply(storageStatus)

    // Status of a block on every block manager; when askSlaves is true each block manager
    // is queried through its slave endpoint instead of using the local bookkeeping.
    case GetBlockStatus(blockId, askSlaves) =>
      context.reply(blockStatus(blockId, askSlaves))

    // Ids of blocks accepted by the filter; askSlaves has the same meaning as above.
    case GetMatchingBlockIds(filter, askSlaves) =>
      context.reply(getMatchingBlockIds(filter, askSlaves))

    // Drop the RDD's blocks from the local bookkeeping and ask every block manager to
    // remove them.
    case RemoveRdd(rddId) =>
      context.reply(removeRdd(rddId))

    // Nothing is tracked here for shuffles; the request is only forwarded.
    case RemoveShuffle(shuffleId) =>
      context.reply(removeShuffle(shuffleId))

    // Remove a broadcast from the executors, and from the driver too when removeFromDriver
    // is true.
    case RemoveBroadcast(broadcastId, removeFromDriver) =>
      context.reply(removeBroadcast(broadcastId, removeFromDriver))

    // Ask every known holder of the block to remove it.
    case RemoveBlock(blockId) =>
      removeBlockFromWorkers(blockId)
      context.reply(true)

    // Forget an executor and the blocks it held.
    case RemoveExecutor(execId) =>
      removeExecutor(execId)
      context.reply(true)

    // Reply first, then stop this endpoint.
    case StopBlockManagerMaster =>
      context.reply(true)
      stop()

    // Replies true when the block manager is known, or when it is an unregistered driver
    // outside local mode; false tells the sender to re-register.
    case BlockManagerHeartbeat(blockManagerId) =>
      context.reply(heartbeatReceived(blockManagerId))

    // True only when the executor maps to a registered block manager that has at least one
    // cached block.
    case HasCachedBlocks(executorId) =>
      val hasCachedBlocks = blockManagerIdByExecutor.get(executorId)
        .flatMap(bm => blockManagerInfo.get(bm))
        .exists(_.cachedBlocks.nonEmpty)
      context.reply(hasCachedBlocks)
  }

  /**
   * Removes every block of the given RDD: first from this endpoint's own bookkeeping
   * (`blockLocations` and the per-manager `BlockManagerInfo`), then by asking every
   * registered block manager to drop the RDD.
   *
   * @return a future holding one Int reply per block manager; a block manager whose
   *         request fails with an IOException contributes 0
   */
  private def removeRdd(rddId: Int): Future[Seq[Int]] = {
    // Step 1: metadata. Collect the RDD's block ids, then erase each of them locally.
    val rddBlocks = blockLocations.asScala.keys.flatMap(_.asRDDId).filter(_.rddId == rddId)
    for (blockId <- rddBlocks) {
      val holders: mutable.HashSet[BlockManagerId] = blockLocations.get(blockId)
      for (holder <- holders; info <- blockManagerInfo.get(holder)) {
        info.removeBlock(blockId)
      }
      blockLocations.remove(blockId)
    }

    // Step 2: ask the slaves to remove the RDD, one future per block manager.
    // The implicit execution context is used for the Future sequence construction.
    val removeMsg = RemoveRdd(rddId)
    val replies = blockManagerInfo.values.map { bm =>
      val reply = bm.slaveEndpoint.ask[Int](removeMsg)
      reply.recover {
        case e: IOException =>
          logWarning(s"Error trying to remove RDD $rddId from block manager ${bm.blockManagerId}",
            e)
          0 // zero blocks were removed
      }
    }.toSeq

    Future.sequence(replies)
  }

  /**
   * Forwards a RemoveShuffle request to every registered block manager. This endpoint keeps
   * no shuffle state of its own, so there is nothing to clean up locally.
   *
   * @return a future holding one Boolean reply per block manager
   */
  private def removeShuffle(shuffleId: Int): Future[Seq[Boolean]] = {
    val removeMsg = RemoveShuffle(shuffleId)
    val acks = blockManagerInfo.values.map(_.slaveEndpoint.ask[Boolean](removeMsg)).toSeq
    Future.sequence(acks)
  }

  /**
   * Delegate RemoveBroadcast messages to each BlockManager because the master may not notified
   * of all broadcast blocks. If removeFromDriver is false, broadcast blocks are only removed
   * from the executors, but not from the driver.
   *
   * @return a future holding one Int reply per contacted block manager; a block manager
   *         whose request fails with an IOException contributes 0
   */
  private def removeBroadcast(broadcastId: Long, removeFromDriver: Boolean): Future[Seq[Int]] = {
    val removeMsg = RemoveBroadcast(broadcastId, removeFromDriver)
    val replies = blockManagerInfo.values
      // The driver's block manager is contacted only when removeFromDriver is set.
      .filter(info => removeFromDriver || !info.blockManagerId.isDriver)
      .map { bm =>
        bm.slaveEndpoint.ask[Int](removeMsg).recover {
          case e: IOException =>
            logWarning(s"Error trying to remove broadcast $broadcastId from block manager " +
              s"${bm.blockManagerId}", e)
            0 // zero blocks were removed
        }
      }
      .toSeq

    Future.sequence(replies)
  }

  /**
   * Forgets the given block manager: removes it from both lookup maps and from the location
   * set of every block it held, then posts SparkListenerBlockManagerRemoved.
   *
   * The block manager must currently be registered; otherwise the initial lookup throws.
   */
  private def removeBlockManager(blockManagerId: BlockManagerId): Unit = {
    val info = blockManagerInfo(blockManagerId)

    // Drop the block manager from both registries.
    blockManagerIdByExecutor.remove(blockManagerId.executorId)
    blockManagerInfo.remove(blockManagerId)

    info.blocks.asScala.keys.foreach { blockId =>
      // A block may live on several block managers; take this one out of its holders.
      val locations: mutable.Set[BlockManagerId] = blockLocations.get(blockId)
      locations -= blockManagerId
      // De-register the block if none of the block managers have it. Otherwise, if pro-active
      // replication is enabled, and a block is either an RDD or a test block (the latter is used
      // for unit testing), we send a message to a randomly chosen executor location to replicate
      // the given block. Note that we ignore other block types (such as broadcast/shuffle blocks
      // etc.) as replication doesn't make much sense in that context.
      if (locations.isEmpty) {
        blockLocations.remove(blockId)
        logWarning(s"No more replicas available for $blockId !")
      } else if (proactivelyReplicate && (blockId.isRDD || blockId.isInstanceOf[TestBlockId])) {
        // As a heuristic, assume single executor failure to find out the number of replicas that
        // existed before failure
        val maxReplicas = locations.size + 1
        // Seeding with the block's hash code makes the choice repeatable for a given block.
        val pick = (new Random(blockId.hashCode)).nextInt(locations.size)
        val holders = locations.toSeq
        val candidateBMId = holders(pick)
        blockManagerInfo.get(candidateBMId).foreach { candidate =>
          val remainingLocations = locations.toSeq.filter(_ != candidateBMId)
          val replicateMsg = ReplicateBlock(blockId, remainingLocations, maxReplicas)
          // Fire-and-forget: the reply is not awaited.
          candidate.slaveEndpoint.ask[Boolean](replicateMsg)
        }
      }
    }

    // Tell listeners that the block manager is gone.
    listenerBus.post(SparkListenerBlockManagerRemoved(System.currentTimeMillis(), blockManagerId))
    logInfo(s"Removing block manager $blockManagerId")

  }

  /**
   * Removes the block manager registered for the given executor, if there is one.
   * An unknown executor id is a no-op apart from the log line.
   */
  private def removeExecutor(execId: String): Unit = {
    logInfo(s"Trying to remove executor ${execId} from BlockManagerMaster.")
    val registered = blockManagerIdByExecutor.get(execId)
    registered.foreach(removeBlockManager)
  }

  /**
   * Return true if the driver knows about the given block manager. Otherwise, return false,
   * indicating that the block manager should re-register.
   *
   * An unknown block manager still yields true when it is the driver and this endpoint is
   * not running in local mode.
   */
  private def heartbeatReceived(blockManagerId: BlockManagerId): Boolean = {
    blockManagerInfo.get(blockManagerId) match {
      case Some(info) =>
        // Known block manager: refresh its last-seen timestamp.
        info.updateLastSeenMs()
        true
      case None =>
        blockManagerId.isDriver && !isLocal
    }
  }

  // Remove a block from the slaves that have it. This can only be used to remove
  // blocks that the master knows about; an unknown block id is silently ignored.
  private def removeBlockFromWorkers(blockId: BlockId): Unit = {
    val known: Option[mutable.Set[BlockManagerId]] = Option(blockLocations.get(blockId))
    for {
      locations <- known
      blockManagerId <- locations
      blockManager <- blockManagerInfo.get(blockManagerId)
    } {
      // Remove the block from the slave's BlockManager.
      // Doesn't actually wait for a confirmation and the message might get lost.
      // If message loss becomes frequent, we should add retry logic here.
      blockManager.slaveEndpoint.ask[Boolean](RemoveBlock(blockId))
    }
  }

  // Return a map from the block manager id to max memory and remaining memory.
  private def memoryStatus: Map[BlockManagerId, (Long, Long)] = {
    val perManager = blockManagerInfo.map { case (id, info) =>
      val memory = (info.maxMem, info.remainingMem)
      id -> memory
    }
    perManager.toMap
  }
  // Builds one StorageStatus per registered block manager from its memory limits and its
  // current block map.
  private def storageStatus: Array[StorageStatus] = {
    val statuses = blockManagerInfo.map { case (id, info) =>
      new StorageStatus(
        id,
        info.maxMem,
        Some(info.maxOnHeapMem),
        Some(info.maxOffHeapMem),
        info.blocks.asScala)
    }
    statuses.toArray
  }

  /**
   * Return the block's status for all block managers, if any. NOTE: This is a
   * potentially expensive operation and should only be used for testing.
   *
   * If askSlaves is true, the master queries each block manager for the most updated block
   * statuses. This is useful when the master is not informed of the given block by all block
   * managers. Otherwise the status comes from this endpoint's own bookkeeping.
   */
  private def blockStatus(
      blockId: BlockId,
      askSlaves: Boolean): Map[BlockManagerId, Future[Option[BlockStatus]]] = {
    val getBlockStatus = GetBlockStatus(blockId)
    /*
     * Rather than blocking on the block status query, master endpoint should simply return
     * Futures to avoid potential deadlocks. This can arise if there exists a block manager
     * that is also waiting for this master endpoint's response to a previous message.
     */
    val statusByManager = for (info <- blockManagerInfo.values) yield {
      val pending: Future[Option[BlockStatus]] =
        if (!askSlaves) {
          Future { info.getStatus(blockId) }
        } else {
          info.slaveEndpoint.ask[Option[BlockStatus]](getBlockStatus)
        }
      info.blockManagerId -> pending
    }
    statusByManager.toMap
  }

  /**
   * Return the ids of blocks present in all the block managers that match the given filter.
   * NOTE: This is a potentially expensive operation and should only be used for testing.
   *
   * If askSlaves is true, the master queries each block manager for the most updated block
   * statuses. This is useful when the master is not informed of the given block by all block
   * managers. Otherwise the filter is applied to this endpoint's own bookkeeping.
   */
  private def getMatchingBlockIds(
      filter: BlockId => Boolean,
      askSlaves: Boolean): Future[Seq[BlockId]] = {
    val request = GetMatchingBlockIds(filter)
    // One future per block manager, each yielding that manager's matching block ids.
    val perManager: Iterable[Future[Seq[BlockId]]] = blockManagerInfo.values.map { info =>
      if (askSlaves) {
        info.slaveEndpoint.ask[Seq[BlockId]](request)
      } else {
        Future { info.blocks.asScala.keys.filter(filter).toSeq }
      }
    }
    // Concatenate the per-manager results into one sequence.
    Future.sequence(perManager).map(_.flatten.toSeq)
  }

  /**
   * Registers a block manager and returns its BlockManagerId with topology information
   * populated, if available.
   *
   * When the resulting id is not yet registered, any block manager previously registered
   * for the same executor is removed first. SparkListenerBlockManagerAdded is posted on
   * every call, whether or not a new registration took place.
   *
   * @param idWithoutTopologyInfo id sent by the block manager (executor id, host, port)
   * @param maxOnHeapMemSize maximum on-heap memory of the block manager
   * @param maxOffHeapMemSize maximum off-heap memory of the block manager
   * @param slaveEndpoint RPC reference to the block manager's own endpoint
   */
  private def register(
      idWithoutTopologyInfo: BlockManagerId,
      maxOnHeapMemSize: Long,
      maxOffHeapMemSize: Long,
      slaveEndpoint: RpcEndpointRef): BlockManagerId = {
    // the dummy id is not expected to contain the topology information.
    // we get that info here and respond back with a more fleshed out block manager id
    val topology = topologyMapper.getTopologyForHost(idWithoutTopologyInfo.host)
    val id = BlockManagerId(
      idWithoutTopologyInfo.executorId,
      idWithoutTopologyInfo.host,
      idWithoutTopologyInfo.port,
      topology)

    val time = System.currentTimeMillis()
    val totalMemSize = maxOnHeapMemSize + maxOffHeapMemSize

    if (!blockManagerInfo.contains(id)) {
      blockManagerIdByExecutor.get(id.executorId).foreach { oldId =>
        // A block manager of the same executor already exists, so remove it (assumed dead)
        logError("Got two different block manager registrations on same executor - "
            + s" will replace old one $oldId with new one $id")
        removeExecutor(id.executorId)
      }
      logInfo("Registering block manager %s with %s RAM, %s".format(
        id.hostPort, Utils.bytesToString(totalMemSize), id))

      // Record the new block manager in both registries.
      blockManagerIdByExecutor.update(id.executorId, id)
      blockManagerInfo.update(id, new BlockManagerInfo(
        id, System.currentTimeMillis(), maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint))
    }

    // Notify listeners about the block manager.
    listenerBus.post(SparkListenerBlockManagerAdded(time, id, totalMemSize,
        Some(maxOnHeapMemSize), Some(maxOffHeapMemSize)))
    id
  }

  /**
   * Applies a block status report from a block manager and keeps `blockLocations` in sync.
   *
   * @return false only when the reporting block manager is unknown and is not an
   *         unregistered driver outside local mode; true otherwise
   */
  private def updateBlockInfo(
      blockManagerId: BlockManagerId,
      blockId: BlockId,
      storageLevel: StorageLevel,
      memSize: Long,
      diskSize: Long): Boolean = {
    blockManagerInfo.get(blockManagerId) match {
      case None =>
        // We intentionally do not register the master (except in local mode),
        // so we should not indicate failure.
        blockManagerId.isDriver && !isLocal

      case Some(info) if blockId == null =>
        // No block attached: only refresh the last-seen timestamp.
        info.updateLastSeenMs()
        true

      case Some(info) =>
        info.updateBlockInfo(blockId, storageLevel, memSize, diskSize)

        // Fetch the block's location set, creating and registering it on first sight.
        val locations: mutable.HashSet[BlockManagerId] =
          Option(blockLocations.get(blockId)).getOrElse {
            val fresh = new mutable.HashSet[BlockManagerId]
            blockLocations.put(blockId, fresh)
            fresh
          }

        if (storageLevel.isValid) {
          locations.add(blockManagerId)
        } else {
          locations.remove(blockManagerId)
        }

        // Remove the block from master tracking if it has been removed on all slaves.
        if (locations.isEmpty) {
          blockLocations.remove(blockId)
        }
        true
    }
  }

  // Block managers that hold the given block; empty when the block is not tracked.
  private def getLocations(blockId: BlockId): Seq[BlockManagerId] = {
    Option(blockLocations.get(blockId)) match {
      case Some(holders) => holders.toSeq
      case None => Seq.empty
    }
  }

  /**
   * Returns the locations of the given block together with its status, taken from the
   * first location.
   *
   * @return None when the block has no known location, when the first location is not a
   *         registered block manager, or when that block manager has no status for the block
   */
  private def getLocationsAndStatus(blockId: BlockId): Option[BlockLocationsAndStatus] = {
    val locations: Seq[BlockManagerId] =
      Option(blockLocations.get(blockId)).map(_.toSeq).getOrElse(Seq.empty)
    // Look the block manager up with `get` rather than `apply`, so that a location missing
    // from blockManagerInfo yields None instead of throwing NoSuchElementException.
    val status = locations.headOption.flatMap { bmId =>
      blockManagerInfo.get(bmId).flatMap(_.getStatus(blockId))
    }

    // A defined status implies a non-empty location list.
    status.map(found => BlockLocationsAndStatus(locations, found))
  }

  // Locations for each of the given blocks, in the same order as `blockIds`.
  private def getLocationsMultipleBlockIds(
      blockIds: Array[BlockId]): IndexedSeq[Seq[BlockManagerId]] = {
    for (blockId <- blockIds) yield getLocations(blockId)
  }

  /**
   * Get the list of the peers of the given block manager: every registered block manager
   * that is neither the driver nor the requester itself. An unregistered requester gets an
   * empty list.
   */
  private def getPeers(blockManagerId: BlockManagerId): Seq[BlockManagerId] = {
    val registered = blockManagerInfo.keySet
    if (!registered.contains(blockManagerId)) {
      Seq.empty
    } else {
      val nonDrivers = registered.filterNot(_.isDriver)
      nonDrivers.filterNot(_ == blockManagerId).toSeq
    }
  }

  /**
   * Returns an [[RpcEndpointRef]] of the [[BlockManagerSlaveEndpoint]] for sending RPC messages.
   * The result is None when the executor is unknown or its block manager is not registered.
   */
  private def getExecutorEndpointRef(executorId: String): Option[RpcEndpointRef] = {
    blockManagerIdByExecutor.get(executorId)
      .flatMap(blockManagerId => blockManagerInfo.get(blockManagerId))
      .map(_.slaveEndpoint)
  }
 //stop askThreadPool
  // Shut down the ask thread pool when the endpoint stops.
  override def onStop(): Unit = askThreadPool.shutdownNow()
}

BlockManagerMaster

这个类的对象在 driver 端和 executor 端都会存在。其中 driverEndpoint 是指向 BlockManagerMasterEndpoint 的引用:在 driver 端它指向本地的 endPoint,在 executor 端它指向 driver 上的 endPoint。该类进一步封装了 driver 或 executor 与 BlockManagerMasterEndpoint 交互的细节。

/**
 * Thin client used to talk to the BlockManagerMasterEndpoint. An instance exists on the driver
 * as well as on every executor; every public method simply sends a message through
 * `driverEndpoint` and (for most methods) waits for the reply.
 *
 * @param driverEndpoint reference to the BlockManagerMasterEndpoint. On the driver it refers to
 *                       the locally registered endpoint, on an executor it is a remote
 *                       reference to the driver's endpoint. Set to null by [[stop]].
 * @param conf Spark configuration; used here to derive the RPC ask timeout
 * @param isDriver true on the driver, false on executors
 */
private[spark]
class BlockManagerMaster(
    var driverEndpoint: RpcEndpointRef,
    conf: SparkConf,
    isDriver: Boolean)
  extends Logging {

  /** Timeout applied when awaiting futures returned by the master endpoint. */
  val timeout = RpcUtils.askRpcTimeout(conf)

  /**
   * Remove a dead executor from the driver endpoint. This is only called on the driver side.
   * Blocks until the endpoint acknowledges the removal (see [[tell]]).
   */
  def removeExecutor(execId: String): Unit = {
    tell(RemoveExecutor(execId))
    logInfo(s"Removed $execId successfully in removeExecutor")
  }

  /**
   * Request removal of a dead executor from the driver endpoint.
   * This is only called on the driver side. Non-blocking: the returned future is
   * intentionally not awaited.
   */
  def removeExecutorAsync(execId: String): Unit = {
    driverEndpoint.ask[Boolean](RemoveExecutor(execId))
    logInfo(s"Removal of executor $execId requested")
  }

  /**
   * Register the BlockManager's id with the driver. The input BlockManagerId does not contain
   * topology information. This information is obtained from the master and we respond with an
   * updated BlockManagerId fleshed out with this information.
   *
   * @return the BlockManagerId sent back by the master endpoint
   */
  def registerBlockManager(
      blockManagerId: BlockManagerId,
      maxOnHeapMemSize: Long,
      maxOffHeapMemSize: Long,
      slaveEndpoint: RpcEndpointRef): BlockManagerId = {
    logInfo(s"Registering BlockManager $blockManagerId")
    val updatedId = driverEndpoint.askSync[BlockManagerId](
      RegisterBlockManager(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint))
    logInfo(s"Registered BlockManager $updatedId")
    updatedId
  }

  /**
   * Report a block's storage level and sizes to the master endpoint.
   *
   * @return the Boolean reply of the master endpoint
   */
  def updateBlockInfo(
      blockManagerId: BlockManagerId,
      blockId: BlockId,
      storageLevel: StorageLevel,
      memSize: Long,
      diskSize: Long): Boolean = {
    val res = driverEndpoint.askSync[Boolean](
      UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize))
    logDebug(s"Updated info of block $blockId")
    res
  }

  /** Get locations of the blockId from the driver */
  def getLocations(blockId: BlockId): Seq[BlockManagerId] = {
    driverEndpoint.askSync[Seq[BlockManagerId]](GetLocations(blockId))
  }

  /** Get locations as well as status of the blockId from the driver */
  def getLocationsAndStatus(blockId: BlockId): Option[BlockLocationsAndStatus] = {
    driverEndpoint.askSync[Option[BlockLocationsAndStatus]](
      GetLocationsAndStatus(blockId))
  }

  /** Get locations of multiple blockIds from the driver */
  def getLocations(blockIds: Array[BlockId]): IndexedSeq[Seq[BlockManagerId]] = {
    driverEndpoint.askSync[IndexedSeq[Seq[BlockManagerId]]](
      GetLocationsMultipleBlockIds(blockIds))
  }

  /**
   * Check if block manager master has a block. Note that this can be used to check for only
   * those blocks that are reported to block manager master.
   */
  def contains(blockId: BlockId): Boolean = {
    getLocations(blockId).nonEmpty
  }

  /** Get ids of other nodes in the cluster from the driver */
  def getPeers(blockManagerId: BlockManagerId): Seq[BlockManagerId] = {
    driverEndpoint.askSync[Seq[BlockManagerId]](GetPeers(blockManagerId))
  }

  /** Ask the master endpoint for the slave endpoint reference of the given executor, if any. */
  def getExecutorEndpointRef(executorId: String): Option[RpcEndpointRef] = {
    driverEndpoint.askSync[Option[RpcEndpointRef]](GetExecutorEndpointRef(executorId))
  }

  /**
   * Remove a block from the slaves that have it. This can only be used to remove
   * blocks that the driver knows about.
   */
  def removeBlock(blockId: BlockId): Unit = {
    // The Boolean reply is awaited but deliberately ignored.
    driverEndpoint.askSync[Boolean](RemoveBlock(blockId))
  }

  /**
   * Remove all blocks belonging to the given RDD.
   *
   * @param rddId id of the RDD whose blocks should be removed
   * @param blocking if true, wait (up to `timeout`) for the removal future to complete
   */
  def removeRdd(rddId: Int, blocking: Boolean): Unit = {
    val future = driverEndpoint.askSync[Future[Seq[Int]]](RemoveRdd(rddId))
    // Failures are only logged here; when blocking, awaitResult below still surfaces them.
    future.failed.foreach(e =>
      logWarning(s"Failed to remove RDD $rddId - ${e.getMessage}", e)
    )(ThreadUtils.sameThread)
    if (blocking) {
      timeout.awaitResult(future)
    }
  }

  /**
   * Remove all blocks belonging to the given shuffle.
   *
   * @param shuffleId id of the shuffle whose blocks should be removed
   * @param blocking if true, wait (up to `timeout`) for the removal future to complete
   */
  def removeShuffle(shuffleId: Int, blocking: Boolean): Unit = {
    val future = driverEndpoint.askSync[Future[Seq[Boolean]]](RemoveShuffle(shuffleId))
    future.failed.foreach(e =>
      logWarning(s"Failed to remove shuffle $shuffleId - ${e.getMessage}", e)
    )(ThreadUtils.sameThread)
    if (blocking) {
      timeout.awaitResult(future)
    }
  }

  /**
   * Remove all blocks belonging to the given broadcast.
   *
   * @param broadcastId id of the broadcast whose blocks should be removed
   * @param removeFromMaster forwarded unchanged in the RemoveBroadcast message.
   *                         NOTE(review): presumably true also removes the broadcast blocks
   *                         held by the driver, while false only touches executors; that
   *                         decision is made by the endpoint, not here - confirm there.
   * @param blocking if true, wait (up to `timeout`) for the removal future to complete
   */
  def removeBroadcast(broadcastId: Long, removeFromMaster: Boolean, blocking: Boolean): Unit = {
    val future = driverEndpoint.askSync[Future[Seq[Int]]](
      RemoveBroadcast(broadcastId, removeFromMaster))
    future.failed.foreach(e =>
      logWarning(s"Failed to remove broadcast $broadcastId" +
        s" with removeFromMaster = $removeFromMaster - ${e.getMessage}", e)
    )(ThreadUtils.sameThread)
    if (blocking) {
      timeout.awaitResult(future)
    }
  }

  /**
   * Return the memory status for each block manager, in the form of a map from
   * the block manager's id to two long values. The first value is the maximum
   * amount of memory allocated for the block manager, while the second is the
   * amount of remaining memory.
   */
  def getMemoryStatus: Map[BlockManagerId, (Long, Long)] = {
    driverEndpoint.askSync[Map[BlockManagerId, (Long, Long)]](GetMemoryStatus)
  }

  /** Ask the master endpoint for the [[StorageStatus]] entries it currently tracks. */
  def getStorageStatus: Array[StorageStatus] = {
    driverEndpoint.askSync[Array[StorageStatus]](GetStorageStatus)
  }

  /**
   * Return the block's status on all block managers, if any. NOTE: This is a
   * potentially expensive operation and should only be used for testing.
   *
   * If askSlaves is true, this invokes the master to query each block manager for the most
   * updated block statuses. This is useful when the master is not informed of the given block
   * by all block managers.
   *
   * @throws SparkException if awaiting the combined status future yields null
   */
  def getBlockStatus(
      blockId: BlockId,
      askSlaves: Boolean = true): Map[BlockManagerId, BlockStatus] = {
    val msg = GetBlockStatus(blockId, askSlaves)
    /*
     * To avoid potential deadlocks, the use of Futures is necessary, because the master endpoint
     * should not block on waiting for a block manager, which can in turn be waiting for the
     * master endpoint for a response to a prior message.
     */
    val response = driverEndpoint.
      askSync[Map[BlockManagerId, Future[Option[BlockStatus]]]](msg)
    val (blockManagerIds, futures) = response.unzip
    implicit val sameThread = ThreadUtils.sameThread
    val cbf =
      implicitly[
        CanBuildFrom[Iterable[Future[Option[BlockStatus]]],
        Option[BlockStatus],
        Iterable[Option[BlockStatus]]]]
    // Collapse the per-block-manager futures into one future and wait for it.
    val blockStatus = timeout.awaitResult(
      Future.sequence[Option[BlockStatus], Iterable](futures)(cbf, ThreadUtils.sameThread))
    if (blockStatus == null) {
      throw new SparkException(s"BlockManager returned null for BlockStatus query: $blockId")
    }
    // Pair each block manager with its status and keep only those that reported one.
    blockManagerIds.zip(blockStatus).flatMap { case (blockManagerId, status) =>
      status.map { s => (blockManagerId, s) }
    }.toMap
  }

  /**
   * Return a list of ids of existing blocks such that the ids match the given filter. NOTE: This
   * is a potentially expensive operation and should only be used for testing.
   *
   * If askSlaves is true, this invokes the master to query each block manager for the most
   * updated block statuses. This is useful when the master is not informed of the given block
   * by all block managers.
   */
  def getMatchingBlockIds(
      filter: BlockId => Boolean,
      askSlaves: Boolean): Seq[BlockId] = {
    val msg = GetMatchingBlockIds(filter, askSlaves)
    val future = driverEndpoint.askSync[Future[Seq[BlockId]]](msg)
    timeout.awaitResult(future)
  }

  /**
   * Find out if the executor has cached blocks. This method does not consider broadcast blocks,
   * since they are not reported to the master.
   */
  def hasCachedBlocks(executorId: String): Boolean = {
    driverEndpoint.askSync[Boolean](HasCachedBlocks(executorId))
  }

  /**
   * Stop the driver endpoint, called only on the Spark driver node.
   *
   * Does nothing unless this instance is on the driver and `driverEndpoint` is still set.
   * Otherwise sends StopBlockManagerMaster, waits for the acknowledgement and then clears
   * `driverEndpoint`, so a second call is a no-op.
   */
  def stop(): Unit = {
    if (driverEndpoint != null && isDriver) {
      tell(StopBlockManagerMaster)
      driverEndpoint = null
      logInfo("BlockManagerMaster stopped")
    }
  }

  /**
   * Send a message to the master endpoint, to which we expect it to reply with true.
   * Despite being used like a one-way send, this blocks on `askSync` until the reply arrives.
   *
   * @throws SparkException if the endpoint replies with false
   */
  private def tell(message: Any): Unit = {
    if (!driverEndpoint.askSync[Boolean](message)) {
      throw new SparkException("BlockManagerMasterEndpoint returned false, expected true.")
    }
  }

}

private[spark] object BlockManagerMaster {
  /** Name under which the block manager master endpoint is registered and looked up. */
  val DRIVER_ENDPOINT_NAME: String = "BlockManagerMaster"
}

总结

在 SparkEnv 中是这样使用 BlockManagerMaster 的,如下:

/**
 * Builds the BlockManagerMaster for this SparkEnv.
 * Per the author's note: on the driver, registerOrLookupEndpoint installs the
 * BlockManagerMaster endpoint itself; on an executor it yields a reference to the
 * driver's BlockManagerMaster endpoint.
 */
    val blockManagerMaster = new BlockManagerMaster(
      registerOrLookupEndpoint(
      BlockManagerMaster.DRIVER_ENDPOINT_NAME, // DRIVER_ENDPOINT_NAME is the string "BlockManagerMaster"
      new BlockManagerMasterEndpoint(rpcEnv, isLocal, conf, listenerBus)),
      // The BlockManagerMasterEndpoint instance only matters on the driver.
      // NOTE(review): the author states that on executors this object is unreachable and may be
      // garbage collected; if registerOrLookupEndpoint takes this argument by name it would not
      // even be constructed there - confirm against registerOrLookupEndpoint's signature.
      conf, isDriver)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Spark SQL 是 Apache Spark 生态系统中的一个组件,它提供了用于处理结构化数据的 API。Spark SQL 的执行源码解读主要包括以下几个方面: 1. 解析器(Parser):Spark SQL 使用开源项目 ANTLR 生成的解析器来将 SQL 语句解析为抽象语法树(AST)。该解析器支持 ANSI SQL 标准,可以将 SQL 语句转换为内部的逻辑计划。 2. 优化器(Optimizer):Spark SQL 使用 Catalyst 优化器来对 AST 进行一系列的优化操作。其中包括常量折叠、谓词下推、投影下推等优化规则。通过这些优化规则,Spark SQL 可以将逻辑计划转换为更高效的物理计划。 3. 物理计划生成(Physical Plan Generation):一旦逻辑计划优化完成,Spark SQL 就会根据数据的存储格式和分布式计算模型生成物理计划。这个过程包括将逻辑计划转换为数据流图、选择最优的执行策略(如 Shuffle 与 Broadcast Join)等。 4. 执行引擎(Execution Engine):Spark SQL 的执行引擎负责将物理计划转换为可执行的任务,并在集群上执行这些任务。Spark SQL 支持两种执行模式:本地模式和集群模式。在本地模式下,Spark SQL 会在单个节点上执行任务;而在集群模式下,Spark SQL 会将任务分布到多个节点上进行并行计算。 5. 数据存取(Data Access):Spark SQL 支持多种数据源的读取和写入操作。它可以直接读取 Hadoop 分布式文件系统(HDFS)上的数据,还可以通过适配器支持其他数据存储系统,如 Apache Hive、Apache HBase、MySQL 等。 总的来说,Spark SQL 的执行源码解读涵盖了解析器、优化器、物理计划生成、执行引擎以及数据存取等方面的内容。通过深入了解这些内容,可以更好地理解 Spark SQL 的内部工作原理,并对其进行二次开发和优化。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值