Spark-BlockManagerMaster、BlockManagerMasterEndpoint 源码解析
abstract class BlockId
这个类是 BlockId 的抽象基类,一共有 10 个实现类,每个实现类都表明该 BlockId 属于哪一种 Block。
/**
 * Abstract base of every block identifier. The class is sealed, so all concrete kinds are
 * declared alongside it; the concrete subclass tells which kind of block an id refers to.
 */
sealed abstract class BlockId {
  /** A globally unique identifier for this Block. Can be used for ser/de. */
  def name: String

  // Convenience type tests.
  def isRDD: Boolean = this.isInstanceOf[RDDBlockId]
  def isShuffle: Boolean = this.isInstanceOf[ShuffleBlockId]
  def isBroadcast: Boolean = this.isInstanceOf[BroadcastBlockId]

  /** This id viewed as an [[RDDBlockId]], or None when it is some other kind of block. */
  def asRDDId: Option[RDDBlockId] = this match {
    case rdd: RDDBlockId => Some(rdd)
    case _ => None
  }

  override def toString: String = name
}
object BlockId
这是 abstract class BlockId 的伴生对象,里面定义了各类 BlockId 名称对应的正则表达式,以及根据名称解析出对应 BlockId 实现类的 apply 工厂方法。
/**
 * Companion of [[BlockId]]: the name pattern of every block kind plus a factory that parses
 * a block name back into the matching [[BlockId]] subclass.
 */
object BlockId {
  // One pattern per BlockId subclass. Used as extractors below, these patterns must match
  // the WHOLE name, so e.g. "shuffle_1_2_3.data" is not captured by the shorter SHUFFLE.
  val RDD = "rdd_([0-9]+)_([0-9]+)".r
  val SHUFFLE = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)".r
  // The '.' in front of the extension is escaped: an unescaped dot is a regex wildcard and
  // would also accept malformed names such as "shuffle_1_2_3xdata".
  val SHUFFLE_DATA = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)\\.data".r
  val SHUFFLE_INDEX = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)\\.index".r
  val BROADCAST = "broadcast_([0-9]+)([_A-Za-z0-9]*)".r
  val TASKRESULT = "taskresult_([0-9]+)".r
  val STREAM = "input-([0-9]+)-([0-9]+)".r
  val TEMP_LOCAL = "temp_local_([-A-Fa-f0-9]+)".r
  val TEMP_SHUFFLE = "temp_shuffle_([-A-Fa-f0-9]+)".r
  val TEST = "test_(.*)".r

  /**
   * Parses a block name into the corresponding [[BlockId]] subclass.
   *
   * @param name a block name in the format produced by some subclass's `name`
   * @return the id whose `name` is the given string
   * @throws UnrecognizedBlockId if the name matches none of the known patterns
   */
  def apply(name: String): BlockId = name match {
    case RDD(rddId, splitIndex) =>
      RDDBlockId(rddId.toInt, splitIndex.toInt)
    case SHUFFLE(shuffleId, mapId, reduceId) =>
      ShuffleBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt)
    case SHUFFLE_DATA(shuffleId, mapId, reduceId) =>
      ShuffleDataBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt)
    case SHUFFLE_INDEX(shuffleId, mapId, reduceId) =>
      ShuffleIndexBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt)
    case BROADCAST(broadcastId, field) =>
      // The second group still carries its leading '_' separator; drop it.
      BroadcastBlockId(broadcastId.toLong, field.stripPrefix("_"))
    case TASKRESULT(taskId) =>
      TaskResultBlockId(taskId.toLong)
    case STREAM(streamId, uniqueId) =>
      StreamBlockId(streamId.toInt, uniqueId.toLong)
    case TEMP_LOCAL(uuid) =>
      TempLocalBlockId(UUID.fromString(uuid))
    case TEMP_SHUFFLE(uuid) =>
      TempShuffleBlockId(UUID.fromString(uuid))
    case TEST(value) =>
      TestBlockId(value)
    case _ =>
      throw new UnrecognizedBlockId(name)
  }
}
RDDBlockId
/** Id of one partition (`splitIndex`) of the RDD `rddId`; named `rdd_<rddId>_<splitIndex>`. */
case class RDDBlockId(rddId: Int, splitIndex: Int) extends BlockId {
  override def name: String = s"rdd_${rddId}_${splitIndex}"
}
ShuffleBlockId
/** Id of a shuffle block; named `shuffle_<shuffleId>_<mapId>_<reduceId>`. */
case class ShuffleBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId {
  override def name: String = s"shuffle_${shuffleId}_${mapId}_${reduceId}"
}
ShuffleDataBlockId
/** Id whose name is the shuffle block name followed by the `.data` extension. */
case class ShuffleDataBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId {
  override def name: String = s"shuffle_${shuffleId}_${mapId}_${reduceId}.data"
}
ShuffleIndexBlockId
/** Id whose name is the shuffle block name followed by the `.index` extension. */
case class ShuffleIndexBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId {
  override def name: String = s"shuffle_${shuffleId}_${mapId}_${reduceId}.index"
}
BroadcastBlockId
/**
 * Id of a broadcast block; named `broadcast_<broadcastId>`, with `_<field>` appended only
 * when `field` is not the empty string.
 */
case class BroadcastBlockId(broadcastId: Long, field: String = "") extends BlockId {
  override def name: String = {
    val suffix = if (field == "") "" else s"_$field"
    s"broadcast_$broadcastId$suffix"
  }
}
TaskResultBlockId
/** Id of a task-result block; named `taskresult_<taskId>`. */
case class TaskResultBlockId(taskId: Long) extends BlockId {
  override def name: String = s"taskresult_$taskId"
}
StreamBlockId
/** Id of a stream block; named `input-<streamId>-<uniqueId>`. */
case class StreamBlockId(streamId: Int, uniqueId: Long) extends BlockId {
  override def name: String = s"input-$streamId-$uniqueId"
}
TempLocalBlockId
/** Id of a temporary local block; named `temp_local_<uuid>`. */
private[spark] case class TempLocalBlockId(id: UUID) extends BlockId {
  override def name: String = s"temp_local_$id"
}
TempShuffleBlockId
/** Id of a temporary shuffle block; named `temp_shuffle_<uuid>`. */
private[spark] case class TempShuffleBlockId(id: UUID) extends BlockId {
  override def name: String = s"temp_shuffle_$id"
}
TestBlockId
/** Block id with a free-form string payload; named `test_<id>`. */
private[spark] case class TestBlockId(id: String) extends BlockId {
  override def name: String = s"test_$id"
}
BlockManagerId
这个类用来标识一个 BlockManager(位于 driver 或某个 executor 上),因此类里面包含 executorId、主机名(hostname)、端口(port)以及可选的拓扑信息。
/**
 * Identifies a block manager by executor id, host name, port and optional topology info.
 *
 * The constructor is private; the fields are `var`s only so that `readExternal` can fill in
 * an instance created through the no-arg constructor during deserialization.
 */
class BlockManagerId private (
    private var executorId_ : String,
    private var host_ : String, // a host name, not an IP
    private var port_ : Int,
    private var topologyInfo_ : Option[String])
  extends Externalizable {

  private def this() = this(null, null, 0, None) // For deserialization only

  def executorId: String = executorId_

  // Constructor-time validation; skipped for the null host used by the deserialization
  // constructor. (Utils.checkHost is not shown here - presumably it validates the host string.)
  if (null != host_) {
    Utils.checkHost(host_)
    assert (port_ > 0)
  }

  /** Returns `host:port`, re-running the same host/port checks as the constructor. */
  def hostPort: String = {
    // DEBUG code
    Utils.checkHost(host)
    assert (port > 0)
    host + ":" + port
  }

  def host: String = host_

  def port: Int = port_

  def topologyInfo: Option[String] = topologyInfo_

  /** True when the executor id equals one of the two driver identifiers of SparkContext. */
  def isDriver: Boolean = {
    executorId == SparkContext.DRIVER_IDENTIFIER ||
      executorId == SparkContext.LEGACY_DRIVER_IDENTIFIER
  }

  // Serialization. Field order: executorId, host, port, topology-present flag, then the
  // topology string only when present. readExternal must read in exactly the same order.
  override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
    out.writeUTF(executorId_)
    out.writeUTF(host_)
    out.writeInt(port_)
    out.writeBoolean(topologyInfo_.isDefined)
    // we only write topologyInfo if we have it
    topologyInfo.foreach(out.writeUTF(_))
  }

  // Deserialization: mirror image of writeExternal.
  override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
    executorId_ = in.readUTF()
    host_ = in.readUTF()
    port_ = in.readInt()
    val isTopologyInfoAvailable = in.readBoolean()
    topologyInfo_ = if (isTopologyInfoAvailable) Option(in.readUTF()) else None
  }

  // Swaps the freshly deserialized instance for the one held in the companion's cache.
  @throws(classOf[IOException])
  private def readResolve(): Object = BlockManagerId.getCachedBlockManagerId(this)

  override def toString: String = s"BlockManagerId($executorId, $host, $port, $topologyInfo)"

  // hashCode and equals cover the same four fields.
  override def hashCode: Int =
    ((executorId.hashCode * 41 + host.hashCode) * 41 + port) * 41 + topologyInfo.hashCode

  override def equals(that: Any): Boolean = that match {
    case id: BlockManagerId =>
      executorId == id.executorId &&
        port == id.port &&
        host == id.host &&
        topologyInfo == id.topologyInfo
    case _ =>
      false
  }
}
object BlockManagerId
BlockManagerId class 的伴生对象。
/** Factory and instance cache for [[BlockManagerId]]; every factory goes through the cache. */
private[spark] object BlockManagerId {

  /**
   * Returns a [[org.apache.spark.storage.BlockManagerId]] for the given configuration.
   *
   * @param execId ID of the executor.
   * @param host Host name of the block manager.
   * @param port Port of the block manager.
   * @param topologyInfo topology information for the blockmanager, if available
   *                     This can be network topology information for use while choosing peers
   *                     while replicating data blocks. More information available here:
   *                     [[org.apache.spark.storage.TopologyMapper]]
   * @return The cached [[org.apache.spark.storage.BlockManagerId]] equal to these values.
   */
  def apply(
      execId: String,
      host: String,
      port: Int,
      topologyInfo: Option[String] = None): BlockManagerId = {
    val candidate = new BlockManagerId(execId, host, port, topologyInfo)
    getCachedBlockManagerId(candidate)
  }

  /** Reads a BlockManagerId from `in` and returns the cached instance equal to it. */
  def apply(in: ObjectInput): BlockManagerId = {
    val deserialized = new BlockManagerId()
    deserialized.readExternal(in)
    getCachedBlockManagerId(deserialized)
  }

  /**
   * The max cache size is hardcoded to 10000, since the size of a BlockManagerId
   * object is about 48B, the total memory cost should be below 1MB which is feasible.
   */
  val blockManagerIdCache = CacheBuilder.newBuilder()
    .maximumSize(10000)
    .build(new CacheLoader[BlockManagerId, BlockManagerId]() {
      // On a cache miss the looked-up id itself becomes the cached value.
      override def load(id: BlockManagerId) = id
    })

  /** Looks `id` up in the cache, loading it on a miss. */
  def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = blockManagerIdCache.get(id)
}
BlockStatus class & object
/** Status of a block: its storage level plus the sizes held in memory and on disk. */
case class BlockStatus(storageLevel: StorageLevel, memSize: Long, diskSize: Long) {
  /** True when the combined memory and disk size is positive. */
  def isCached: Boolean = {
    val totalSize = memSize + diskSize
    totalSize > 0
  }
}

@DeveloperApi
object BlockStatus {
  /** A status with `StorageLevel.NONE` and zero sizes. */
  def empty: BlockStatus = new BlockStatus(StorageLevel.NONE, 0L, 0L)
}
BlockManagerInfo
这个类的主要功能是记录某个 BlockManager(executor)上有哪些 block,因此它持有该 executor 的 BlockManagerId、堆内/堆外内存上限,以及 _blocks(BlockId 到 BlockStatus 的映射)和 _cachedBlocks(已缓存的非广播 block 集合)两份记录。
/**
 * Bookkeeping for one block manager: which blocks it holds, each block's status, and how
 * much of its memory budget is still unaccounted for.
 *
 * @param blockManagerId id (executor, host, port) of the block manager being tracked
 * @param timeMs initial value of the last-seen timestamp; also printed by `toString`
 * @param maxOnHeapMem on-heap memory limit
 * @param maxOffHeapMem off-heap memory limit
 * @param slaveEndpoint RPC reference to this block manager's endpoint
 */
private[spark] class BlockManagerInfo(
    val blockManagerId: BlockManagerId, // which executor / host / port this entry is about
    timeMs: Long,
    val maxOnHeapMem: Long, // on-heap limit
    val maxOffHeapMem: Long, // off-heap limit
    val slaveEndpoint: RpcEndpointRef) // endpoint of the tracked block manager
  extends Logging {

  val maxMem = maxOnHeapMem + maxOffHeapMem // on-heap + off-heap

  private var _lastSeenMs: Long = timeMs // last update time
  private var _remainingMem: Long = maxMem // starts out as the full budget (on-heap + off-heap)

  // Mapping from block id to its status.
  private val _blocks = new JHashMap[BlockId, BlockStatus] // BlockId -> BlockStatus

  // Cached blocks held by this BlockManager. This does not include broadcast blocks.
  private val _cachedBlocks = new mutable.HashSet[BlockId] // BlockId

  /** Status recorded for `blockId`, or None when the block is not tracked here. */
  def getStatus(blockId: BlockId): Option[BlockStatus] = Option(_blocks.get(blockId))

  // Refreshes the last-seen timestamp to the current time.
  def updateLastSeenMs() {
    _lastSeenMs = System.currentTimeMillis()
  }

  /**
   * Records a new status for one block (its storage level may change) and adjusts the
   * remaining-memory figure. An invalid `storageLevel` drops an already tracked block.
   */
  def updateBlockInfo(
      blockId: BlockId,
      storageLevel: StorageLevel,
      memSize: Long,
      diskSize: Long) {

    updateLastSeenMs() // any update also counts as a sign of life

    val blockExists = _blocks.containsKey(blockId) // is the block already tracked?
    var originalMemSize: Long = 0
    var originalDiskSize: Long = 0
    var originalLevel: StorageLevel = StorageLevel.NONE

    if (blockExists) { // already tracked: fetch the previous status
      // The block exists on the slave already.
      val blockStatus: BlockStatus = _blocks.get(blockId)
      originalLevel = blockStatus.storageLevel
      originalMemSize = blockStatus.memSize
      originalDiskSize = blockStatus.diskSize

      if (originalLevel.useMemory) {
        _remainingMem += originalMemSize // give back the memory of the previous status first
      }
    }

    if (storageLevel.isValid) { // the new level stores the block in memory and/or on disk
      /* isValid means it is either stored in-memory or on-disk.
       * The memSize here indicates the data size in or dropped from memory,
       * externalBlockStoreSize here indicates the data size in or dropped from externalBlockStore,
       * and the diskSize here indicates the data size in or dropped to disk.
       * They can be both larger than 0, when a block is dropped from memory to disk.
       * Therefore, a safe way to set BlockStatus is to set its info in accurate modes. */
      var blockStatus: BlockStatus = null
      if (storageLevel.useMemory) { // the block is to be kept in memory
        blockStatus = BlockStatus(storageLevel, memSize = memSize, diskSize = 0)
        _blocks.put(blockId, blockStatus) // record the in-memory status
        _remainingMem -= memSize // charge the memory budget
        if (blockExists) {
          logInfo(s"Updated $blockId in memory on ${blockManagerId.hostPort}" +
            s" (current size: ${Utils.bytesToString(memSize)}," +
            s" original size: ${Utils.bytesToString(originalMemSize)}," +
            s" free: ${Utils.bytesToString(_remainingMem)})")
        } else {
          logInfo(s"Added $blockId in memory on ${blockManagerId.hostPort}" +
            s" (size: ${Utils.bytesToString(memSize)}," +
            s" free: ${Utils.bytesToString(_remainingMem)})")
        }
      }
      // NOTE(review): for a level with both useMemory and useDisk this branch overwrites the
      // entry stored just above with memSize = 0, while _remainingMem stays charged with
      // memSize; a later update/removal then gives back 0 - confirm this is intended.
      if (storageLevel.useDisk) {
        blockStatus = BlockStatus(storageLevel, memSize = 0, diskSize = diskSize)
        _blocks.put(blockId, blockStatus) // record the on-disk status
        if (blockExists) {
          logInfo(s"Updated $blockId on disk on ${blockManagerId.hostPort}" +
            s" (current size: ${Utils.bytesToString(diskSize)}," +
            s" original size: ${Utils.bytesToString(originalDiskSize)})")
        } else {
          logInfo(s"Added $blockId on disk on ${blockManagerId.hostPort}" +
            s" (size: ${Utils.bytesToString(diskSize)})")
        }
      }
      if (!blockId.isBroadcast && blockStatus.isCached) {
        _cachedBlocks += blockId
      }
    }
    else if (blockExists) { // invalid level for a tracked block: stop tracking it
      // If isValid is not true, drop the block.
      _blocks.remove(blockId)
      _cachedBlocks -= blockId
      if (originalLevel.useMemory) { // log only
        logInfo(s"Removed $blockId on ${blockManagerId.hostPort} in memory" +
          s" (size: ${Utils.bytesToString(originalMemSize)}," +
          s" free: ${Utils.bytesToString(_remainingMem)})")
      }
      if (originalLevel.useDisk) { // log only
        logInfo(s"Removed $blockId on ${blockManagerId.hostPort} on disk" +
          s" (size: ${Utils.bytesToString(originalDiskSize)})")
      }
    }
  }

  // Stops tracking the block, giving its recorded memory size back to the remaining budget.
  def removeBlock(blockId: BlockId) {
    if (_blocks.containsKey(blockId)) {
      _remainingMem += _blocks.get(blockId).memSize
      _blocks.remove(blockId)
    }
    _cachedBlocks -= blockId
  }

  // Remaining memory according to the accounting above.
  def remainingMem: Long = _remainingMem

  // Timestamp of the last update.
  def lastSeenMs: Long = _lastSeenMs

  // All tracked blocks; note this exposes the internal mutable map itself.
  def blocks: JHashMap[BlockId, BlockStatus] = _blocks

  // This does not include broadcast blocks.
  def cachedBlocks: collection.Set[BlockId] = _cachedBlocks

  override def toString: String = "BlockManagerInfo " + timeMs + " " + _remainingMem

  // Empties the block map only; _cachedBlocks and _remainingMem are left untouched.
  def clear() {
    _blocks.clear()
  }
}
BlockManagerMasterEndpoint
这是一个 RPC endPoint,并且只会在 driver 端存在。它用来管理全局的 block 元数据,响应 driver 和各个 executor 上的 BlockManager 发来的关于 block 与 BlockManager 的各种请求。
BlockManagerMasterEndpoint 的 receiveAndReply 是所有请求的统一入口。这个 endPoint 主要和 BlockManagerMaster 对象交互,而 BlockManagerMaster 对象在 driver 和 executor 端都存在。
//这是一个 endPoint,并且只会在 driver 端 存在
private[spark]
class BlockManagerMasterEndpoint(
override val rpcEnv: RpcEnv,//在 driver 上 是 endPoint的直接使用,在 executor上则是 driver的 ref
val isLocal: Boolean,
conf: SparkConf,
listenerBus: LiveListenerBus)
extends ThreadSafeRpcEndpoint with Logging {
// Mapping from block manager id to the block manager's information.
// (Each BlockManagerInfo in turn tracks the blocks held by that block manager.)
private val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]

// Mapping from executor ID to block manager ID.
private val blockManagerIdByExecutor = new mutable.HashMap[String, BlockManagerId]

// Mapping from block id to the set of block managers that have the block.
private val blockLocations = new JHashMap[BlockId, mutable.HashSet[BlockManagerId]]

// Thread pool, and the implicit execution context built on it, used by the Futures
// created in this endpoint; shut down in onStop.
private val askThreadPool = ThreadUtils.newDaemonCachedThreadPool("block-manager-ask-thread-pool")
private implicit val askExecutionContext = ExecutionContext.fromExecutorService(askThreadPool)

/**
 * TopologyMapper instance created reflectively from the class named by
 * "spark.storage.replication.topologyMapper" (default: DefaultTopologyMapper); the class
 * must offer a constructor taking a SparkConf. It supplies the topology info of a host
 * when a block manager registers.
 */
private val topologyMapper = {
  val topologyMapperClassName = conf.get(
    "spark.storage.replication.topologyMapper", classOf[DefaultTopologyMapper].getName)
  val clazz = Utils.classForName(topologyMapperClassName)
  val mapper =
    clazz.getConstructor(classOf[SparkConf]).newInstance(conf).asInstanceOf[TopologyMapper]
  logInfo(s"Using $topologyMapperClassName for getting topology information")
  mapper
}

// Whether to re-replicate blocks proactively when a block manager is removed
// ("spark.storage.replication.proactive", default "false").
val proactivelyReplicate = conf.get("spark.storage.replication.proactive", "false").toBoolean

logInfo("BlockManagerMasterEndpoint up")
/**
 * Single entry point for the requests handled by this endpoint: every message is dispatched
 * to the matching private helper and answered through `context.reply`.
 */
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
  // Register a block manager; the reply is the id returned by `register`.
  case RegisterBlockManager(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint) =>
    val registeredId =
      register(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint)
    context.reply(registeredId)

  // Update one block's info (possibly its storage level), reply with the Boolean outcome,
  // then publish the update on the listener bus.
  case update @ UpdateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size) =>
    val updated = updateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size)
    context.reply(updated)
    listenerBus.post(SparkListenerBlockUpdated(BlockUpdatedInfo(update)))

  // Block managers holding the block.
  case GetLocations(blockId) =>
    context.reply(getLocations(blockId))

  // Locations plus status of the block, if known.
  case GetLocationsAndStatus(blockId) =>
    context.reply(getLocationsAndStatus(blockId))

  // Locations of several blocks at once.
  case GetLocationsMultipleBlockIds(blockIds) =>
    context.reply(getLocationsMultipleBlockIds(blockIds))

  // Registered non-driver block managers other than the requester.
  case GetPeers(blockManagerId) =>
    context.reply(getPeers(blockManagerId))

  // Endpoint reference of the block manager running on the given executor.
  case GetExecutorEndpointRef(executorId) =>
    context.reply(getExecutorEndpointRef(executorId))

  // Max and remaining memory per block manager.
  case GetMemoryStatus =>
    context.reply(memoryStatus)

  // StorageStatus of every block manager.
  case GetStorageStatus =>
    context.reply(storageStatus)

  // Status of a block per block manager; with askSlaves the block managers are queried.
  case GetBlockStatus(blockId, askSlaves) =>
    context.reply(blockStatus(blockId, askSlaves))

  // Ids of blocks matching the filter; with askSlaves the block managers are queried.
  case GetMatchingBlockIds(filter, askSlaves) =>
    context.reply(getMatchingBlockIds(filter, askSlaves))

  // Forget the blocks of an RDD here and ask every block manager to remove them.
  case RemoveRdd(rddId) =>
    context.reply(removeRdd(rddId))

  // Ask every block manager to remove a shuffle.
  case RemoveShuffle(shuffleId) =>
    context.reply(removeShuffle(shuffleId))

  // Ask block managers to remove a broadcast; the driver's own only when removeFromDriver.
  case RemoveBroadcast(broadcastId, removeFromDriver) =>
    context.reply(removeBroadcast(broadcastId, removeFromDriver))

  // Ask the block managers holding the block to remove it.
  case RemoveBlock(blockId) =>
    removeBlockFromWorkers(blockId)
    context.reply(true)

  // Drop an executor's block manager from all bookkeeping.
  case RemoveExecutor(execId) =>
    removeExecutor(execId)
    context.reply(true)

  // Acknowledge first, then stop this endpoint.
  case StopBlockManagerMaster =>
    context.reply(true)
    stop()

  // Heartbeat; a false reply tells the block manager to re-register.
  case BlockManagerHeartbeat(blockManagerId) =>
    context.reply(heartbeatReceived(blockManagerId))

  // True only when the executor's block manager is registered and has cached blocks.
  case HasCachedBlocks(executorId) =>
    val hasCachedBlocks = blockManagerIdByExecutor.get(executorId)
      .flatMap(bmId => blockManagerInfo.get(bmId))
      .exists(_.cachedBlocks.nonEmpty)
    context.reply(hasCachedBlocks)
}
/**
 * Removes every block of the given RDD from the master-side bookkeeping (blockLocations and
 * the per-manager BlockManagerInfo), then asks every block manager to remove the RDD.
 *
 * @return future of the per-block-manager replies; a reply that fails with an IOException
 *         is logged and counted as 0
 */
private def removeRdd(rddId: Int): Future[Seq[Int]] = {
  // Collect the RDD's block ids first; the maps are modified in the loop below.
  val rddBlocks = blockLocations.asScala.keys.flatMap(_.asRDDId).filter(_.rddId == rddId)
  for (rddBlock <- rddBlocks) {
    val holders: mutable.HashSet[BlockManagerId] = blockLocations.get(rddBlock)
    for (holder <- holders; holderInfo <- blockManagerInfo.get(holder)) {
      holderInfo.removeBlock(rddBlock)
    }
    blockLocations.remove(rddBlock)
  }

  // Ask the slaves to remove the RDD, and put the result in a sequence of Futures.
  // The dispatcher is used as an implicit argument into the Future sequence construction.
  val removeMsg = RemoveRdd(rddId)
  val pendingReplies = blockManagerInfo.values.map { info =>
    info.slaveEndpoint.ask[Int](removeMsg).recover {
      case e: IOException =>
        logWarning(
          s"Error trying to remove RDD $rddId from block manager ${info.blockManagerId}", e)
        0 // zero blocks were removed
    }
  }.toSeq

  Future.sequence(pendingReplies)
}
/**
 * Asks every registered block manager to remove the given shuffle. The master-side data
 * structures need no change for this.
 */
private def removeShuffle(shuffleId: Int): Future[Seq[Boolean]] = {
  val removeMsg = RemoveShuffle(shuffleId)
  val pendingReplies = blockManagerInfo.values.map { info =>
    info.slaveEndpoint.ask[Boolean](removeMsg)
  }.toSeq
  Future.sequence(pendingReplies)
}
/**
 * Delegate RemoveBroadcast messages to each BlockManager because the master may not notified
 * of all broadcast blocks. If removeFromDriver is false, broadcast blocks are only removed
 * from the executors, but not from the driver.
 *
 * @return future of the per-block-manager replies; a reply that fails with an IOException
 *         is logged and counted as 0
 */
private def removeBroadcast(broadcastId: Long, removeFromDriver: Boolean): Future[Seq[Int]] = {
  val removeMsg = RemoveBroadcast(broadcastId, removeFromDriver)

  // Every block manager when removeFromDriver is set, otherwise the non-driver ones only.
  val targets = blockManagerInfo.values.filter { info =>
    removeFromDriver || !info.blockManagerId.isDriver
  }

  val pendingReplies = targets.map { info =>
    info.slaveEndpoint.ask[Int](removeMsg).recover {
      case e: IOException =>
        logWarning(s"Error trying to remove broadcast $broadcastId from block manager " +
          s"${info.blockManagerId}", e)
        0 // zero blocks were removed
    }
  }.toSeq

  Future.sequence(pendingReplies)
}
/**
 * Removes a block manager from all master-side bookkeeping and posts
 * SparkListenerBlockManagerRemoved on the listener bus.
 *
 * For every block the removed manager held: the block is de-registered when no other block
 * manager has it; otherwise, when proactive replication is enabled and the block is an RDD
 * or test block, one remaining holder is asked to replicate it.
 *
 * @param blockManagerId id of a registered block manager (an unregistered id makes the
 *                       initial lookup throw NoSuchElementException)
 */
private def removeBlockManager(blockManagerId: BlockManagerId): Unit = {
  val info = blockManagerInfo(blockManagerId)

  // Remove the block manager from blockManagerIdByExecutor.
  blockManagerIdByExecutor -= blockManagerId.executorId

  // Remove it from blockManagerInfo and remove all the blocks.
  blockManagerInfo.remove(blockManagerId)

  val iterator = info.blocks.keySet.iterator
  while (iterator.hasNext) {
    val blockId = iterator.next
    // The Java map returns null for a block it does not track; wrap the lookup so that such
    // a block is skipped instead of failing with a NullPointerException mid-removal.
    Option(blockLocations.get(blockId)).foreach { locations =>
      locations -= blockManagerId
      // De-register the block if none of the block managers have it. Otherwise, if pro-active
      // replication is enabled, and a block is either an RDD or a test block (the latter is
      // used for unit testing), we send a message to a randomly chosen executor location to
      // replicate the given block. Note that we ignore other block types (such as
      // broadcast/shuffle blocks etc.) as replication doesn't make much sense in that context.
      if (locations.isEmpty) {
        blockLocations.remove(blockId)
        logWarning(s"No more replicas available for $blockId !")
      } else if (proactivelyReplicate && (blockId.isRDD || blockId.isInstanceOf[TestBlockId])) {
        // As a heuristic, assume single executor failure to find out the number of replicas
        // that existed before failure
        val maxReplicas = locations.size + 1
        // Named so that it neither shadows the blockLocations field nor the lambda parameter.
        val candidates = locations.toSeq
        val chosenIndex = (new Random(blockId.hashCode)).nextInt(candidates.size)
        val candidateBMId = candidates(chosenIndex)
        blockManagerInfo.get(candidateBMId).foreach { candidateInfo =>
          val remainingLocations = candidates.filter(_ != candidateBMId)
          val replicateMsg = ReplicateBlock(blockId, remainingLocations, maxReplicas)
          // Fire and forget: the reply is not awaited.
          candidateInfo.slaveEndpoint.ask[Boolean](replicateMsg)
        }
      }
    }
  }

  listenerBus.post(SparkListenerBlockManagerRemoved(System.currentTimeMillis(), blockManagerId))
  logInfo(s"Removing block manager $blockManagerId")
}
/** Removes the block manager registered for the given executor, if there is one. */
private def removeExecutor(execId: String): Unit = {
  logInfo(s"Trying to remove executor $execId from BlockManagerMaster.")
  for (bmId <- blockManagerIdByExecutor.get(execId)) {
    removeBlockManager(bmId)
  }
}
/**
 * Return true if the driver knows about the given block manager. Otherwise, return false,
 * indicating that the block manager should re-register.
 *
 * An unknown block manager still yields true when it is the driver's and this endpoint is
 * not in local mode. For a known one the last-seen timestamp is refreshed.
 */
private def heartbeatReceived(blockManagerId: BlockManagerId): Boolean =
  blockManagerInfo.get(blockManagerId) match {
    case Some(info) =>
      info.updateLastSeenMs()
      true
    case None =>
      blockManagerId.isDriver && !isLocal
  }
// Remove a block from the slaves that have it. This can only be used to remove
// blocks that the master knows about.
private def removeBlockFromWorkers(blockId: BlockId): Unit = {
  for {
    holders <- Option(blockLocations.get(blockId)) // null when the block is unknown
    holder <- holders
    holderInfo <- blockManagerInfo.get(holder)
  } {
    // Remove the block from the slave's BlockManager.
    // Doesn't actually wait for a confirmation and the message might get lost.
    // If message loss becomes frequent, we should add retry logic here.
    holderInfo.slaveEndpoint.ask[Boolean](RemoveBlock(blockId))
  }
}
// Return a map from the block manager id to max memory and remaining memory.
private def memoryStatus: Map[BlockManagerId, (Long, Long)] = {
  val perManager = blockManagerInfo.map { case (id, info) =>
    id -> ((info.maxMem, info.remainingMem))
  }
  perManager.toMap
}
// Builds one StorageStatus per registered block manager from its memory limits and blocks.
private def storageStatus: Array[StorageStatus] = {
  val statuses = blockManagerInfo.map { case (id, info) =>
    new StorageStatus(
      id,
      info.maxMem,
      Some(info.maxOnHeapMem),
      Some(info.maxOffHeapMem),
      info.blocks.asScala)
  }
  statuses.toArray
}
/**
 * Return the block's status for all block managers, if any. NOTE: This is a
 * potentially expensive operation and should only be used for testing.
 *
 * If askSlaves is true, the master queries each block manager for the most updated block
 * statuses. This is useful when the master is not informed of the given block by all block
 * managers.
 */
private def blockStatus(
    blockId: BlockId,
    askSlaves: Boolean): Map[BlockManagerId, Future[Option[BlockStatus]]] = {
  val statusRequest = GetBlockStatus(blockId)

  /*
   * Rather than blocking on the block status query, master endpoint should simply return
   * Futures to avoid potential deadlocks. This can arise if there exists a block manager
   * that is also waiting for this master endpoint's response to a previous message.
   */
  def statusFrom(info: BlockManagerInfo): Future[Option[BlockStatus]] = {
    if (askSlaves) {
      info.slaveEndpoint.ask[Option[BlockStatus]](statusRequest)
    } else {
      Future { info.getStatus(blockId) }
    }
  }

  blockManagerInfo.values.map { info =>
    info.blockManagerId -> statusFrom(info)
  }.toMap
}
/**
 * Return the ids of blocks present in all the block managers that match the given filter.
 * NOTE: This is a potentially expensive operation and should only be used for testing.
 *
 * If askSlaves is true, the master queries each block manager for the most updated block
 * statuses. This is useful when the master is not informed of the given block by all block
 * managers.
 */
private def getMatchingBlockIds(
    filter: BlockId => Boolean,
    askSlaves: Boolean): Future[Seq[BlockId]] = {
  val matchRequest = GetMatchingBlockIds(filter)

  // One future per block manager: either its own answer or the master-side view of it.
  val perManager = blockManagerInfo.values.map { info =>
    if (askSlaves) {
      info.slaveEndpoint.ask[Seq[BlockId]](matchRequest)
    } else {
      Future { info.blocks.asScala.keys.filter(filter).toSeq }
    }
  }

  Future.sequence(perManager).map(_.flatten.toSeq)
}
/**
 * Registers a block manager and returns its BlockManagerId with topology information
 * populated, if available.
 *
 * A different block manager already registered for the same executor is removed first.
 * SparkListenerBlockManagerAdded is posted whether or not the id was already registered.
 *
 * @param idWithoutTopologyInfo id as sent by the block manager (executor, host, port)
 * @param maxOnHeapMemSize on-heap memory limit of the block manager
 * @param maxOffHeapMemSize off-heap memory limit of the block manager
 * @param slaveEndpoint RPC reference to the block manager's endpoint
 */
private def register(
    idWithoutTopologyInfo: BlockManagerId,
    maxOnHeapMemSize: Long,
    maxOffHeapMemSize: Long,
    slaveEndpoint: RpcEndpointRef): BlockManagerId = {
  // the dummy id is not expected to contain the topology information.
  // we get that info here and respond back with a more fleshed out block manager id
  val host = idWithoutTopologyInfo.host
  val id = BlockManagerId(
    idWithoutTopologyInfo.executorId,
    host,
    idWithoutTopologyInfo.port,
    topologyMapper.getTopologyForHost(host))

  val time = System.currentTimeMillis()
  val totalMemSize = maxOnHeapMemSize + maxOffHeapMemSize

  if (!blockManagerInfo.contains(id)) {
    blockManagerIdByExecutor.get(id.executorId).foreach { oldId =>
      // A block manager of the same executor already exists, so remove it (assumed dead)
      logError("Got two different block manager registrations on same executor - "
        + s" will replace old one $oldId with new one $id")
      removeExecutor(id.executorId)
    }

    logInfo("Registering block manager %s with %s RAM, %s".format(
      id.hostPort, Utils.bytesToString(totalMemSize), id))

    blockManagerIdByExecutor(id.executorId) = id
    blockManagerInfo(id) = new BlockManagerInfo(
      id, System.currentTimeMillis(), maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint)
  }

  listenerBus.post(SparkListenerBlockManagerAdded(time, id, totalMemSize,
    Some(maxOnHeapMemSize), Some(maxOffHeapMemSize)))
  id
}
/**
 * Applies a block update reported by a block manager: refreshes that manager's
 * BlockManagerInfo and the block-to-locations map.
 *
 * @return false only when the reporting block manager is unknown and is not a driver-side
 *         block manager outside local mode; true otherwise
 */
private def updateBlockInfo(
    blockManagerId: BlockManagerId,
    blockId: BlockId,
    storageLevel: StorageLevel,
    memSize: Long,
    diskSize: Long): Boolean = {
  blockManagerInfo.get(blockManagerId) match {
    case None =>
      // We intentionally do not register the master (except in local mode),
      // so we should not indicate failure.
      blockManagerId.isDriver && !isLocal

    case Some(info) if blockId == null =>
      // No block given: only refresh the last-seen timestamp.
      info.updateLastSeenMs()
      true

    case Some(info) =>
      info.updateBlockInfo(blockId, storageLevel, memSize, diskSize)

      val knownLocations = blockLocations.get(blockId)
      val locations =
        if (knownLocations != null) {
          knownLocations
        } else {
          val fresh = new mutable.HashSet[BlockManagerId]
          blockLocations.put(blockId, fresh)
          fresh
        }

      if (storageLevel.isValid) {
        locations.add(blockManagerId)
      } else {
        locations.remove(blockManagerId)
      }

      // Remove the block from master tracking if it has been removed on all slaves.
      if (locations.size == 0) {
        blockLocations.remove(blockId)
      }
      true
  }
}
/** Block managers known to hold the block; empty when the block is not tracked. */
private def getLocations(blockId: BlockId): Seq[BlockManagerId] = {
  Option(blockLocations.get(blockId)) match {
    case Some(holders) => holders.toSeq
    case None => Seq.empty
  }
}
/**
 * Locations of the block together with its status as recorded for the first location, or
 * None when the block has no known location or that location has no status for it.
 */
private def getLocationsAndStatus(blockId: BlockId): Option[BlockLocationsAndStatus] = {
  val locations: Seq[BlockManagerId] = blockLocations.get(blockId) match {
    case null => Seq.empty
    case holders => holders.toSeq
  }
  for {
    firstHolder <- locations.headOption
    status <- blockManagerInfo(firstHolder).getStatus(blockId)
  } yield BlockLocationsAndStatus(locations, status)
}
/** Locations of each of the given blocks, in the order of `blockIds`. */
private def getLocationsMultipleBlockIds(
    blockIds: Array[BlockId]): IndexedSeq[Seq[BlockManagerId]] = {
  blockIds.map(getLocations)
}
/**
 * Get the list of the peers of the given block manager: every registered non-driver block
 * manager other than itself. Empty when the given block manager is not registered.
 */
private def getPeers(blockManagerId: BlockManagerId): Seq[BlockManagerId] = {
  val registered = blockManagerInfo.keySet
  if (!registered.contains(blockManagerId)) {
    Seq.empty
  } else {
    val nonDrivers = registered.filterNot { _.isDriver }
    nonDrivers.filterNot { _ == blockManagerId }.toSeq
  }
}
/**
 * Returns an [[RpcEndpointRef]] of the [[BlockManagerSlaveEndpoint]] for sending RPC messages.
 * None when the executor has no registered block manager.
 */
private def getExecutorEndpointRef(executorId: String): Option[RpcEndpointRef] = {
  blockManagerIdByExecutor.get(executorId)
    .flatMap(bmId => blockManagerInfo.get(bmId))
    .map(_.slaveEndpoint)
}
/** Shuts down the ask thread pool when the endpoint stops. */
override def onStop(): Unit = askThreadPool.shutdownNow()
}
BlockManagerMaster
这个类的对象在 driver 和 executor 端都会存在。区别在于:driverEndpoint 在 driver 端指向本地的 BlockManagerMasterEndpoint,在 executor 端则是指向 driver 上该 endPoint 的引用(ref)。这个类进一步封装了 driver 或 executor 与 BlockManagerMasterEndpoint 交互的细节。
/**
 * Thin client around the BlockManagerMasterEndpoint: every public method packages its
 * arguments into a message and sends it through `driverEndpoint`.
 *
 * According to the original notes, an instance exists on both the driver and the executors.
 *
 * @param driverEndpoint reference used to reach the BlockManagerMasterEndpoint. Per the original
 *                       notes it refers to the locally registered endpoint on the driver and to
 *                       the driver's endpoint on an executor. Set to null by [[stop]].
 * @param conf           Spark configuration; used here to derive the RPC ask timeout
 * @param isDriver       true on the driver, false on an executor; only consulted by [[stop]]
 */
private[spark]
class BlockManagerMaster(
    var driverEndpoint: RpcEndpointRef,
    conf: SparkConf,
    isDriver: Boolean)
  extends Logging {

  /** Timeout applied when waiting on the futures returned by the master endpoint. */
  val timeout = RpcUtils.askRpcTimeout(conf)

  /**
   * Remove a dead executor from the driver endpoint. This is only called on the driver side.
   * Blocks until the endpoint has answered.
   *
   * @param execId id of the executor to remove
   * @throws SparkException if the endpoint replies with false
   */
  def removeExecutor(execId: String): Unit = {
    tell(RemoveExecutor(execId))
    logInfo(s"Removed $execId successfully in removeExecutor")
  }

  /**
   * Request removal of a dead executor from the driver endpoint.
   * This is only called on the driver side. Non-blocking: the reply future is not awaited.
   *
   * @param execId id of the executor to remove
   */
  def removeExecutorAsync(execId: String): Unit = {
    driverEndpoint.ask[Boolean](RemoveExecutor(execId))
    logInfo(s"Removal of executor $execId requested")
  }

  /**
   * Register the BlockManager's id with the driver. The input BlockManagerId does not contain
   * topology information. This information is obtained from the master and we respond with an
   * updated BlockManagerId fleshed out with this information.
   *
   * @param blockManagerId    id of the block manager being registered
   * @param maxOnHeapMemSize  forwarded unchanged in the RegisterBlockManager message
   * @param maxOffHeapMemSize forwarded unchanged in the RegisterBlockManager message
   * @param slaveEndpoint     endpoint reference forwarded in the RegisterBlockManager message
   * @return the BlockManagerId sent back by the master endpoint
   */
  def registerBlockManager(
      blockManagerId: BlockManagerId,
      maxOnHeapMemSize: Long,
      maxOffHeapMemSize: Long,
      slaveEndpoint: RpcEndpointRef): BlockManagerId = {
    logInfo(s"Registering BlockManager $blockManagerId")
    val updatedId = driverEndpoint.askSync[BlockManagerId](
      RegisterBlockManager(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint))
    logInfo(s"Registered BlockManager $updatedId")
    updatedId
  }

  /**
   * Report a block's storage level and sizes to the master endpoint.
   *
   * @return the Boolean reply of the master endpoint
   */
  def updateBlockInfo(
      blockManagerId: BlockManagerId,
      blockId: BlockId,
      storageLevel: StorageLevel,
      memSize: Long,
      diskSize: Long): Boolean = {
    val res = driverEndpoint.askSync[Boolean](
      UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize))
    logDebug(s"Updated info of block $blockId")
    res
  }

  /** Get locations of the blockId from the driver */
  def getLocations(blockId: BlockId): Seq[BlockManagerId] = {
    driverEndpoint.askSync[Seq[BlockManagerId]](GetLocations(blockId))
  }

  /** Get locations as well as status of the blockId from the driver */
  def getLocationsAndStatus(blockId: BlockId): Option[BlockLocationsAndStatus] = {
    driverEndpoint.askSync[Option[BlockLocationsAndStatus]](
      GetLocationsAndStatus(blockId))
  }

  /** Get locations of multiple blockIds from the driver */
  def getLocations(blockIds: Array[BlockId]): IndexedSeq[Seq[BlockManagerId]] = {
    driverEndpoint.askSync[IndexedSeq[Seq[BlockManagerId]]](
      GetLocationsMultipleBlockIds(blockIds))
  }

  /**
   * Check if block manager master has a block. Note that this can be used to check for only
   * those blocks that are reported to block manager master.
   */
  def contains(blockId: BlockId): Boolean = {
    getLocations(blockId).nonEmpty
  }

  /**
   * Get ids of other nodes in the cluster from the driver.
   *
   * @param blockManagerId the block manager asking for its peers
   */
  def getPeers(blockManagerId: BlockManagerId): Seq[BlockManagerId] = {
    driverEndpoint.askSync[Seq[BlockManagerId]](GetPeers(blockManagerId))
  }

  /** Ask the master endpoint for the endpoint reference registered for `executorId`, if any. */
  def getExecutorEndpointRef(executorId: String): Option[RpcEndpointRef] = {
    driverEndpoint.askSync[Option[RpcEndpointRef]](GetExecutorEndpointRef(executorId))
  }

  /**
   * Remove a block from the slaves that have it. This can only be used to remove
   * blocks that the driver knows about.
   */
  def removeBlock(blockId: BlockId): Unit = {
    // The Boolean reply is intentionally discarded.
    driverEndpoint.askSync[Boolean](RemoveBlock(blockId))
  }

  /**
   * Remove all blocks belonging to the given RDD.
   *
   * The endpoint answers with a future; a failure of that future is logged as a warning.
   *
   * @param rddId    id of the RDD whose blocks should be removed
   * @param blocking when true, wait (bounded by `timeout`) for the removal future to complete
   */
  def removeRdd(rddId: Int, blocking: Boolean): Unit = {
    val future = driverEndpoint.askSync[Future[Seq[Int]]](RemoveRdd(rddId))
    future.failed.foreach(e =>
      logWarning(s"Failed to remove RDD $rddId - ${e.getMessage}", e)
    )(ThreadUtils.sameThread)
    if (blocking) {
      timeout.awaitResult(future)
    }
  }

  /**
   * Remove all blocks belonging to the given shuffle.
   *
   * The endpoint answers with a future; a failure of that future is logged as a warning.
   *
   * @param shuffleId id of the shuffle whose blocks should be removed
   * @param blocking  when true, wait (bounded by `timeout`) for the removal future to complete
   */
  def removeShuffle(shuffleId: Int, blocking: Boolean): Unit = {
    val future = driverEndpoint.askSync[Future[Seq[Boolean]]](RemoveShuffle(shuffleId))
    future.failed.foreach(e =>
      logWarning(s"Failed to remove shuffle $shuffleId - ${e.getMessage}", e)
    )(ThreadUtils.sameThread)
    if (blocking) {
      timeout.awaitResult(future)
    }
  }

  /**
   * Remove all blocks belonging to the given broadcast.
   *
   * The endpoint answers with a future; a failure of that future is logged as a warning.
   *
   * @param broadcastId      id of the broadcast whose blocks should be removed
   * @param removeFromMaster forwarded in the RemoveBroadcast message. Per the original notes:
   *                         true removes the broadcast blocks from the driver as well as the
   *                         executors, false removes them from the executors only.
   * @param blocking         when true, wait (bounded by `timeout`) for the removal future
   */
  def removeBroadcast(broadcastId: Long, removeFromMaster: Boolean, blocking: Boolean): Unit = {
    val future = driverEndpoint.askSync[Future[Seq[Int]]](
      RemoveBroadcast(broadcastId, removeFromMaster))
    future.failed.foreach(e =>
      logWarning(s"Failed to remove broadcast $broadcastId" +
        s" with removeFromMaster = $removeFromMaster - ${e.getMessage}", e)
    )(ThreadUtils.sameThread)
    if (blocking) {
      timeout.awaitResult(future)
    }
  }

  /**
   * Return the memory status for each block manager, in the form of a map from
   * the block manager's id to two long values. The first value is the maximum
   * amount of memory allocated for the block manager, while the second is the
   * amount of remaining memory.
   */
  def getMemoryStatus: Map[BlockManagerId, (Long, Long)] = {
    driverEndpoint.askSync[Map[BlockManagerId, (Long, Long)]](GetMemoryStatus)
  }

  /** Ask the master endpoint for the StorageStatus of the block managers it knows about. */
  def getStorageStatus: Array[StorageStatus] = {
    driverEndpoint.askSync[Array[StorageStatus]](GetStorageStatus)
  }

  /**
   * Return the block's status on all block managers, if any. NOTE: This is a
   * potentially expensive operation and should only be used for testing.
   *
   * If askSlaves is true, this invokes the master to query each block manager for the most
   * updated block statuses. This is useful when the master is not informed of the given block
   * by all block managers.
   *
   * @return a map containing only the block managers that reported a status for the block
   * @throws SparkException if the awaited result is null
   */
  def getBlockStatus(
      blockId: BlockId,
      askSlaves: Boolean = true): Map[BlockManagerId, BlockStatus] = {
    val msg = GetBlockStatus(blockId, askSlaves)
    /*
     * To avoid potential deadlocks, the use of Futures is necessary, because the master endpoint
     * should not block on waiting for a block manager, which can in turn be waiting for the
     * master endpoint for a response to a prior message.
     */
    val response = driverEndpoint.
      askSync[Map[BlockManagerId, Future[Option[BlockStatus]]]](msg)
    val (blockManagerIds, futures) = response.unzip
    // NOTE(review): this implicit looks unused because the execution context is passed
    // explicitly to Future.sequence below - confirm before removing it.
    implicit val sameThread = ThreadUtils.sameThread
    val cbf =
      implicitly[
        CanBuildFrom[Iterable[Future[Option[BlockStatus]]],
          Option[BlockStatus],
          Iterable[Option[BlockStatus]]]]
    val blockStatus = timeout.awaitResult(
      Future.sequence[Option[BlockStatus], Iterable](futures)(cbf, ThreadUtils.sameThread))
    if (blockStatus == null) {
      throw new SparkException(s"BlockManager returned null for BlockStatus query: $blockId")
    }
    // Keep only the block managers whose status is defined.
    blockManagerIds.zip(blockStatus).flatMap { case (blockManagerId, status) =>
      status.map { s => (blockManagerId, s) }
    }.toMap
  }

  /**
   * Return a list of ids of existing blocks such that the ids match the given filter. NOTE: This
   * is a potentially expensive operation and should only be used for testing.
   *
   * If askSlaves is true, this invokes the master to query each block manager for the most
   * updated block statuses. This is useful when the master is not informed of the given block
   * by all block managers.
   */
  def getMatchingBlockIds(
      filter: BlockId => Boolean,
      askSlaves: Boolean): Seq[BlockId] = {
    val msg = GetMatchingBlockIds(filter, askSlaves)
    val future = driverEndpoint.askSync[Future[Seq[BlockId]]](msg)
    timeout.awaitResult(future)
  }

  /**
   * Find out if the executor has cached blocks. This method does not consider broadcast blocks,
   * since they are not reported the master.
   */
  def hasCachedBlocks(executorId: String): Boolean = {
    driverEndpoint.askSync[Boolean](HasCachedBlocks(executorId))
  }

  /**
   * Stop the driver endpoint, called only on the Spark driver node.
   *
   * Does nothing unless `isDriver` is true and `driverEndpoint` is non-null. Otherwise sends
   * StopBlockManagerMaster and then clears `driverEndpoint`.
   * NOTE(review): the original notes say the endpoint reacts by unregistering itself from the
   * RPC environment's dispatcher - that happens outside this class; confirm there.
   */
  def stop(): Unit = {
    if (driverEndpoint != null && isDriver) {
      tell(StopBlockManagerMaster)
      driverEndpoint = null
      logInfo("BlockManagerMaster stopped")
    }
  }

  /**
   * Send a message to the master endpoint and block for its Boolean reply, which is expected
   * to be true.
   *
   * @throws SparkException if the endpoint replies with false
   */
  private def tell(message: Any): Unit = {
    if (!driverEndpoint.askSync[Boolean](message)) {
      throw new SparkException("BlockManagerMasterEndpoint returned false, expected true.")
    }
  }
}
/** Companion object holding constants for [[BlockManagerMaster]]. */
private[spark] object BlockManagerMaster {

  /** Name used for the driver-side endpoint. */
  val DRIVER_ENDPOINT_NAME: String = "BlockManagerMaster"
}
总结
BlockManagerMaster 在 SparkEnv 中是这样创建和使用的,如下:
/**
* Builds the BlockManagerMaster for this SparkEnv.
* Per the original notes: on the driver node the BlockManagerMaster endpoint is installed,
* while on an executor node the BlockManagerMaster holds a ref to the driver's endpoint.
*/
val blockManagerMaster = new BlockManagerMaster(
registerOrLookupEndpoint(
BlockManagerMaster.DRIVER_ENDPOINT_NAME, // the endpoint name, i.e. the string "BlockManagerMaster"
new BlockManagerMasterEndpoint(rpcEnv, isLocal, conf, listenerBus)),
// Original note: on the driver this BlockManagerMasterEndpoint instance is the one actually used;
// on an executor it is unreachable and may simply be garbage-collected.
// NOTE(review): if registerOrLookupEndpoint takes this argument by name, the endpoint would not
// even be constructed on executors - confirm against its definition.
conf, isDriver)