1. blockmanager概述
BlockManager是spark实现的存储系统,RDD-Cache、 Shuffle-output、broadcast 等都是基于BlockManager来实现的。BlockManager对外提供数据接口,可以将数据存储在多种介质上,如memory,disk,Alluxio等。
BlockManager也是采用master-slave模式的分布式架构,Executor上的blockmanager负责本节点上的Block管理,Driver上的blockManagerMaster负责集群Block元数据管理,每个节点上存储的block信息会汇报给blockManagerMaster,如下图所示。
组件说明:
BlockManagerMasterEndpoint:负责接收处理BlockManager发送过来的消息。
BlockManagerSlaveEndpoint:负责接收处理BlockManagerMaster发送过来的消息。
MemoryStore:负责内存Block的管理
DiskStore:负责磁盘Block的管理
BlockTransferService:用于远程Block的传输(Spark中支持Block远程备份)
2. blockmanager初始化
下面来分析下Driver端的blockmanagermaster和Executor端的blockmanager的初始化流程。
blockmanagermaster和blockmanager的构造是在SparkEnv.scala的create方法中完成的。
/**
 * Returns a SparkEnv. Only the block-manager wiring is shown in this excerpt; each `...`
 * line stands for code that has been omitted, so names such as `isDriver`,
 * `serializerManager` or `blockTransferService` are presumably defined in the omitted
 * part (not visible here).
 */
private def create(
conf: SparkConf,
executorId: String,
bindAddress: String,
advertiseAddress: String,
port: Option[Int],
isLocal: Boolean,
numUsableCores: Int,
ioEncryptionKey: Option[Array[Byte]],
listenerBus: LiveListenerBus = null,
mockOutputCommitCoordinator: Option[OutputCommitCoordinator] = None): SparkEnv = {
...
// Build the BlockManagerMaster around the endpoint reference obtained from
// registerOrLookupEndpoint under the name BlockManagerMaster.DRIVER_ENDPOINT_NAME.
val blockManagerMaster = new BlockManagerMaster(registerOrLookupEndpoint(
BlockManagerMaster.DRIVER_ENDPOINT_NAME,
new BlockManagerMasterEndpoint(rpcEnv, isLocal, conf, listenerBus)),
conf, isDriver)
// Construct the BlockManager and hand it the master created above.
// No call to initialize() appears in the lines shown here.
val blockManager = new BlockManager(executorId, rpcEnv, blockManagerMaster,
serializerManager, conf, memoryManager, mapOutputTracker, shuffleManager,
blockTransferService, securityManager, numUsableCores)
...
}
但是create调用的时机以及blockmanager的初始化在Driver端和Executor端是不一样的。
Driver端
Driver端create方法在SparkEnv.scala的createDriverEnv方法中调用。
/**
 * Driver-side entry point: delegates to `create`, passing SparkContext.DRIVER_IDENTIFIER
 * as the executor id. `bindAddress`, `advertiseAddress`, `port` and `ioEncryptionKey`
 * are presumably computed in the omitted `...` section (not visible in this excerpt).
 */
private[spark] def createDriverEnv(
conf: SparkConf,
isLocal: Boolean,
listenerBus: LiveListenerBus,
numCores: Int,
mockOutputCommitCoordinator: Option[OutputCommitCoordinator] = None): SparkEnv = {
...
create(
conf,
SparkContext.DRIVER_IDENTIFIER,
bindAddress,
advertiseAddress,
// Option(...) maps a null port to None.
Option(port),
isLocal,
numCores,
ioEncryptionKey,
listenerBus = listenerBus,
mockOutputCommitCoordinator = mockOutputCommitCoordinator
)
}
blockmanager的初始化在SparkContext的构造方法中。
// Initialize the environment's BlockManager, passing it _applicationId.
_env.blockManager.initialize(_applicationId)
Executor端
Executor端create方法在SparkEnv.scala的createExecutorEnv方法中调用。
/**
 * Executor-side entry point: builds a SparkEnv through `create`, publishes it with
 * `SparkEnv.set`, and returns it.
 *
 * `hostname` is used as both the bind address and the advertise address, and no
 * port is supplied (`None`).
 */
private[spark] def createExecutorEnv(
    conf: SparkConf,
    executorId: String,
    hostname: String,
    numCores: Int,
    ioEncryptionKey: Option[Array[Byte]],
    isLocal: Boolean): SparkEnv = {
  val executorEnv = create(
    conf = conf,
    executorId = executorId,
    bindAddress = hostname,
    advertiseAddress = hostname,
    port = None,
    isLocal = isLocal,
    numUsableCores = numCores,
    ioEncryptionKey = ioEncryptionKey)
  SparkEnv.set(executorEnv)
  executorEnv
}
blockmanager的初始化在Executor.scala的构造方法中。
// Only when not running in local mode: initialize the BlockManager with the
// application id read from conf, then register the executor source and the
// block manager's shuffle metrics source with the metrics system.
if (!isLocal) {
env.blockManager.initialize(conf.getAppId)
env.metricsSystem.registerSource(executorSource)
env.metricsSystem.registerSource(env.blockManager.shuffleMetricsSource)
}
3. blockmanager注册流程
blockmanager在初始化方法initialize中会向blockmanagermaster注册,通过调用registerBlockManager向BlockManagerMasterEndpoint发送RegisterBlockManager消息。
/**
 * Initializes this BlockManager for the application `appId`.
 *
 * In order: initializes the block transfer service and the shuffle client, instantiates
 * the block replication policy, registers with the block manager master, records the
 * resulting BlockManagerId, and chooses the shuffle server id.
 *
 * @param appId application id passed to the shuffle client
 */
def initialize(appId: String): Unit = {
  // Initialize the block transfer service (used for remote data transfer) and the shuffle client.
  blockTransferService.init(this)
  shuffleClient.init(appId)

  // Choose the block replication policy. The class name comes from
  // spark.storage.replication.policy and defaults to RandomBlockReplicationPolicy.
  blockReplicationPolicy = {
    val priorityClass = conf.get(
      "spark.storage.replication.policy", classOf[RandomBlockReplicationPolicy].getName)
    val clazz = Utils.classForName(priorityClass)
    // Class.newInstance is deprecated since Java 9; instantiate through the no-arg
    // constructor instead. Exceptions thrown by the constructor now surface wrapped in
    // an InvocationTargetException.
    val ret = clazz.getConstructor().newInstance().asInstanceOf[BlockReplicationPolicy]
    logInfo(s"Using $priorityClass for block replication policy")
    ret
  }

  // Build this block manager's id from the executor id and the transfer service's host and port.
  val id =
    BlockManagerId(executorId, blockTransferService.hostName, blockTransferService.port, None)

  // Register with the block manager master.
  val idFromMaster = master.registerBlockManager(
    id,
    maxOnHeapMemory,
    maxOffHeapMemory,
    slaveEndpoint)

  // Prefer the id returned by the master; fall back to the local one if it is null.
  blockManagerId = if (idFromMaster != null) idFromMaster else id

  // With the external shuffle service enabled, shuffle data is addressed through the
  // external service's port; otherwise through this block manager itself.
  shuffleServerId = if (externalShuffleServiceEnabled) {
    logInfo(s"external shuffle service port = $externalShuffleServicePort")
    BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort)
  } else {
    blockManagerId
  }

  // If the external shuffle service is enabled and this block manager does NOT belong to
  // the driver, register with the external shuffle server.
  if (externalShuffleServiceEnabled && !blockManagerId.isDriver) {
    registerWithExternalShuffleServer()
  }

  logInfo(s"Initialized BlockManager: $blockManagerId")
}
/**
 * Registers a block manager with the driver endpoint and returns the BlockManagerId
 * the driver replies with.
 *
 * @param blockManagerId    id of the block manager being registered
 * @param maxOnHeapMemSize  on-heap memory size forwarded in the registration message
 * @param maxOffHeapMemSize off-heap memory size forwarded in the registration message
 * @param slaveEndpoint     endpoint reference forwarded in the registration message
 * @return the id contained in the driver's reply
 */
def registerBlockManager(
    blockManagerId: BlockManagerId,
    maxOnHeapMemSize: Long,
    maxOffHeapMemSize: Long,
    slaveEndpoint: RpcEndpointRef): BlockManagerId = {
  logInfo(s"Registering BlockManager $blockManagerId")
  // Send the registration request to the driver and wait for the reply.
  val registration =
    RegisterBlockManager(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint)
  val confirmedId = driverEndpoint.askSync[BlockManagerId](registration)
  logInfo(s"Registered BlockManager $confirmedId")
  confirmedId
}
当BlockManagerMasterEndpoint收到RegisterBlockManager消息时会调用register方法。
// Excerpt of the master endpoint's message handler; `...` marks omitted cases.
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
// On a RegisterBlockManager request, call register and reply with the id it returns.
case RegisterBlockManager(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint) =>
context.reply(register(blockManagerId, maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint))
...
}
在register方法中会将新注册的blockmanager信息添加到blockmanagermaster的元信息中。
/**
 * Records a block manager in the master's metadata and returns its id enriched with
 * the topology information obtained from `topologyMapper`.
 *
 * If the id is not yet in `blockManagerInfo`, any previous registration for the same
 * executor is removed first, then the new id is stored in both
 * `blockManagerIdByExecutor` and `blockManagerInfo`. A SparkListenerBlockManagerAdded
 * event is posted in every case, whether or not the id was already known.
 */
private def register(
    idWithoutTopologyInfo: BlockManagerId,
    maxOnHeapMemSize: Long,
    maxOffHeapMemSize: Long,
    slaveEndpoint: RpcEndpointRef): BlockManagerId = {
  val topology = topologyMapper.getTopologyForHost(idWithoutTopologyInfo.host)
  val registeredId = BlockManagerId(
    idWithoutTopologyInfo.executorId,
    idWithoutTopologyInfo.host,
    idWithoutTopologyInfo.port,
    topology)
  val registrationTime = System.currentTimeMillis()
  val totalMemSize = maxOnHeapMemSize + maxOffHeapMemSize

  // Only touch the metadata when this exact id has not been registered before.
  val alreadyRegistered = blockManagerInfo.contains(registeredId)
  if (!alreadyRegistered) {
    blockManagerIdByExecutor.get(registeredId.executorId).foreach { oldId =>
      // A block manager of the same executor already exists, so remove it (assumed dead)
      logError("Got two different block manager registrations on same executor - "
        + s" will replace old one $oldId with new one $registeredId")
      removeExecutor(registeredId.executorId)
    }
    logInfo("Registering block manager %s with %s RAM, %s".format(
      registeredId.hostPort, Utils.bytesToString(totalMemSize), registeredId))

    // Index the new id by executor, then store its BlockManagerInfo.
    blockManagerIdByExecutor(registeredId.executorId) = registeredId
    blockManagerInfo(registeredId) = new BlockManagerInfo(
      registeredId, System.currentTimeMillis(), maxOnHeapMemSize, maxOffHeapMemSize,
      slaveEndpoint)
  }

  val addedEvent = SparkListenerBlockManagerAdded(
    registrationTime, registeredId, totalMemSize,
    Some(maxOnHeapMemSize), Some(maxOffHeapMemSize))
  listenerBus.post(addedEvent)
  registeredId
}
4. 删除Block
删除Block时BlockManagerMaster会向BlockManagerMasterEndpoint发送RemoveBlock,BlockManagerMasterEndpoint在收到消息后再向BlockManagerSlaveEndpoint发送RemoveBlock消息,最后blockmanager再删对应的block。
/**
 * Sends a RemoveBlock message for `blockId` to the driver endpoint and waits for the
 * reply. The Boolean reply is discarded.
 *
 * Declared with an explicit `: Unit =` because procedure syntax (`def f() { ... }`)
 * is deprecated in Scala 2.13 and dropped in Scala 3.
 */
def removeBlock(blockId: BlockId): Unit = {
  driverEndpoint.askSync[Boolean](RemoveBlock(blockId))
}
BlockManagerMasterEndpoint中调用removeBlockFromWorkers处理RemoveBlock消息。
// Excerpt of the master endpoint's message handler; `...` marks omitted cases.
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
...
// On RemoveBlock, call removeBlockFromWorkers and then reply true to the sender.
case RemoveBlock(blockId) =>
removeBlockFromWorkers(blockId)
context.reply(true)
...
}
/**
 * Sends a RemoveBlock message to the slave endpoint of every block manager listed in
 * `blockLocations` for `blockId`. Does nothing when no locations are recorded.
 *
 * Declared with an explicit `: Unit =` because procedure syntax is deprecated in
 * Scala 2.13 and dropped in Scala 3.
 */
private def removeBlockFromWorkers(blockId: BlockId): Unit = {
  // blockLocations.get may yield null for an unknown block; Option(...) turns that into None.
  Option(blockLocations.get(blockId)).foreach { locations =>
    locations.foreach { blockManagerId: BlockManagerId =>
      // Ids without an entry in blockManagerInfo are skipped.
      blockManagerInfo.get(blockManagerId).foreach { info =>
        // Send RemoveBlock to the slave endpoint; the result of ask is not inspected here.
        info.slaveEndpoint.ask[Boolean](RemoveBlock(blockId))
      }
    }
  }
}
BlockManagerSlaveEndpoint收到RemoveBlock消息时会调用blockManager.removeBlock(blockId)移除Block。
/**
 * Slave endpoint message handler (only the RemoveBlock case appears in this snippet).
 * On RemoveBlock it passes a block to `doAsync` that calls `blockManager.removeBlock`
 * and yields `true`.
 */
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
  case RemoveBlock(targetBlockId) =>
    val actionMessage = "removing block " + targetBlockId
    doAsync[Boolean](actionMessage, context) {
      blockManager.removeBlock(targetBlockId)
      true
    }
}
/**
 * Removes the block identified by `blockId` from this block manager.
 *
 * Calls `blockInfoManager.lockForWriting`; if that yields no info, a warning is logged
 * and nothing else happens. Otherwise the block is removed through
 * `removeBlockInternal` and an empty BlockStatus is recorded in the task metrics.
 *
 * @param blockId    block to remove
 * @param tellMaster whether the master may be told; it is only told when the block's
 *                   own `tellMaster` flag is also set
 */
def removeBlock(blockId: BlockId, tellMaster: Boolean = true): Unit = {
  logDebug(s"Removing block $blockId")
  blockInfoManager.lockForWriting(blockId) match {
    case Some(blockInfo) =>
      val notifyMaster = tellMaster && blockInfo.tellMaster
      removeBlockInternal(blockId, tellMaster = notifyMaster)
      addUpdatedBlockStatusToTaskMetrics(blockId, BlockStatus.empty)
    case None =>
      logWarning(s"Asked to remove block $blockId, which does not exist")
  }
}
removeBlock中调用removeBlockInternal会删除memoryStore和diskStore中的Block。
/**
 * Removes `blockId` from the memory store and the disk store, then drops it from
 * `blockInfoManager`. Logs a warning when neither store reported a removal, and
 * reports an empty BlockStatus when `tellMaster` is true.
 */
private def removeBlockInternal(blockId: BlockId, tellMaster: Boolean): Unit = {
  // Both stores are always asked, regardless of the other's result.
  val droppedFromMemory = memoryStore.remove(blockId)
  val droppedFromDisk = diskStore.remove(blockId)
  val removedSomewhere = droppedFromMemory || droppedFromDisk
  if (!removedSomewhere) {
    logWarning(s"Block $blockId could not be removed as it was not found on disk or in memory")
  }

  blockInfoManager.removeBlock(blockId)

  if (tellMaster) {
    reportBlockStatus(blockId, BlockStatus.empty)
  }
}