1、BlockManager原理示意图
①Driver上的BlockManagerMaster管理各个节点上BlockManager的元数据信息和维护block的状态信息。
②每个节点上BlockManager的每个组件:
DiskStore:负责磁盘上的数据读写
MemoryStore: 负责内存中的数据读写
BlockManagerWorker: 负责远程节点的数据读写
ConnectionManager:负责建立远程BlockManager的通信连接
③BlockManager在进行数据的读写操作时,如RDD的运行中调用了persist()或中间生成一些数据,优先存入内存,内存存储不下,就存储到磁盘中
④Shuffle的读数据操作,从本地内存(MemoryStore)和磁盘(DiskStore)中读取数据,如果没有就从其他节点上使用ConnectionManager建立连接,使用BlockManagerWorker下载数据
2、源码分析
①BlockManager的注册与维护
BlockManagerMaster使用BlockManagerMasterEndpoint(Actor)来负责executor和BlockManager的元数据管理
BlockManagerMasterEndpoint.scala
/**
* Maintains the metadata of every executor and BlockManager (BlockManagerInfo, BlockStatus).
* The fields visible in this excerpt are three lookup tables: block manager id to its info,
* executor id to block manager id, and block id to the set of block managers holding it.
*/
private[spark]
class BlockManagerMasterEndpoint(
override val rpcEnv: RpcEnv,
val isLocal: Boolean,
conf: SparkConf,
listenerBus: LiveListenerBus)
extends ThreadSafeRpcEndpoint with Logging {
// Mapping from block manager id to the block manager's information.
// (BlockManagerId -> BlockManagerInfo)
private val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]
// Mapping from executor ID to block manager ID.
// (executorId -> BlockManagerId) each executor id is associated with a single BlockManagerId
private val blockManagerIdByExecutor = new mutable.HashMap[String, BlockManagerId]
// Mapping from block id to the set of block managers that have the block.
private val blockLocations = new JHashMap[BlockId, mutable.HashSet[BlockManagerId]]
...
}
注册BlockManagerInfo
// Registers a block manager with the master.
//
// A BlockManagerId that is already present in blockManagerInfo is left untouched, but the
// SparkListenerBlockManagerAdded event is posted on the listener bus in either case.
//
// id            - identity of the block manager being registered
// maxMemSize    - memory size reported for it, in bytes (as formatted by Utils.bytesToString)
// slaveEndpoint - RPC endpoint reference stored in the new BlockManagerInfo
private def register(id: BlockManagerId, maxMemSize: Long, slaveEndpoint: RpcEndpointRef) {
  val registrationTime = System.currentTimeMillis()
  val isNewBlockManager = !blockManagerInfo.contains(id)
  if (isNewBlockManager) {
    // Consistency guard: when blockManagerInfo has no entry for this id, the executor map
    // should not have one for its executor either. If it does, the old entry is cleaned up
    // via removeExecutor before the new registration is recorded.
    blockManagerIdByExecutor.get(id.executorId).foreach { oldId =>
      // A block manager of the same executor already exists, so remove it (assumed dead)
      logError("Got two different block manager registrations on same executor - "
        + s" will replace old one $oldId with new one $id")
      removeExecutor(id.executorId)
    }

    logInfo("Registering block manager %s with %s RAM, %s".format(
      id.hostPort, Utils.bytesToString(maxMemSize), id))

    // Record executor id -> block manager id.
    blockManagerIdByExecutor.update(id.executorId, id)

    // Create the BlockManagerInfo for this id and record block manager id -> info.
    val newInfo = new BlockManagerInfo(id, System.currentTimeMillis(), maxMemSize, slaveEndpoint)
    blockManagerInfo.update(id, newInfo)
  }
  listenerBus.post(SparkListenerBlockManagerAdded(registrationTime, id, maxMemSize))
}
更新BlockManagerInfo
// Applies a block-status report coming from one block manager to the master's bookkeeping.
// Per the accompanying notes, a BlockManager sends this update whenever one of its blocks
// changes (the callers are not shown here).
//
// Returns false only when the reporting block manager is unknown and is not a driver
// running in non-local mode; every other path returns true.
private def updateBlockInfo(
    blockManagerId: BlockManagerId,
    blockId: BlockId,
    storageLevel: StorageLevel,
    memSize: Long,
    diskSize: Long,
    externalBlockStoreSize: Long): Boolean = {
  blockManagerInfo.get(blockManagerId) match {
    case None =>
      // We intentionally do not register the master (except in local mode),
      // so we should not indicate failure.
      blockManagerId.isDriver && !isLocal

    case Some(info) if blockId == null =>
      // No block attached to the report: only refresh the last-seen timestamp.
      info.updateLastSeenMs()
      true

    case Some(info) =>
      info.updateBlockInfo(blockId, storageLevel, memSize, diskSize, externalBlockStoreSize)

      // A block may be held by several block managers (the notes mention replicated
      // storage levels such as the "_2" variants), so blockLocations keeps a set of
      // holders per block id; using a set de-duplicates repeated reports.
      val holders: mutable.HashSet[BlockManagerId] =
        if (blockLocations.containsKey(blockId)) {
          blockLocations.get(blockId)
        } else {
          val fresh = new mutable.HashSet[BlockManagerId]
          blockLocations.put(blockId, fresh)
          fresh
        }

      // A valid storage level means this manager holds the block; otherwise it no longer does.
      if (storageLevel.isValid) {
        holders += blockManagerId
      } else {
        holders -= blockManagerId
      }

      // Remove the block from master tracking if it has been removed on all slaves.
      if (holders.isEmpty) {
        blockLocations.remove(blockId)
      }
      true
  }
}
private[spark] class BlockManagerInfo(
val blockManagerId: BlockManagerId,
timeMs: Long,
val maxMem: Long,
val slaveEndpoint: RpcEndpointRef)
extends Logging {
...
// Mapping from block id to its status.
// blockId-BlockStatus的映射
private val _blocks = new JHashMap[BlockId, BlockStatus]
...
def updateBlockInfo(
blockId: BlockId,
storageLevel: StorageLevel,
memSize: Long,
diskSize: Long,
externalBlockStoreSize: Long) {
updateLastSeenMs()
//判断内部是否有block
if (_blocks.containsKey(blockId)) {
// The block exists on the slave already.
val blockStatus: BlockStatus = _blocks.get(blockId)
val originalLevel: StorageLevel = blockStatus.storageLevel
val originalMemSize: Long = blockStatus.memSize
// 判断storeLevel是否使用内存,是就给剩余内存数量加上当前内存数量
if (originalLevel.useMemory) {
_remainingMem += originalMemSize
}
}
// 给block创建一个BlockStatus,然后根据持久化级别,对相应的内存资源进行计算
if (storageLevel.isValid) {
/* isValid means it is either stored in-memory, on-disk or on-externalBlockStore.
* The memSize here indicates the data size in or dropped from memory,
* externalBlockStoreSize here indicates the data size in or dropped from externalBlockStore,
* and the diskSize here indicates the data size in or dropped to disk.
* They can be both larger than 0, when a block is dropped from memory to disk.
* Therefore, a safe way to set BlockStatus is to set its info in accurate modes. */
var blockStatus: BlockStatus = null
if (storageLevel.useMemory) {
blockStatus = BlockStatus(storageLevel, memSize, 0, 0)
_blocks.put(blockId, blockStatus)
_remainingMem -= memSize
logInfo("Added %s in memory on %s (size: %s, free: %s)".format(
blockId, blockManagerId.hostPort, Utils.bytesToString(memSize),
Utils.bytesToString(_remainingMem)))
}
if (storageLevel.useDisk) {
blockStatus = BlockStatus(storageLevel, 0, diskSize, 0)
_blocks.put(blockId, blockStatus)
logInfo("Added %s on disk on %s (size: %s)"