1. BlockManager Initialization
Created in SparkEnv:
BlockManager is responsible for managing blocks, but it is not usable until initialize() has been called:
// NB: blockManager is not valid until initialize() is called later.
val blockManager = new BlockManager(executorId, rpcEnv, blockManagerMaster,
serializerManager, conf, memoryManager, mapOutputTracker, shuffleManager,
blockTransferService, securityManager, numUsableCores)
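A minimal usage sketch of that deferred initialization (this mirrors how the executor side wires it up; treat it as an approximation, not the exact Executor code):
val env = SparkEnv.get
// Only after this call are put/get/replication operations on the BlockManager valid:
env.blockManager.initialize(env.conf.getAppId)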
2. BlockManager Implementation
2.1 Components of BlockManager
- DiskBlockManager, the disk block manager;
- BlockInfoManager
  - holds a HashMap mapping each blockId to its BlockInfo (see the sketch after this list);
- MemoryStore, the in-memory store;
- DiskStore, the on-disk store;
- ShuffleClient
  - ExternalShuffleClient
  - BlockTransferService
- a BlockManagerSlaveEndpoint, whose RpcEndpointRef the BlockManager holds;
- BlockManagerMaster, which manages the BlockManagers living on all executors in one place.
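To make the BlockInfoManager bookkeeping concrete, here is a standalone, simplified model of the blockId-to-BlockInfo map (the real BlockInfoManager also tracks per-task read/write locks; BlockId and BlockInfo below are stubs, not Spark's types):
import scala.collection.mutable

// Stub types standing in for Spark's BlockId / BlockInfo.
case class BlockId(name: String)
case class BlockInfo(level: String, var size: Long)

class SimpleBlockInfoManager {
  // The core bookkeeping: one entry per block managed by this BlockManager.
  private val infos = new mutable.HashMap[BlockId, BlockInfo]

  // Returns true if the block was new and is now registered (the real
  // lockNewBlockForWriting additionally acquires a write lock on it).
  def lockNewBlockForWriting(id: BlockId, info: BlockInfo): Boolean = synchronized {
    if (infos.contains(id)) false
    else { infos(id) = info; true }
  }

  def get(id: BlockId): Option[BlockInfo] = synchronized { infos.get(id) }

  def remove(id: BlockId): Unit = synchronized { infos.remove(id) }
}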
2.2 Initializing the BlockManager
/**
* Initializes the BlockManager with the given appId. This is not performed in the constructor as
* the appId may not be known at BlockManager instantiation time (in particular for the driver,
* where it is only learned after registration with the TaskScheduler).
*
* This method initializes the BlockTransferService and ShuffleClient, registers with the
* BlockManagerMaster, starts the BlockManagerWorker endpoint, and registers with a local shuffle
* service if configured.
*/
def initialize(appId: String): Unit = {
// Initialize the BlockTransferService
blockTransferService.init(this)
// Initialize the ShuffleClient; by default this is the blockTransferService itself. When an external shuffle service is enabled, the external client's init is called instead.
shuffleClient.init(appId)
// Block replication policy (pluggable, loaded by reflection)
blockReplicationPolicy = {
val priorityClass = conf.get(
"spark.storage.replication.policy", classOf[RandomBlockReplicationPolicy].getName)
val clazz = Utils.classForName(priorityClass)
val ret = clazz.newInstance.asInstanceOf[BlockReplicationPolicy]
logInfo(s"Using $priorityClass for block replication policy")
ret
}
// Create the BlockManagerId; the companion object's blockManagerIdCache caches existing instances
val id =
BlockManagerId(executorId, blockTransferService.hostName, blockTransferService.port, None)
// Register with the BlockManagerMaster, passing the id and the slave endpoint ref
val idFromMaster = master.registerBlockManager(
id,
maxMemory,
slaveEndpoint) // the BlockManagerSlaveEndpoint's RpcEndpointRef
blockManagerId = if (idFromMaster != null) idFromMaster else id
// Create the shuffleServerId
shuffleServerId = if (externalShuffleServiceEnabled) {
// use the external shuffle service's port (with the local host) to build shuffleServerId
logInfo(s"external shuffle service port = $externalShuffleServicePort")
BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort)
} else {
blockManagerId
}
// Register Executors' configuration with the local shuffle service, if one should exist.
if (externalShuffleServiceEnabled && !blockManagerId.isDriver) {
registerWithExternalShuffleServer()
}
logInfo(s"Initialized BlockManager: $blockManagerId")
}
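Since the replication policy class is resolved reflectively from configuration, it can be swapped without code changes. A small sketch (the config key and the RandomBlockReplicationPolicy default appear in the code above; org.example.TopologyAwarePolicy is a hypothetical class name):
import org.apache.spark.SparkConf

val conf = new SparkConf()
  // Unset, this defaults to RandomBlockReplicationPolicy:
  .set("spark.storage.replication.policy", "org.example.TopologyAwarePolicy")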
2.2.1 Registering the BlockManager with the BlockManagerMaster
// Register with the BlockManagerMaster, passing the id and the slave endpoint ref
val idFromMaster = master.registerBlockManager(
id,
maxMemory,
slaveEndpoint) // the BlockManagerSlaveEndpoint's RpcEndpointRef
2.2.1.1 This actually calls BlockManagerMaster.registerBlockManager
/**
* Register the BlockManager's id with the driver. The input BlockManagerId does not contain
* topology information. This information is obtained from the master and we respond with an
* updated BlockManagerId fleshed out with this information.
*/
def registerBlockManager(
blockManagerId: BlockManagerId,
maxMemSize: Long,
slaveEndpoint: RpcEndpointRef): BlockManagerId = {
logInfo(s"Registering BlockManager $blockManagerId")
// Ask the BlockManagerMasterEndpoint, which replies with an updated BlockManagerId
val updatedId = driverEndpoint.askWithRetry[BlockManagerId](
RegisterBlockManager(blockManagerId, maxMemSize, slaveEndpoint))
logInfo(s"Registered BlockManager $updatedId")
updatedId
}
3. Block Operations
3.1 Dropping a block from memory: dropFromMemory()
/**
* Drop a block from memory, possibly putting it on disk if applicable. Called when the memory
* store reaches its limit and needs to free up space.
*
* If `data` is not put on disk, it won't be created.
*
* The caller of this method must hold a write lock on the block before calling this method.
* This method does not release the write lock.
*
* @return the block's new effective StorageLevel.
*/
private[storage] override def dropFromMemory[T: ClassTag](
blockId: BlockId,
data: () => Either[Array[T], ChunkedByteBuffer]): StorageLevel = {
logInfo(s"Dropping block $blockId from memory")
// Look up the block's BlockInfo in BlockInfoManager's infos map (a mutable.HashMap[BlockId, BlockInfo]) and assert that the write lock is held
val info = blockInfoManager.assertBlockIsLockedForWriting(blockId)
var blockIsUpdated = false
val level = info.level
// Drop to disk, if the storage level allows it
if (level.useDisk && !diskStore.contains(blockId)) {
logInfo(s"Writing block $blockId to disk")
data() match {
case Left(elements) =>
diskStore.put(blockId) { fileOutputStream =>
serializerManager.dataSerializeStream(
blockId,
fileOutputStream,
elements.toIterator)(info.classTag.asInstanceOf[ClassTag[T]])
}
case Right(bytes) =>
diskStore.putBytes(blockId, bytes)
}
blockIsUpdated = true
}
// Actually drop from the memory store
val droppedMemorySize =
if (memoryStore.contains(blockId)) memoryStore.getSize(blockId) else 0L
// Remove the block via the MemoryStore
val blockIsRemoved = memoryStore.remove(blockId)
if (blockIsRemoved) {
blockIsUpdated = true
} else {
logWarning(s"Block $blockId could not be dropped from memory as it does not exist")
}
// Get the block's current status
val status = getCurrentBlockStatus(blockId, info)
if (info.tellMaster) {
// Report the block's status to the BlockManagerMaster
reportBlockStatus(blockId, status, droppedMemorySize)
}
if (blockIsUpdated) {
// Update the task metrics
addUpdatedBlockStatusToTaskMetrics(blockId, status)
}
status.storageLevel
}
3.1.1 reportBlockStatus
reportBlockStatus reports a block's status to the BlockManagerMasterEndpoint and, when necessary, re-registers the BlockManager.
1. It calls tryToReportBlockStatus, which sends an update message through BlockManagerMasterEndpoint's updateBlockInfo.
2. If this BlockManager is not registered with the BlockManagerMasterEndpoint, it calls asyncReregister, which calls reregister; reregister registers the BlockManager with the master again and then calls reportAllBlocks, which in turn uses tryToReportBlockStatus to report each block's status.
/**
* Tell the master about the current storage status of a block. This will send a block update
* message reflecting the current status, *not* the desired storage level in its block info.
* For example, a block with MEMORY_AND_DISK set might have fallen out to be only on disk.
*
* droppedMemorySize exists to account for when the block is dropped from memory to disk (so
* it is still valid). This ensures that update in master will compensate for the increase in
* memory on slave.
*/
private def reportBlockStatus(
blockId: BlockId,
status: BlockStatus,
droppedMemorySize: Long = 0L): Unit = {
val needReregister = !tryToReportBlockStatus(blockId, status, droppedMemorySize)
if (needReregister) {
logInfo(s"Got told to re-register updating block $blockId")
// Re-registering will report our new block for free.
asyncReregister()
}
logDebug(s"Told master about block $blockId")
}
3.2 putSingle delegates to putIterator
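putSingle itself is tiny; a hedged sketch of its shape inside BlockManager (reconstructed, not copied from the source):
// Wrap the single value in an iterator and reuse the putIterator path:
def putSingle[T: ClassTag](
    blockId: BlockId,
    value: T,
    level: StorageLevel,
    tellMaster: Boolean = true): Boolean = {
  putIterator(blockId, Iterator(value), level, tellMaster)
}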
3.2.1 putIterator delegates to doPutIterator
/**
* @return true if the block was stored or false if an error occurred.
*/
def putIterator[T: ClassTag](
blockId: BlockId,
values: Iterator[T],
level: StorageLevel,
tellMaster: Boolean = true): Boolean = {
require(values != null, "Values is null")
doPutIterator(blockId, () => values, level, implicitly[ClassTag[T]], tellMaster) match {
case None =>
true
case Some(iter) =>
// Caller doesn't care about the iterator values, so we can close the iterator here
// to free resources earlier
iter.close()
false
}
}
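A hedged usage sketch, assuming a blockManager reference is in scope (TestBlockId is one of Spark's BlockId subtypes; the values are illustrative):
import org.apache.spark.storage.{StorageLevel, TestBlockId}

val stored: Boolean = blockManager.putIterator(
  TestBlockId("demo"),
  Seq(1, 2, 3).iterator,
  StorageLevel.MEMORY_AND_DISK) // try memory first, spill to disk if needed
// stored == false means the put failed and the returned iterator was closed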
3.2.2 doPutIterator
doPutIterator writes to the MemoryStore first; if memory is insufficient and the StorageLevel allows useDisk, it falls back to the DiskStore.
/**
* Put the given block according to the given level in one of the block stores, replicating
* the values if necessary.
*
* If the block already exists, this method will not overwrite it.
*
* @param keepReadLock if true, this method will hold the read lock when it returns (even if the
* block already exists). If false, this method will hold no locks when it
* returns.
* @return None if the block was already present or if the put succeeded, or Some(iterator)
* if the put failed.
*/
private def doPutIterator[T](
blockId: BlockId,
iterator: () => Iterator[T],
level: StorageLevel,
classTag: ClassTag[T],
tellMaster: Boolean = true,
keepReadLock: Boolean = false): Option[PartiallyUnrolledIterator[T]] = {
// Delegates to doPut; the closure below is the actual put body
doPut(blockId, level, classTag, tellMaster = tellMaster, keepReadLock = keepReadLock) { info =>
val startTimeMs = System.currentTimeMillis
var iteratorFromFailedMemoryStorePut: Option[PartiallyUnrolledIterator[T]] = None
// Size of the block in bytes
var size = 0L
if (level.useMemory) {
// Put it in memory first, even if it also has useDisk set to true;
// We will drop it to disk later if the memory store can't hold it.
if (level.deserialized) {
memoryStore.putIteratorAsValues(blockId, iterator(), classTag) match {
case Right(s) =>
size = s
case Left(iter) =>
// Not enough space to unroll this block; drop to disk if applicable
if (level.useDisk) {
logWarning(s"Persisting block $blockId to disk instead.")
diskStore.put(blockId) { fileOutputStream =>
serializerManager.dataSerializeStream(blockId, fileOutputStream, iter)(classTag)
}
size = diskStore.getSize(blockId)
} else {
iteratorFromFailedMemoryStorePut = Some(iter)
}
}
} else { // !level.deserialized
memoryStore.putIteratorAsBytes(blockId, iterator(), classTag, level.memoryMode) match {
case Right(s) =>
size = s
case Left(partiallySerializedValues) =>
// Not enough space to unroll this block; drop to disk if applicable
if (level.useDisk) {
logWarning(s"Persisting block $blockId to disk instead.")
diskStore.put(blockId) { fileOutputStream =>
partiallySerializedValues.finishWritingToStream(fileOutputStream)
}
size = diskStore.getSize(blockId)
} else {
iteratorFromFailedMemoryStorePut = Some(partiallySerializedValues.valuesIterator)
}
}
}
} else if (level.useDisk) {
diskStore.put(blockId) { fileOutputStream =>
serializerManager.dataSerializeStream(blockId, fileOutputStream, iterator())(classTag)
}
size = diskStore.getSize(blockId)
}
val putBlockStatus = getCurrentBlockStatus(blockId, info)
val blockWasSuccessfullyStored = putBlockStatus.storageLevel.isValid
if (blockWasSuccessfullyStored) {
// Now that the block is in either the memory or disk store, tell the master about it.
info.size = size
if (tellMaster && info.tellMaster) {
reportBlockStatus(blockId, putBlockStatus)
}
addUpdatedBlockStatusToTaskMetrics(blockId, putBlockStatus)
logDebug("Put block %s locally took %s".format(blockId, Utils.getUsedTimeMs(startTimeMs)))
if (level.replication > 1) {
val remoteStartTime = System.currentTimeMillis
val bytesToReplicate = doGetLocalBytes(blockId, info)
// [SPARK-16550] Erase the typed classTag when using default serialization, since
// NettyBlockRpcServer crashes when deserializing repl-defined classes.
// TODO(ekl) remove this once the classloader issue on the remote end is fixed.
val remoteClassTag = if (!serializerManager.canUseKryo(classTag)) {
scala.reflect.classTag[Any]
} else {
classTag
}
try {
replicate(blockId, bytesToReplicate, level, remoteClassTag)
} finally {
bytesToReplicate.unmap()
}
logDebug("Put block %s remotely took %s"
.format(blockId, Utils.getUsedTimeMs(remoteStartTime)))
}
}
assert(blockWasSuccessfullyStored == iteratorFromFailedMemoryStorePut.isEmpty)
iteratorFromFailedMemoryStorePut
}
}
3.2.3 Ultimately the doPut method is called
/**
* Helper method used to abstract common code from [[doPutBytes()]] and [[doPutIterator()]].
*
* @param putBody a function which attempts the actual put() and returns None on success
* or Some on failure.
*/
private def doPut[T](
blockId: BlockId,
level: StorageLevel,
classTag: ClassTag[_],
tellMaster: Boolean,
keepReadLock: Boolean)(putBody: BlockInfo => Option[T]): Option[T] = {
require(blockId != null, "BlockId is null")
require(level != null && level.isValid, "StorageLevel is null or invalid")
val putBlockInfo = {
val newInfo = new BlockInfo(level, classTag, tellMaster)
if (blockInfoManager.lockNewBlockForWriting(blockId, newInfo)) {
newInfo
} else {
logWarning(s"Block $blockId already exists on this machine; not re-adding it")
if (!keepReadLock) {
// lockNewBlockForWriting returned a read lock on the existing block, so we must free it:
releaseLock(blockId)
}
return None
}
}
val startTimeMs = System.currentTimeMillis
var exceptionWasThrown: Boolean = true
val result: Option[T] = try {
val res = putBody(putBlockInfo)
exceptionWasThrown = false
if (res.isEmpty) {
// the block was successfully stored
if (keepReadLock) {
blockInfoManager.downgradeLock(blockId)
} else {
blockInfoManager.unlock(blockId)
}
} else {
removeBlockInternal(blockId, tellMaster = false)
logWarning(s"Putting block $blockId failed")
}
res
} finally {
// This cleanup is performed in a finally block rather than a `catch` to avoid having to
// catch and properly re-throw InterruptedException.
if (exceptionWasThrown) {
logWarning(s"Putting block $blockId failed due to an exception")
// If an exception was thrown then it's possible that the code in `putBody` has already
// notified the master about the availability of this block, so we need to send an update
// to remove this block location.
removeBlockInternal(blockId, tellMaster = tellMaster)
// The `putBody` code may have also added a new block status to TaskMetrics, so we need
// to cancel that out by overwriting it with an empty block status. We only do this if
// the finally block was entered via an exception because doing this unconditionally would
// cause us to send empty block statuses for every block that failed to be cached due to
// a memory shortage (which is an expected failure, unlike an uncaught exception).
addUpdatedBlockStatusToTaskMetrics(blockId, BlockStatus.empty)
}
}
if (level.replication > 1) {
logDebug("Putting block %s with replication took %s"
.format(blockId, Utils.getUsedTimeMs(startTimeMs)))
} else {
logDebug("Putting block %s without replication took %s"
.format(blockId, Utils.getUsedTimeMs(startTimeMs)))
}
result
}
3.3 Replicating blocks: replicate()
/**
* Replicate block to another node. Note that this is a blocking call that returns after
* the block has been replicated.
*/
private def replicate(
blockId: BlockId,
data: ChunkedByteBuffer,
level: StorageLevel,
classTag: ClassTag[_]): Unit = {
// Maximum number of tolerated replication failures
val maxReplicationFailures = conf.getInt("spark.storage.maxReplicationFailures", 1)
val tLevel = StorageLevel(
useDisk = level.useDisk,
useMemory = level.useMemory,
useOffHeap = level.useOffHeap,
deserialized = level.deserialized,
replication = 1)
// Number of peers to replicate to
val numPeersToReplicateTo = level.replication - 1
val startTime = System.nanoTime
// Peers that already hold a replica
var peersReplicatedTo = mutable.HashSet.empty[BlockManagerId]
// Peers for which replication failed
var peersFailedToReplicateTo = mutable.HashSet.empty[BlockManagerId]
// Failure count
var numFailures = 0
// Candidate peers, ordered by the replication policy
var peersForReplication = blockReplicationPolicy.prioritize(
blockManagerId, // this BlockManager's own id
getPeers(false), // all other BlockManagerIds in the cluster
mutable.HashSet.empty,
blockId,
numPeersToReplicateTo)
// Start replicating
while (numFailures <= maxReplicationFailures &&
!peersForReplication.isEmpty &&
peersReplicatedTo.size != numPeersToReplicateTo) {
val peer = peersForReplication.head
try {
val onePeerStartTime = System.nanoTime
logTrace(s"Trying to replicate $blockId of ${data.size} bytes to $peer")
// Upload the block to the remote node via blockTransferService
blockTransferService.uploadBlockSync(
peer.host,
peer.port,
peer.executorId,
blockId,
new NettyManagedBuffer(data.toNetty),
tLevel,
classTag)
logTrace(s"Replicated $blockId of ${data.size} bytes to $peer" +
s" in ${(System.nanoTime - onePeerStartTime).toDouble / 1e6} ms")
// Advance to the next candidate peer (drop the one just handled)
peersForReplication = peersForReplication.tail
// Replication to this peer succeeded
peersReplicatedTo += peer
} catch {
case NonFatal(e) =>
logWarning(s"Failed to replicate $blockId to $peer, failure #$numFailures", e)
peersFailedToReplicateTo += peer
// we have a failed replication, so we get the list of peers again
// we don't want peers we have already replicated to and the ones that
// have failed previously
val filteredPeers = getPeers(true).filter { p =>
!peersFailedToReplicateTo.contains(p) && !peersReplicatedTo.contains(p)
}
numFailures += 1
peersForReplication = blockReplicationPolicy.prioritize(
blockManagerId,
filteredPeers,
peersReplicatedTo,
blockId,
numPeersToReplicateTo - peersReplicatedTo.size)
}
}
logDebug(s"Replicating $blockId of ${data.size} bytes to " +
s"${peersReplicatedTo.size} peer(s) took ${(System.nanoTime - startTime) / 1e6} ms")
if (peersReplicatedTo.size < numPeersToReplicateTo) {
logWarning(s"Block $blockId replicated to only " +
s"${peersReplicatedTo.size} peer(s) instead of $numPeersToReplicateTo peers")
}
logDebug(s"block $blockId replicated to ${peersReplicatedTo.mkString(", ")}")
}
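The policy interface is what replicate() invokes through prioritize above. A hedged sketch of a custom implementation, assuming the trait's signature matches that call site (replicate() already filters out peers that were used or that failed):
import scala.collection.mutable
import scala.util.Random
import org.apache.spark.storage.{BlockId, BlockManagerId, BlockReplicationPolicy}

// Hypothetical policy: randomly order the candidate peers and take what we need.
class ShufflePeersPolicy extends BlockReplicationPolicy {
  override def prioritize(
      blockManagerId: BlockManagerId,
      peers: Seq[BlockManagerId],
      peersReplicatedTo: mutable.HashSet[BlockManagerId],
      blockId: BlockId,
      numReplicas: Int): List[BlockManagerId] = {
    Random.shuffle(peers.toList).take(numReplicas)
  }
}
It would be registered via spark.storage.replication.policy, as shown in section 2.2.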
3.3.1 Fetching peer BlockManagerIds: getPeers()
/**
* Get peer block managers in the system.
*/
private def getPeers(forceFetch: Boolean): Seq[BlockManagerId] = {
peerFetchLock.synchronized {
val cachedPeersTtl = conf.getInt("spark.storage.cachedPeersTtl", 60 * 1000) // milliseconds
val timeout = System.currentTimeMillis - lastPeerFetchTime > cachedPeersTtl
if (cachedPeers == null || forceFetch || timeout) {
// Fetch all peer BlockManagerIds from the BlockManagerMaster
// (under the hood this asks the driver endpoint through its RpcEndpointRef)
cachedPeers = master.getPeers(blockManagerId).sortBy(_.hashCode)
lastPeerFetchTime = System.currentTimeMillis
logDebug("Fetched peers from master: " + cachedPeers.mkString("[", ",", "]"))
}
cachedPeers
}
}
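getPeers' caching (refetch when forced, empty, or older than spark.storage.cachedPeersTtl) is a generic pattern; a standalone, simplified model:
// Simplified model of getPeers' caching: refetch when forced, empty, or stale.
class TtlCachedList[T](ttlMs: Long)(fetch: () => Seq[T]) {
  private var cached: Seq[T] = _
  private var lastFetchMs = 0L

  def get(forceFetch: Boolean = false): Seq[T] = synchronized {
    val stale = System.currentTimeMillis - lastFetchMs > ttlMs
    if (cached == null || forceFetch || stale) {
      cached = fetch()
      lastFetchMs = System.currentTimeMillis
    }
    cached
  }
}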
3.3.2 blockTransferService.uploadBlockSync
uploadBlockSync is a blocking wrapper that delegates to BlockTransferService's uploadBlock method.
3.4 getDiskWriter
Used to create a DiskBlockObjectWriter, which writes JVM objects directly to disk.
- spark.shuffle.sync decides whether each write is committed synchronously or asynchronously.
/**
* A short circuited method to get a block writer that can write data directly to disk.
* The Block will be appended to the File specified by filename. Callers should handle error
* cases.
*/
def getDiskWriter(
blockId: BlockId,
file: File,
serializerInstance: SerializerInstance,
bufferSize: Int,
writeMetrics: ShuffleWriteMetrics): DiskBlockObjectWriter = {
// Sync or async commits
val syncWrites = conf.getBoolean("spark.shuffle.sync", false)
// Create the DiskBlockObjectWriter
new DiskBlockObjectWriter(file, serializerManager, serializerInstance, bufferSize,
syncWrites, writeMetrics, blockId)
}
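A hedged usage sketch, assuming blockId, file, serializerInstance and writeMetrics are in scope and key/value are illustrative; the exact DiskBlockObjectWriter API differs between Spark versions:
// Append one record to `file` through the returned writer:
val writer = blockManager.getDiskWriter(
  blockId, file, serializerInstance, bufferSize = 32 * 1024, writeMetrics)
writer.write(key, value) // serialized with serializerInstance and buffered
writer.close()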
3.5 getBlockData: reading local block data
- If the block is the output of a ShuffleMapTask:
  - intermediate data for multiple partitions is written into a single file; IndexShuffleBlockResolver's getBlockData() solves the problem of reading one partition's results out of it (see 3.5.1).
- If the block is the output of a ResultTask:
  - getLocalBytes is used to fetch the local intermediate result;
  - if the blockId cannot be found, its status is reported to the master.
/**
* Interface to get local block data. Throws an exception if the block cannot be found or
* cannot be read successfully.
*/
override def getBlockData(blockId: BlockId): ManagedBuffer = {
if (blockId.isShuffle) { // ShuffleMapTask output
shuffleManager.shuffleBlockResolver.getBlockData(blockId.asInstanceOf[ShuffleBlockId])
} else {
getLocalBytes(blockId) match {
case Some(buffer) => new BlockManagerManagedBuffer(blockInfoManager, blockId, buffer)
case None => // blockId not found locally; report its status to the master
// If this block manager receives a request for a block that it doesn't have then it's
// likely that the master has outdated block statuses for this block. Therefore, we send
// an RPC so that this block is marked as being unavailable from this block manager.
reportBlockStatus(blockId, BlockStatus.empty)
throw new BlockNotFoundException(blockId.toString)
}
}
}
3.5.1 Intermediate data for multiple partitions is written into a single file; IndexShuffleBlockResolver's getBlockData() solves the problem of locating one partition's results within it.
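The resolver's trick: each map task writes one data file plus an index file of (numPartitions + 1) long offsets, so locating reduce partition i only needs offsets i and i + 1. A standalone sketch of that lookup (file layout as described; not Spark's actual code):
import java.io.{DataInputStream, File, FileInputStream}

// Returns (startOffset, length) of reduce partition `reduceId` in the data file.
def partitionSegment(indexFile: File, reduceId: Int): (Long, Long) = {
  val in = new DataInputStream(new FileInputStream(indexFile))
  try {
    in.skipBytes(reduceId * 8)    // each offset is one Long (8 bytes)
    val offset = in.readLong()
    val nextOffset = in.readLong()
    (offset, nextOffset - offset)
  } finally {
    in.close()
  }
}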
3.5.2 The block is ResultTask output
/**
* Get block from the local block manager as serialized bytes.
*/
def getLocalBytes(blockId: BlockId): Option[ChunkedByteBuffer] = {
logDebug(s"Getting local block $blockId as bytes")
// As an optimization for map output fetches, if the block is for a shuffle, return it
// without acquiring a lock; the disk store never deletes (recent) items so this should work
if (blockId.isShuffle) { // Why check blockId.isShuffle again here? Doesn't 3.5.1 already handle shuffle blocks? The handling here is indeed the same as in 3.5.1: delegate to shuffleBlockResolver.
val shuffleBlockResolver = shuffleManager.shuffleBlockResolver
// TODO: This should gracefully handle case where local block is not available. Currently
// downstream code will throw an exception.
Option(
new ChunkedByteBuffer(
shuffleBlockResolver.getBlockData(blockId.asInstanceOf[ShuffleBlockId]).nioByteBuffer()))
} else {
blockInfoManager.lockForReading(blockId).map { info => doGetLocalBytes(blockId, info) }
}
}
3.5.2.1 This actually calls doGetLocalBytes
When the reduce task runs on the same node as the map task, no remote fetch is needed:
blockInfoManager.lockForReading(blockId).map { info => doGetLocalBytes(blockId, info) }
3.5.2.2 doGetLocalBytes
- If the block's level is deserialized, try the DiskStore first (reading a pre-serialized copy avoids re-serializing the in-memory objects);
- otherwise (already serialized), try the MemoryStore first.
/**
* Get block from the local block manager as serialized bytes.
*
* Must be called while holding a read lock on the block.
* Releases the read lock upon exception; keeps the read lock upon successful return.
*/
private def doGetLocalBytes(blockId: BlockId, info: BlockInfo): ChunkedByteBuffer = {
val level = info.level
logDebug(s"Level for block $blockId is $level")
// In order, try to read the serialized bytes from memory, then from disk, then fall back to
// serializing in-memory objects, and, finally, throw an exception if the block does not exist.
if (level.deserialized) {
// Try to avoid expensive serialization by reading a pre-serialized copy from disk:
if (level.useDisk && diskStore.contains(blockId)) {
// Note: we purposely do not try to put the block back into memory here. Since this branch
// handles deserialized blocks, this block may only be cached in memory as objects, not
// serialized bytes. Because the caller only requested bytes, it doesn't make sense to
// cache the block's deserialized objects since that caching may not have a payoff.
diskStore.getBytes(blockId)
} else if (level.useMemory && memoryStore.contains(blockId)) {
// The block was not found on disk, so serialize an in-memory copy:
serializerManager.dataSerializeWithExplicitClassTag(
blockId, memoryStore.getValues(blockId).get, info.classTag)
} else {
handleLocalReadFailure(blockId)
}
} else { // storage level is serialized
if (level.useMemory && memoryStore.contains(blockId)) {
memoryStore.getBytes(blockId).get
} else if (level.useDisk && diskStore.contains(blockId)) {
val diskBytes = diskStore.getBytes(blockId)
maybeCacheDiskBytesInMemory(info, blockId, level, diskBytes).getOrElse(diskBytes)
} else {
handleLocalReadFailure(blockId)
}
}
}
3.6 Fetching block data from remote nodes
3.6.1 getRemoteValues calls getRemoteBytes, deserializes the bytes, and returns a BlockResult
/**
* Get block from remote block managers.
*
* This does not acquire a lock on this block in this JVM.
*/
private def getRemoteValues[T: ClassTag](blockId: BlockId): Option[BlockResult] = {
val ct = implicitly[ClassTag[T]]
getRemoteBytes(blockId).map { data =>
val values =
serializerManager.dataDeserializeStream(blockId, data.toInputStream(dispose = true))(ct)
new BlockResult(values, DataReadMethod.Network, data.size)
}
}
3.6.2 getRemoteBytes
- 1. Obtain the block's locations via getLocations;
- 2. iterate over the locations, fetching the bytes with blockTransferService.fetchBlockSync;
- 3. on failure, retry the next location, refreshing the location list from the driver after a number of failures.
/**
* Get block from remote block managers as serialized bytes.
*/
def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = {
logDebug(s"Getting remote block $blockId")
require(blockId != null, "BlockId is null")
var runningFailureCount = 0
var totalFailureCount = 0
val locations = getLocations(blockId)
val maxFetchFailures = locations.size
var locationIterator = locations.iterator
while (locationIterator.hasNext) {
val loc = locationIterator.next()
logDebug(s"Getting remote block $blockId from $loc")
val data = try {
blockTransferService.fetchBlockSync(
loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()
} catch {
case NonFatal(e) =>
runningFailureCount += 1
totalFailureCount += 1
if (totalFailureCount >= maxFetchFailures) {
// Give up trying anymore locations. Either we've tried all of the original locations,
// or we've refreshed the list of locations from the master, and have still
// hit failures after trying locations from the refreshed list.
logWarning(s"Failed to fetch block after $totalFailureCount fetch failures. " +
s"Most recent failure cause:", e)
return None
}
logWarning(s"Failed to fetch remote block $blockId " +
s"from $loc (failed attempt $runningFailureCount)", e)
// If there is a large number of executors then locations list can contain a
// large number of stale entries causing a large number of retries that may
// take a significant amount of time. To get rid of these stale entries
// we refresh the block locations after a certain number of fetch failures
if (runningFailureCount >= maxFailuresBeforeLocationRefresh) {
locationIterator = getLocations(blockId).iterator
logDebug(s"Refreshed locations from the driver " +
s"after ${runningFailureCount} fetch failures.")
runningFailureCount = 0
}
// This location failed, so we retry fetch from a different one by returning null here
null
}
if (data != null) {
return Some(new ChunkedByteBuffer(data))
}
logDebug(s"The value of block $blockId is null")
}
logDebug(s"Block $blockId not found")
None
}