Spark源码——BlockManager

最新推荐文章于 2024-02-08 07:45:00 发布

阿松0311

最新推荐文章于 2024-02-08 07:45:00 发布

阅读量2.1k

点赞数

分类专栏： Spark 文章标签： spark scala 大数据

本文链接：https://blog.csdn.net/weixin_44773984/article/details/121984679

版权

Spark 专栏收录该内容

16 篇文章 1 订阅

订阅专栏

在这里插入图片描述

（图片来源：北风网）

源码所在目录：
在这里插入图片描述

首先进入BlockManagerMasterEndpoint看看

/**
 * BlockManagerMasterEndpoint is an [[ThreadSafeRpcEndpoint]] on the master node to track statuses
 * of all slaves' block managers.
 */

BlockManagerMasterEndpoint 是主节点上的 [[ThreadSafeRpcEndpoint]]，用于跟踪所有从节点的BlockManager的状态。

一些数据结构

class BlockManagerMasterEndpoint(
    override val rpcEnv: RpcEnv,
    val isLocal: Boolean,
    conf: SparkConf,
    listenerBus: LiveListenerBus)
  extends ThreadSafeRpcEndpoint with Logging {

  // Bookkeeping structures kept by this endpoint.
  // Mapping from block manager id to the block manager's information.
  // This is the per-block-manager metadata (BlockManagerInfo), keyed by BlockManagerId.
  private val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]

  // Mapping from executor ID to block manager ID.
  // A single BlockManagerId is stored per executor ID.
  private val blockManagerIdByExecutor = new mutable.HashMap[String, BlockManagerId]

  // Mapping from block id to the set of block managers that have the block.
  // A set is kept per block because the same block may be held by several block managers.
  private val blockLocations = new JHashMap[BlockId, mutable.HashSet[BlockManagerId]]

  private val askThreadPool = ThreadUtils.newDaemonCachedThreadPool("block-manager-ask-thread-pool")
  private implicit val askExecutionContext = ExecutionContext.fromExecutorService(askThreadPool)

  // Instantiates the topology mapper class named by "spark.storage.replication.topologyMapper"
  // (DefaultTopologyMapper when the setting is absent) through its SparkConf constructor.
  private val topologyMapper = {
    val topologyMapperClassName = conf.get(
      "spark.storage.replication.topologyMapper", classOf[DefaultTopologyMapper].getName)
    val clazz = Utils.classForName(topologyMapperClassName)
    val mapper =
      clazz.getConstructor(classOf[SparkConf]).newInstance(conf).asInstanceOf[TopologyMapper]
    logInfo(s"Using $topologyMapperClassName for getting topology information")
    mapper
  }
  // Log that the BlockManagerMasterEndpoint has been constructed.
  logInfo("BlockManagerMasterEndpoint up")

register方法，就是BlockManagerMaster注册BlockManager的代码

  /**
   * Registers a block manager and returns its BlockManagerId with topology information
   * populated, if available.
   *
   * The id received from the block manager carries no topology information, so it is rebuilt
   * here with whatever `topologyMapper` reports for its host. An id that is already known is not
   * registered again, but a SparkListenerBlockManagerAdded event is posted in either case.
   *
   * @param idWithoutTopologyInfo id sent by the block manager, without topology information
   * @param maxMemSize            maximum memory size reported by the block manager
   * @param slaveEndpoint         RPC endpoint reference of the block manager
   * @return the id including topology information
   */
  private def register(
      idWithoutTopologyInfo: BlockManagerId,
      maxMemSize: Long,
      slaveEndpoint: RpcEndpointRef): BlockManagerId = {
    // The dummy id is not expected to contain the topology information; look it up here and
    // answer with a more fleshed out block manager id.
    val id = BlockManagerId(
      idWithoutTopologyInfo.executorId,
      idWithoutTopologyInfo.host,
      idWithoutTopologyInfo.port,
      topologyMapper.getTopologyForHost(idWithoutTopologyInfo.host))

    val time = System.currentTimeMillis()

    val isNewBlockManager = !blockManagerInfo.contains(id)
    if (isNewBlockManager) {
      // Sanity check: an unknown id should not share its executor with another block manager.
      // If one is found it is assumed dead and removed before the new one is recorded.
      blockManagerIdByExecutor.get(id.executorId).foreach { oldId =>
        logError("Got two different block manager registrations on same executor - "
            + s" will replace old one $oldId with new one $id")
        removeExecutor(id.executorId)
      }

      logInfo("Registering block manager %s with %s RAM, %s".format(
        id.hostPort, Utils.bytesToString(maxMemSize), id))

      // Record the executor -> block manager id mapping, then the block manager's metadata.
      blockManagerIdByExecutor.update(id.executorId, id)
      blockManagerInfo.update(
        id,
        new BlockManagerInfo(id, System.currentTimeMillis(), maxMemSize, slaveEndpoint))
    }

    listenerBus.post(SparkListenerBlockManagerAdded(time, id, maxMemSize))
    id
  }

OK，注册完事

看看更新blockInfo，每个blockmanager上，如果block发生变化，那么就要发送updateBlockInfo请求来BlockManagerMaster这进行blockInfo的更新。

private def updateBlockInfo(
      blockManagerId: BlockManagerId,
      blockId: BlockId,
      storageLevel: StorageLevel,
      memSize: Long,
      diskSize: Long): Boolean = {
    // Applies a block status update sent by a block manager. The result is false only when
    // the sender is unknown and is not a driver outside local mode.
    if (!blockManagerInfo.contains(blockManagerId)) {
      // We intentionally do not register the master (except in local mode),
      // so we should not indicate failure.
      blockManagerId.isDriver && !isLocal
    } else if (blockId == null) {
      // No block given: only refresh the last-seen timestamp of the block manager.
      blockManagerInfo(blockManagerId).updateLastSeenMs()
      true
    } else {
      // Update the per-block-manager view of this block first.
      blockManagerInfo(blockManagerId).updateBlockInfo(blockId, storageLevel, memSize, diskSize)

      // Then update the block -> holders map. A block may be held by several block managers,
      // and the set keeps each holder only once.
      val holders: mutable.HashSet[BlockManagerId] =
        if (blockLocations.containsKey(blockId)) {
          blockLocations.get(blockId)
        } else {
          val emptyHolders = new mutable.HashSet[BlockManagerId]
          blockLocations.put(blockId, emptyHolders)
          emptyHolders
        }

      if (storageLevel.isValid) {
        holders.add(blockManagerId)
      } else {
        holders.remove(blockManagerId)
      }

      // Remove the block from master tracking if it has been removed on all slaves.
      if (holders.isEmpty) {
        blockLocations.remove(blockId)
      }
      true
    }
  }

进入blockManagerInfo.updateBlockInfo看看

 /**
  * Records a status update for one block held by this block manager and keeps the tracked
  * remaining memory (`_remainingMem`) in step with it.
  *
  * @param blockId      id of the block whose status changed
  * @param storageLevel new storage level; when it is not valid the block is dropped from tracking
  * @param memSize      size of the block in memory
  * @param diskSize     size of the block on disk
  */
 def updateBlockInfo(
      blockId: BlockId,
      storageLevel: StorageLevel,
      memSize: Long,
      diskSize: Long): Unit = {

    updateLastSeenMs()
    // The block is already tracked for this block manager.
    if (_blocks.containsKey(blockId)) {
      // The block exists on the slave already.
      val blockStatus: BlockStatus = _blocks.get(blockId)
      val originalLevel: StorageLevel = blockStatus.storageLevel
      val originalMemSize: Long = blockStatus.memSize
      // The previous status used memory: give that amount back before applying the new status.
      if (originalLevel.useMemory) {
        _remainingMem += originalMemSize
      }
    }
    // Build a new BlockStatus for the block and account for its resources by storage level.
    if (storageLevel.isValid) {
      /* isValid means it is either stored in-memory or on-disk.
       * The memSize here indicates the data size in or dropped from memory,
       * externalBlockStoreSize here indicates the data size in or dropped from externalBlockStore,
       * and the diskSize here indicates the data size in or dropped to disk.
       * They can be both larger than 0, when a block is dropped from memory to disk.
       * Therefore, a safe way to set BlockStatus is to set its info in accurate modes. */
      var blockStatus: BlockStatus = null
      if (storageLevel.useMemory) {
        // In-memory block: record the status and deduct its size from the remaining memory.
        blockStatus = BlockStatus(storageLevel, memSize = memSize, diskSize = 0)
        _blocks.put(blockId, blockStatus)
        _remainingMem -= memSize
        logInfo("Added %s in memory on %s (size: %s, free: %s)".format(
          blockId, blockManagerId.hostPort, Utils.bytesToString(memSize),
          Utils.bytesToString(_remainingMem)))
      }
      if (storageLevel.useDisk) {
        // On-disk block: the recorded status carries only the disk size.
        blockStatus = BlockStatus(storageLevel, memSize = 0, diskSize = diskSize)
        _blocks.put(blockId, blockStatus)
        logInfo("Added %s on disk on %s (size: %s)".format(
          blockId, blockManagerId.hostPort, Utils.bytesToString(diskSize)))
      }
      if (!blockId.isBroadcast && blockStatus.isCached) {
        _cachedBlocks += blockId
      }
    } else if (_blocks.containsKey(blockId)) {
      // The storage level is not valid and the block was tracked before: stop tracking it.
      // If isValid is not true, drop the block.
      val blockStatus: BlockStatus = _blocks.get(blockId)
      _blocks.remove(blockId)
      _cachedBlocks -= blockId
      if (blockStatus.storageLevel.useMemory) {
        logInfo("Removed %s on %s in memory (size: %s, free: %s)".format(
          blockId, blockManagerId.hostPort, Utils.bytesToString(blockStatus.memSize),
          Utils.bytesToString(_remainingMem)))
      }
      if (blockStatus.storageLevel.useDisk) {
        logInfo("Removed %s on %s on disk (size: %s)".format(
          blockId, blockManagerId.hostPort, Utils.bytesToString(blockStatus.diskSize)))
      }
    }
  }

总之，BlockManagerMasterEndpoint负责维护各个executor上BlockManager的元数据，也就是BlockManagerInfo和其中记录的BlockStatus。

接下来看看BlockManager：

/**
 * Manager running on every node (driver and executors) which provides interfaces for putting and
 * retrieving blocks both locally and remotely into various stores (memory, disk, and off-heap).
 *
 * Note that [[initialize()]] must be called before the BlockManager is usable.
 */

BlockManager运行在每个节点（driver和executor）上，它提供了在本地和远程向各种存储（内存、磁盘和堆外）写入block以及从中读取block的接口。

首先看它的initialize方法

/**
   * Initializes the BlockManager with the given appId. This is not performed in the constructor as
   * the appId may not be known at BlockManager instantiation time (in particular for the driver,
   * where it is only learned after registration with the TaskScheduler).
   *
   * In order, this method initializes the BlockTransferService and the ShuffleClient, creates the
   * block replication policy named in the configuration, registers with the BlockManagerMaster,
   * decides which id to use as the shuffle server id, and registers with the external shuffle
   * service when that service is enabled and this is not the driver.
   */
  def initialize(appId: String): Unit = {
    // Bring up the transfer service and the shuffle client first.
    blockTransferService.init(this)
    shuffleClient.init(appId)

    // Instantiate the replication policy class named by "spark.storage.replication.policy"
    // (RandomBlockReplicationPolicy when the setting is absent).
    val priorityClass = conf.get(
      "spark.storage.replication.policy", classOf[RandomBlockReplicationPolicy].getName)
    val replicationPolicy =
      Utils.classForName(priorityClass).newInstance.asInstanceOf[BlockReplicationPolicy]
    logInfo(s"Using $priorityClass for block replication policy")
    blockReplicationPolicy = replicationPolicy

    // Build this block manager's own id from the executor id and the transfer service's
    // host name and port; no topology information is supplied at this point.
    val localId =
      BlockManagerId(executorId, blockTransferService.hostName, blockTransferService.port, None)

    // Register with the master and prefer the id it hands back, when it hands one back.
    val idFromMaster = master.registerBlockManager(localId, maxMemory, slaveEndpoint)
    blockManagerId = Option(idFromMaster).getOrElse(localId)

    shuffleServerId =
      if (!externalShuffleServiceEnabled) {
        blockManagerId
      } else {
        logInfo(s"external shuffle service port = $externalShuffleServicePort")
        BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort)
      }

    // Register Executors' configuration with the local shuffle service, if one should exist.
    val registersWithShuffleService = externalShuffleServiceEnabled && !blockManagerId.isDriver
    if (registersWithShuffleService) {
      registerWithExternalShuffleServer()
    }

    logInfo(s"Initialized BlockManager: $blockManagerId")
  }

这样，初始化过程完成了初始化 BlockTransferService 和 ShuffleClient，
向 BlockManagerMaster 注册，启动 BlockManagerWorker 端点。

由于BlockManager主要负责数据的存取，所以来看看关于数据存取的方法

读取分本地读取和远程读取

先看看本地读取getLocalValues

  /**
   * Get block from local block manager as an iterator of Java objects.
   *
   * The memory store is consulted first and the disk store second, each only when the block's
   * storage level allows it. Bytes are deserialized with the block's class tag.
   *
   * @param blockId id of the block to read
   * @return None when no read lock could be obtained because the block was not found; otherwise
   *         the result of the matching branch below
   */
  def getLocalValues(blockId: BlockId): Option[BlockResult] = {
    logDebug(s"Getting local block $blockId")
    // Take a read lock on the block. None means the block was not found;
    // otherwise the block's info is returned.
    blockInfoManager.lockForReading(blockId) match {
      case None =>
        logDebug(s"Block $blockId was not found")
        None
      case Some(info) =>
        // Storage level recorded for this block.
        val level = info.level
        logDebug(s"Level for block $blockId is $level")
        // The level uses memory and the memory store holds this block.
        if (level.useMemory && memoryStore.contains(blockId)) {
          // Stored as deserialized values: take them from memory as they are.
          val iter: Iterator[Any] = if (level.deserialized) {
            memoryStore.getValues(blockId).get
          } else {
            // Stored as bytes: deserialize them through the serializerManager.
            serializerManager.dataDeserializeStream(
              blockId, memoryStore.getBytes(blockId).get.toInputStream())(info.classTag)
          }
          // Wrap the iterator together with releaseLock(blockId).
          // NOTE(review): presumably the lock is released once the iterator completes - confirm.
          val ci = CompletionIterator[Any, Iterator[Any]](iter, releaseLock(blockId))
          Some(new BlockResult(ci, DataReadMethod.Memory, info.size))
          
        } else if (level.useDisk && diskStore.contains(blockId)) {
          // The level uses disk and the disk store holds this block.
          val iterToReturn: Iterator[Any] = {
            val diskBytes = diskStore.getBytes(blockId)
            if (level.deserialized) {
              // Deserialize the disk bytes, then hand the values to maybeCacheDiskValuesInMemory.
              // NOTE(review): presumably this caches them in memory when possible - confirm.
              val diskValues = serializerManager.dataDeserializeStream(
                blockId,
                diskBytes.toInputStream(dispose = true))(info.classTag)
              maybeCacheDiskValuesInMemory(info, blockId, level, diskValues)
            } else {
            
              // Use the buffer returned by maybeCacheDiskBytesInMemory when there is one (it is
              // not disposed); otherwise stream the disk bytes and dispose them afterwards.
              val stream = maybeCacheDiskBytesInMemory(info, blockId, level, diskBytes)
                .map {_.toInputStream(dispose = false)}
                .getOrElse { diskBytes.toInputStream(dispose = true) }
              serializerManager.dataDeserializeStream(blockId, stream)(info.classTag)
            }
          }
          val ci = CompletionIterator[Any, Iterator[Any]](iterToReturn, releaseLock(blockId))
          Some(new BlockResult(ci, DataReadMethod.Disk, info.size))
        } else {
          // Neither store holds the block although a lock was obtained.
          handleLocalReadFailure(blockId)
        }
    }
  }

看看远程读取getRemoteValues和getRemoteBytes，其中getRemoteBytes有重试机制和刷新locations机制，来提供一些容错办法



  /**
   * Get block from remote block managers.
   *
   * The serialized bytes are fetched with `getRemoteBytes` and deserialized using the class tag
   * of `T`. The result is empty when `getRemoteBytes` returns nothing.
   *
   * This does not acquire a lock on this block in this JVM.
   */
  private def getRemoteValues[T: ClassTag](blockId: BlockId): Option[BlockResult] = {
    val elementTag = implicitly[ClassTag[T]]
    for (remoteBytes <- getRemoteBytes(blockId)) yield {
      // The fetched buffer is disposed once its stream has been consumed.
      val in = remoteBytes.toInputStream(dispose = true)
      val deserialized = serializerManager.dataDeserializeStream(blockId, in)(elementTag)
      new BlockResult(deserialized, DataReadMethod.Network, remoteBytes.size)
    }
  }


  /**
   * Get block from remote block managers as serialized bytes.
   *
   * The locations returned by `getLocations` are tried one after another. The method gives up
   * once the total number of failed fetches reaches the size of the initial location list, and
   * it refreshes the location list after `maxFailuresBeforeLocationRefresh` failures in a row.
   *
   * @param blockId id of the block to fetch; must not be null
   * @return the fetched bytes, or None when no location could provide the block
   */
  def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = {
    logDebug(s"Getting remote block $blockId")
    require(blockId != null, "BlockId is null")
    // Failures since the last location refresh, and failures overall.
    var runningFailureCount = 0
    var totalFailureCount = 0
    // Candidate locations to fetch the block from.
    val locations = getLocations(blockId)
    // For reference, getLocations is defined as follows. It is quoted with line comments
    // because Scala block comments nest, so wrapping a scaladoc in another block comment
    // would leave the outer comment unterminated.
    //
    //   /**
    //    * Return a list of locations for the given block, prioritizing the local machine since
    //    * multiple block managers can share the same host.
    //    */
    //   private def getLocations(blockId: BlockId): Seq[BlockManagerId] = {
    //     // Ask the BlockManagerMaster which block managers hold the block, then shuffle them.
    //     val locs = Random.shuffle(master.getLocations(blockId))
    //     // Prefer locations on this host (loc.host == blockManagerId.host).
    //     val (preferredLocs, otherLocs) =
    //       locs.partition { loc => blockManagerId.host == loc.host }
    //     // Preferred locations first, then all the others.
    //     preferredLocs ++ otherLocs
    //   }
    val maxFetchFailures = locations.size
    var locationIterator = locations.iterator
    while (locationIterator.hasNext) {
      val loc = locationIterator.next()
      // Try to fetch the block from this location.
      logDebug(s"Getting remote block $blockId from $loc")
      val data = try {
        // Fetch synchronously through the transfer service; the remote block manager is
        // addressed by its host, port and executor id.
        blockTransferService.fetchBlockSync(
          loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()
      } catch {
        case NonFatal(e) =>
          runningFailureCount += 1
          totalFailureCount += 1

          // Too many failures overall: stop trying.
          if (totalFailureCount >= maxFetchFailures) {
            // Give up trying anymore locations. Either we've tried all of the original locations,
            // or we've refreshed the list of locations from the master, and have still
            // hit failures after trying locations from the refreshed list.
            logWarning(s"Failed to fetch block after $totalFailureCount fetch failures. " +
              s"Most recent failure cause:", e)
            return None
          }

          logWarning(s"Failed to fetch remote block $blockId " +
            s"from $loc (failed attempt $runningFailureCount)", e)

          // If there is a large number of executors then locations list can contain a
          // large number of stale entries causing a large number of retries that may
          // take a significant amount of time. To get rid of these stale entries
          // we refresh the block locations after a certain number of fetch failures
          if (runningFailureCount >= maxFailuresBeforeLocationRefresh) {
            locationIterator = getLocations(blockId).iterator
            logDebug(s"Refreshed locations from the driver " +
              s"after ${runningFailureCount} fetch failures.")
            runningFailureCount = 0
          }

          // This location failed, so we retry fetch from a different one by returning null here
          null
      }

      if (data != null) {
        return Some(new ChunkedByteBuffer(data))
      }
      logDebug(s"The value of block $blockId is null")
    }
    logDebug(s"Block $blockId not found")
    None
  }

再来看看写入数据的doPutBytes方法，核心操作就是先写内存，内存放不下且存储级别允许使用磁盘时再写磁盘

  /**
   * Put the given bytes according to the given level in one of the block stores, replicating
   * the values if necessary.
   *
   * If the block already exists, this method will not overwrite it.
   *
   * '''Important!''' Callers must not mutate or release the data buffer underlying `bytes`. Doing
   * so may corrupt or change the data stored by the `BlockManager`.
   *
   * @param keepReadLock if true, this method will hold the read lock when it returns (even if the
   *                     block already exists). If false, this method will hold no locks when it
   *                     returns.
   * @return true if the block was already present or if the put succeeded, false otherwise.
   */
  private def doPutBytes[T](
      blockId: BlockId,
      bytes: ChunkedByteBuffer,
      level: StorageLevel,
      classTag: ClassTag[T],
      tellMaster: Boolean = true,
      keepReadLock: Boolean = false): Boolean = {
    // The shared locking and cleanup logic lives in doPut; the function passed to it performs
    // the actual write and returns None on success or Some(bytes) on failure.
    doPut(blockId, level, classTag, tellMaster = tellMaster, keepReadLock = keepReadLock) { info =>
      val startTimeMs = System.currentTimeMillis
      // Since we're storing bytes, initiate the replication before storing them locally.
      // This is faster as data is already serialized and ready to send.
      val replicationFuture = if (level.replication > 1) {
        Future {
          // This is a blocking action and should run in futureExecutionContext which is a cached
          // thread pool
          replicate(blockId, bytes, level, classTag)
        }(futureExecutionContext)
      } else {
        null
      }

      val size = bytes.size

      // The storage level uses memory.
      if (level.useMemory) {
        // Put it in memory first, even if it also has useDisk set to true;
        // We will drop it to disk later if the memory store can't hold it.
        val putSucceeded = if (level.deserialized) {
          // The level stores deserialized values: deserialize the bytes and store the values.
          val values =
            serializerManager.dataDeserializeStream(blockId, bytes.toInputStream())(classTag)
          // putIteratorAsValues unrolls the iterator step by step while checking periodically
          // that enough memory is still available, and yields Left when it could not store
          // the block.
          memoryStore.putIteratorAsValues(blockId, values, classTag) match {
            case Right(_) => true // Right: the values were stored in memory.
            case Left(iter) => // Left: the values could not be stored in memory.
              // If putting deserialized values in memory failed, we will put the bytes directly to
              // disk, so we don't need this iterator and can close it to free resources earlier.
              iter.close()
              false
          }
        } else {
          // The level stores serialized bytes: put the bytes themselves.
          val memoryMode = level.memoryMode
          memoryStore.putBytes(blockId, size, memoryMode, () => {
            // Off-heap storage gets a copy made with Platform.allocateDirectBuffer whenever
            // some chunk is not a direct buffer.
            if (memoryMode == MemoryMode.OFF_HEAP &&
                bytes.chunks.exists(buffer => !buffer.isDirect)) {
              bytes.copy(Platform.allocateDirectBuffer)
            } else {
              bytes
            }
          })
        }
        // The memory put failed and the level also allows disk: write the bytes to disk.
        if (!putSucceeded && level.useDisk) {
          logWarning(s"Persisting block $blockId to disk instead.")
          diskStore.putBytes(blockId, bytes)
        }
      } else if (level.useDisk) {
        // The level uses disk only.
        diskStore.putBytes(blockId, bytes)
      }

      val putBlockStatus = getCurrentBlockStatus(blockId, info)
      val blockWasSuccessfullyStored = putBlockStatus.storageLevel.isValid
      if (blockWasSuccessfullyStored) {
        // Now that the block is in either the memory or disk store,
        // tell the master about it.
        info.size = size
        if (tellMaster && info.tellMaster) {
          reportBlockStatus(blockId, putBlockStatus)
        }
        addUpdatedBlockStatusToTaskMetrics(blockId, putBlockStatus)
      }
      logDebug("Put block %s locally took %s".format(blockId, Utils.getUsedTimeMs(startTimeMs)))
      if (level.replication > 1) {
        // Wait for asynchronous replication to finish
        try {
          Await.ready(replicationFuture, Duration.Inf)
        } catch {
          case NonFatal(t) =>
            throw new Exception("Error occurred while waiting for replication to finish", t)
        }
      }
      if (blockWasSuccessfullyStored) {
        None
      } else {
        Some(bytes)
      }
    }.isEmpty
  }

看看doPut方法，主要是一些多线程并发同步的操作

  /**
   * Helper method used to abstract common code from [[doPutBytes()]] and [[doPutIterator()]].
   *
   * @param putBody a function which attempts the actual put() and returns None on success
   *                or Some on failure.
   * @return None when the block already existed or `putBody` succeeded; otherwise whatever
   *         `putBody` returned.
   */
  private def doPut[T](
      blockId: BlockId,
      level: StorageLevel,
      classTag: ClassTag[_],
      tellMaster: Boolean,
      keepReadLock: Boolean)(putBody: BlockInfo => Option[T]): Option[T] = {

    require(blockId != null, "BlockId is null")
    require(level != null && level.isValid, "StorageLevel is null or invalid")

    val putBlockInfo = {
      // Create the BlockInfo describing the block about to be written.
      val newInfo = new BlockInfo(level, classTag, tellMaster)
      // Try to lock it as a new block for writing; false means the block already exists.
      if (blockInfoManager.lockNewBlockForWriting(blockId, newInfo)) {
        newInfo
      } else {
        logWarning(s"Block $blockId already exists on this machine; not re-adding it")
        if (!keepReadLock) {
          // lockNewBlockForWriting returned a read lock on the existing block, so we must free it:
          releaseLock(blockId)
        }
        return None
      }
    }

    val startTimeMs = System.currentTimeMillis
    // Stays true unless putBody returns normally; read in the finally block below.
    var exceptionWasThrown: Boolean = true
    val result: Option[T] = try {
      val res = putBody(putBlockInfo)
      exceptionWasThrown = false
      if (res.isEmpty) {
        // the block was successfully stored
        if (keepReadLock) {
          // The caller asked to keep a read lock: downgrade the lock held on the block.
          blockInfoManager.downgradeLock(blockId)
        } else {
          // Otherwise release the lock.
          blockInfoManager.unlock(blockId)
        }
      } else {
        // putBody reported failure: remove the block without notifying the master.
        removeBlockInternal(blockId, tellMaster = false)
        logWarning(s"Putting block $blockId failed")
      }
      res
    } finally {
      // This cleanup is performed in a finally block rather than a `catch` to avoid having to
      // catch and properly re-throw InterruptedException.
      if (exceptionWasThrown) {
        logWarning(s"Putting block $blockId failed due to an exception")
        // If an exception was thrown then it's possible that the code in `putBody` has already
        // notified the master about the availability of this block, so we need to send an update
        // to remove this block location.
        removeBlockInternal(blockId, tellMaster = tellMaster)
        // The `putBody` code may have also added a new block status to TaskMetrics, so we need
        // to cancel that out by overwriting it with an empty block status. We only do this if
        // the finally block was entered via an exception because doing this unconditionally would
        // cause us to send empty block statuses for every block that failed to be cached due to
        // a memory shortage (which is an expected failure, unlike an uncaught exception).
        addUpdatedBlockStatusToTaskMetrics(blockId, BlockStatus.empty)
      }
    }
    if (level.replication > 1) {
      logDebug("Putting block %s with replication took %s"
        .format(blockId, Utils.getUsedTimeMs(startTimeMs)))
    } else {
      logDebug("Putting block %s without replication took %s"
        .format(blockId, Utils.getUsedTimeMs(startTimeMs)))
    }
    result
  }

看看memoryStore.putIteratorAsValues方法


 /**
  * Attempts to unroll the given iterator into memory and store it as a deserialized block.
  *
  * Values are appended to a size-tracking vector one at a time. On every
  * `memoryCheckPeriod`-th element the estimated size is compared with the memory reserved so
  * far, and more unroll memory is requested when the threshold has been reached.
  *
  * @return Right with the size of the stored entry when the block was stored; Left with a
  *         PartiallyUnrolledIterator when unrolling ran out of memory or storage memory
  *         could not be acquired.
  */
 private[storage] def putIteratorAsValues[T](
      blockId: BlockId,
      values: Iterator[T],
      classTag: ClassTag[T]): Either[PartiallyUnrolledIterator[T], Long] = {

    require(!contains(blockId), s"Block $blockId is already present in the MemoryStore")

    // Number of elements unrolled so far 
    var elementsUnrolled = 0
    // Whether there is still enough memory for us to continue unrolling this block
    var keepUnrolling = true
    // Initial per-task memory to request for unrolling blocks (bytes).
    val initialMemoryThreshold = unrollMemoryThreshold
    // How often to check whether we need to request more memory
    val memoryCheckPeriod = 16
    // Memory currently reserved by this task for this particular unrolling operation
    var memoryThreshold = initialMemoryThreshold
    // Memory to request as a multiple of current vector size
    val memoryGrowthFactor = 1.5
    // Keep track of unroll memory used by this particular block / putIterator() operation
    var unrollMemoryUsedByThisBlock = 0L
    // Underlying vector for unrolling the block
    var vector = new SizeTrackingVector[T]()(classTag)

    // Request enough memory to begin unrolling
    keepUnrolling =
      reserveUnrollMemoryForThisTask(blockId, initialMemoryThreshold, MemoryMode.ON_HEAP)

    if (!keepUnrolling) {
      logWarning(s"Failed to reserve initial memory threshold of " +
        s"${Utils.bytesToString(initialMemoryThreshold)} for computing block $blockId in memory.")
    } else {
      unrollMemoryUsedByThisBlock += initialMemoryThreshold
    }

    // Unroll this block safely, checking whether we have exceeded our threshold periodically
    while (values.hasNext && keepUnrolling) {
      vector += values.next()
      if (elementsUnrolled % memoryCheckPeriod == 0) {
        // If our vector's size has exceeded the threshold, request more memory
        val currentSize = vector.estimateSize()
        if (currentSize >= memoryThreshold) {
          val amountToRequest = (currentSize * memoryGrowthFactor - memoryThreshold).toLong
          keepUnrolling =
            reserveUnrollMemoryForThisTask(blockId, amountToRequest, MemoryMode.ON_HEAP)
          if (keepUnrolling) {
            unrollMemoryUsedByThisBlock += amountToRequest
          }
          // New threshold is currentSize * memoryGrowthFactor
          memoryThreshold += amountToRequest
        }
      }
      elementsUnrolled += 1
    }

    if (keepUnrolling) {
      // We successfully unrolled the entirety of this block
      val arrayValues = vector.toArray
      // Drop the reference to the vector now that its contents live in arrayValues.
      vector = null
      val entry =
        new DeserializedMemoryEntry[T](arrayValues, SizeEstimator.estimate(arrayValues), classTag)
      val size = entry.size
      // Releases `amount` of unroll memory and acquires the same amount as storage memory.
      def transferUnrollToStorage(amount: Long): Unit = {
        // Synchronize so that transfer is atomic
        memoryManager.synchronized {
          releaseUnrollMemoryForThisTask(MemoryMode.ON_HEAP, amount)
          val success = memoryManager.acquireStorageMemory(blockId, amount, MemoryMode.ON_HEAP)
          assert(success, "transferring unroll memory to storage memory failed")
        }
      }
      // Acquire storage memory if necessary to store this block in memory.
      val enoughStorageMemory = {
        if (unrollMemoryUsedByThisBlock <= size) {
          // The entry needs more than was reserved for unrolling: acquire the difference as
          // storage memory first, and transfer the unroll memory only if that succeeded.
          val acquiredExtra =
            memoryManager.acquireStorageMemory(
              blockId, size - unrollMemoryUsedByThisBlock, MemoryMode.ON_HEAP)
          if (acquiredExtra) {
            transferUnrollToStorage(unrollMemoryUsedByThisBlock)
          }
          acquiredExtra
        } else { // unrollMemoryUsedByThisBlock > size
          // If this task attempt already owns more unroll memory than is necessary to store the
          // block, then release the extra memory that will not be used.
          val excessUnrollMemory = unrollMemoryUsedByThisBlock - size
          releaseUnrollMemoryForThisTask(MemoryMode.ON_HEAP, excessUnrollMemory)
          transferUnrollToStorage(size)
          true
        }
      }
      if (enoughStorageMemory) {
        entries.synchronized {
          entries.put(blockId, entry)
        }
        logInfo("Block %s stored as values in memory (estimated size %s, free %s)".format(
          blockId, Utils.bytesToString(size), Utils.bytesToString(maxMemory - blocksMemoryUsed)))
        Right(size)
      } else {
        assert(currentUnrollMemoryForThisTask >= unrollMemoryUsedByThisBlock,
          "released too much unroll memory")
        // Everything was unrolled but could not be stored: hand all values back as the
        // unrolled part, with nothing left in `rest`.
        Left(new PartiallyUnrolledIterator(
          this,
          MemoryMode.ON_HEAP,
          unrollMemoryUsedByThisBlock,
          unrolled = arrayValues.toIterator,
          rest = Iterator.empty))
      }
    } else {
      // We ran out of space while unrolling the values for this block
      logUnrollFailureMessage(blockId, vector.estimateSize())
      // Hand back what was unrolled so far together with the remaining input.
      Left(new PartiallyUnrolledIterator(
        this,
        MemoryMode.ON_HEAP,
        unrollMemoryUsedByThisBlock,
        unrolled = vector.iterator,
        rest = values))
    }
  }

阿松0311

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Spark源码——BlockManager

（图片来源：北风网）源码所在目录：首先进入BlockManagerMasterEndpoint看看/** * BlockManagerMasterEndpoint is an [[ThreadSafeRpcEndpoint]] on the master node to track statuses * of all slaves' block managers. */BlockManagerMasterEndpoint 是主节点上的 [[ThreadSafeRpcEndpoint]].
复制链接

扫一扫