Spark MemoryManager and StorageMemoryPool: A Source Code Walkthrough
Spark has two memory-management implementations: StaticMemoryManager (before 1.6) and UnifiedMemoryManager (1.6 and later, the default). This walkthrough covers only UnifiedMemoryManager.
UnifiedMemoryManager object
Attributes:
// Memory reserved for the JVM itself and Spark internal objects; not usable as Spark memory
private val RESERVED_SYSTEM_MEMORY_BYTES = 300 * 1024 * 1024 // 300 MB
Methods:
// Compute the Spark memory sizes and return a UnifiedMemoryManager instance
def apply(conf: SparkConf, numCores: Int): UnifiedMemoryManager = {
// The maximum amount of memory usable by Spark
val maxMemory = getMaxMemory(conf)
// Instantiate the UnifiedMemoryManager
new UnifiedMemoryManager(
conf,
maxHeapMemory = maxMemory, // total Spark memory (execution + storage)
onHeapStorageRegionSize = // initial Spark storage region
(maxMemory * conf.getDouble("spark.memory.storageFraction", 0.5)).toLong,
numCores = numCores)
}
private def getMaxMemory(conf: SparkConf): Long = {
val systemMemory = conf.getLong("spark.testing.memory", Runtime.getRuntime.maxMemory)
// Runtime.getRuntime.maxMemory is the heap configured when the JVM was launched: driver-memory on the driver, executor-memory on executors
// Reserved memory, 300 MB by default
val reservedMemory = conf.getLong("spark.testing.reservedMemory",
if (conf.contains("spark.testing")) 0 else RESERVED_SYSTEM_MEMORY_BYTES)
// Minimum required heap: 300 MB * 1.5 = 450 MB, i.e. the JVM heap must be at least 450 MB
val minSystemMemory = (reservedMemory * 1.5).ceil.toLong
if (systemMemory < minSystemMemory) {
throw new IllegalArgumentException(s"System memory $systemMemory must " +
s"be at least $minSystemMemory. Please increase heap size using the --driver-memory " +
s"option or spark.driver.memory in Spark configuration.")
}
// SPARK-12759 Check executor memory to fail fast if memory is insufficient
if (conf.contains("spark.executor.memory")) {
val executorMemory = conf.getSizeAsBytes("spark.executor.memory")
if (executorMemory < minSystemMemory) {
throw new IllegalArgumentException(s"Executor memory $executorMemory must be at least " +
s"$minSystemMemory. Please increase executor memory using the " +
s"--executor-memory option or spark.executor.memory in Spark configuration.")
}
}
// Usable memory = JVM heap minus the 300 MB reserve
val usableMemory = systemMemory - reservedMemory
val memoryFraction = conf.getDouble("spark.memory.fraction", 0.6)
(usableMemory * memoryFraction).toLong // the Spark memory
}
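To make the sizing arithmetic concrete, here is a standalone sketch of the same computation (plain Scala, no Spark dependency; the 4 GB heap is an invented example, and the 0.6 / 0.5 fractions are the defaults shown above):
object MemorySizingExample extends App {
  val systemMemory   = 4096L * 1024 * 1024           // hypothetical JVM heap: 4 GB
  val reservedMemory = 300L * 1024 * 1024            // RESERVED_SYSTEM_MEMORY_BYTES
  val usableMemory   = systemMemory - reservedMemory // 3796 MB
  val sparkMemory    = (usableMemory * 0.6).toLong   // ~2277 MB of Spark memory
  val storageRegion  = (sparkMemory * 0.5).toLong    // ~1138 MB initial storage region
  println(s"spark memory = ${sparkMemory >> 20} MB, storage region = ${storageRegion >> 20} MB")
}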
UnifiedMemoryManager class
This class manages Spark's memory, covering both the Spark execution memory and the Spark storage memory.
Constructor:
// Assert that the pool sizes add up exactly to the configured totals
assertInvariants()
private def assertInvariants(): Unit = {
assert(onHeapExecutionMemoryPool.poolSize + onHeapStorageMemoryPool.poolSize == maxHeapMemory)
assert(
offHeapExecutionMemoryPool.poolSize + offHeapStorageMemoryPool.poolSize == maxOffHeapMemory)
}
Methods:
// Maximum on-heap storage memory: whatever execution is not currently using. Execution memory clearly takes priority.
override def maxOnHeapStorageMemory: Long = synchronized {
maxHeapMemory - onHeapExecutionMemoryPool.memoryUsed
}
// Maximum off-heap storage memory; again, execution memory takes priority
override def maxOffHeapStorageMemory: Long = synchronized {
maxOffHeapMemory - offHeapExecutionMemoryPool.memoryUsed
}
// Acquire numBytes of Spark execution memory for a task
override private[memory] def acquireExecutionMemory(
numBytes: Long,
taskAttemptId: Long,
memoryMode: MemoryMode): Long = synchronized {
assertInvariants() // re-check the pool-size invariants
assert(numBytes >= 0) // the requested amount must be non-negative
val (executionPool, storagePool, storageRegionSize, maxMemory) = memoryMode match { // usually ON_HEAP
case MemoryMode.ON_HEAP => (
onHeapExecutionMemoryPool, // the on-heap ExecutionMemoryPool instance
onHeapStorageMemoryPool, // the on-heap StorageMemoryPool instance
onHeapStorageRegionSize, // initial storage region size (Long)
maxHeapMemory) // total Spark memory (Long)
case MemoryMode.OFF_HEAP => (
offHeapExecutionMemoryPool,
offHeapStorageMemoryPool,
offHeapStorageMemory,
maxOffHeapMemory)
}
// Inner helper: grow the execution pool by reclaiming memory from storage
def maybeGrowExecutionPool(extraMemoryNeeded: Long): Unit = {
// extraMemoryNeeded = bytes requested minus execution's free memory; storage is borrowed from only when this is > 0
if (extraMemoryNeeded > 0) {
val memoryReclaimableFromStorage = math.max(
storagePool.memoryFree,
storagePool.poolSize - storageRegionSize)
// storageRegionSize: the storage region's initial size
// storagePool.poolSize: the current size; it grows and shrinks as the pools borrow from each other
// storagePool.memoryFree: storage's free memory
// poolSize - storageRegionSize is positive only when storage has previously borrowed from execution;
// execution may reclaim that borrowed amount (evicting cached blocks if necessary) or the free storage memory, whichever is larger
if (memoryReclaimableFromStorage > 0) {
// Reclaim only what execution actually needs: min(extraMemoryNeeded, memoryReclaimableFromStorage).
// freeSpaceToShrinkPool evicts cached blocks if free memory alone cannot cover that amount,
// and returns the number of bytes actually freed
val spaceToReclaim = storagePool.freeSpaceToShrinkPool(
math.min(extraMemoryNeeded, memoryReclaimableFromStorage))
storagePool.decrementPoolSize(spaceToReclaim) // shrink storage
executionPool.incrementPoolSize(spaceToReclaim) // grow execution
}
}
}
// Ceiling for the execution pool: total memory minus the storage memory that is protected,
// i.e. maxMemory - min(storageUsed, storageRegionSize)
def computeMaxExecutionPoolSize(): Long = {
maxMemory - math.min(storagePool.memoryUsed, storageRegionSize)
}
// Delegate the actual allocation to the execution pool, passing the two helpers above as callbacks
executionPool.acquireMemory(
numBytes, taskAttemptId, maybeGrowExecutionPool, () => computeMaxExecutionPoolSize)
}
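The borrowing above can be traced with plain numbers. A minimal standalone model (all MB figures are invented; in real Spark, freeSpaceToShrinkPool may additionally evict cached blocks when free storage memory alone is not enough):
object GrowExecutionPoolModel extends App {
  val storageRegionSize = 1000L  // initial storage region
  var storagePoolSize   = 1000L  // storage has not borrowed anything yet
  val storageUsed       = 700L
  var executionPoolSize = 1000L
  val extraMemoryNeeded = 500L   // execution's request minus its free memory

  val storageFree = storagePoolSize - storageUsed                              // 300
  val reclaimable = math.max(storageFree, storagePoolSize - storageRegionSize) // max(300, 0) = 300
  val spaceToReclaim = math.min(extraMemoryNeeded, reclaimable)                // 300
  storagePoolSize -= spaceToReclaim    // storage shrinks to 700
  executionPoolSize += spaceToReclaim  // execution grows to 1300
  println(s"reclaimed $spaceToReclaim MB; execution pool is now $executionPoolSize MB")
}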
// Acquire storage memory for a block
override def acquireStorageMemory(
blockId: BlockId,
numBytes: Long,
memoryMode: MemoryMode): Boolean = synchronized {
assertInvariants() // re-check the pool-size invariants
assert(numBytes >= 0) // the requested amount must be non-negative
val (executionPool, storagePool, maxMemory) = memoryMode match {
case MemoryMode.ON_HEAP => ( // usually the allocation is on-heap
onHeapExecutionMemoryPool,
onHeapStorageMemoryPool,
maxOnHeapStorageMemory)
case MemoryMode.OFF_HEAP => (
offHeapExecutionMemoryPool,
offHeapStorageMemoryPool,
maxOffHeapStorageMemory)
}
if (numBytes > maxMemory) { // if the request exceeds the storage maximum, the block simply will not be cached
// Fail fast if the block simply won't fit
logInfo(s"Will not store $blockId as the required space ($numBytes bytes) exceeds our " +
s"memory limit ($maxMemory bytes)")
return false
}
if (numBytes > storagePool.memoryFree) {
// Storage does not have enough free memory, so borrow from execution
val memoryBorrowedFromExecution = Math.min(executionPool.memoryFree,
numBytes - storagePool.memoryFree) // borrow the smaller of execution's free memory and this block's shortfall
executionPool.decrementPoolSize(memoryBorrowedFromExecution) // shrink execution
storagePool.incrementPoolSize(memoryBorrowedFromExecution) // grow storage
}
storagePool.acquireMemory(blockId, numBytes) // finally, acquire from the storage pool (may evict cached blocks)
}
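The reverse direction can be modeled the same way. A standalone sketch with invented MB figures: storage wants 400 MB but only 250 MB is free, so it borrows the shortfall from execution's free memory:
object AcquireStorageModel extends App {
  var executionPoolSize = 1000L; val executionUsed = 600L
  var storagePoolSize   = 1000L; val storageUsed   = 750L
  val numBytes = 400L                                    // size of the block to cache

  val storageFree   = storagePoolSize - storageUsed      // 250
  val executionFree = executionPoolSize - executionUsed  // 400
  if (numBytes > storageFree) {
    val borrowed = math.min(executionFree, numBytes - storageFree) // min(400, 150) = 150
    executionPoolSize -= borrowed                        // 850
    storagePoolSize   += borrowed                        // 1150
  }
  println(s"storage pool = $storagePoolSize MB, execution pool = $executionPoolSize MB")
}
Note that, unlike execution borrowing from storage, this direction never evicts anything: only execution's free memory can be taken.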
// Acquire unroll memory; in practice this is just storage memory
override def acquireUnrollMemory(
blockId: BlockId,
numBytes: Long,
memoryMode: MemoryMode): Boolean = synchronized {
acquireStorageMemory(blockId, numBytes, memoryMode)
}
abstract MemoryManager
Attributes:
// On-heap storage memory pool
@GuardedBy("this")
protected val onHeapStorageMemoryPool = new StorageMemoryPool(this, MemoryMode.ON_HEAP)
// Off-heap storage memory pool
@GuardedBy("this")
protected val offHeapStorageMemoryPool = new StorageMemoryPool(this, MemoryMode.OFF_HEAP)
// On-heap execution memory pool
@GuardedBy("this")
protected val onHeapExecutionMemoryPool = new ExecutionMemoryPool(this, MemoryMode.ON_HEAP)
// Off-heap execution memory pool
@GuardedBy("this")
protected val offHeapExecutionMemoryPool = new ExecutionMemoryPool(this, MemoryMode.OFF_HEAP)
// Initialize the on-heap storage pool to its configured size
onHeapStorageMemoryPool.incrementPoolSize(onHeapStorageMemory)
// Initialize the on-heap execution pool to its configured size
onHeapExecutionMemoryPool.incrementPoolSize(onHeapExecutionMemory)
// spark.memory.offHeap.size: the off-heap memory size, 0 by default
protected[this] val maxOffHeapMemory = conf.get(MEMORY_OFFHEAP_SIZE)
protected[this] val offHeapStorageMemory =
(maxOffHeapMemory * conf.getDouble("spark.memory.storageFraction", 0.5)).toLong
// 0 with the default configuration
offHeapExecutionMemoryPool.incrementPoolSize(maxOffHeapMemory - offHeapStorageMemory)
// 0 with the default configuration
offHeapStorageMemoryPool.incrementPoolSize(offHeapStorageMemory)
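So with the default configuration both off-heap pools stay at size 0. To get non-empty off-heap pools, both settings below must be set (the key names are Spark's real configuration keys; the 2g figure is an example):
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.memory.offHeap.enabled", "true") // default: false
  .set("spark.memory.offHeap.size", "2g")      // must be > 0 when enabled
// With the default spark.memory.storageFraction = 0.5, this yields
// 1 GB of off-heap storage and 1 GB of off-heap execution memory.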
Methods:
def maxOnHeapStorageMemory: Long // maximum on-heap storage memory
def maxOffHeapStorageMemory: Long // maximum off-heap storage memory
// Set the MemoryStore the storage pools use to evict cached blocks
final def setMemoryStore(store: MemoryStore): Unit = synchronized {
onHeapStorageMemoryPool.setMemoryStore(store)
offHeapStorageMemoryPool.setMemoryStore(store)
}
// Acquire storage memory for the given blockId
def acquireStorageMemory(blockId: BlockId, numBytes: Long, memoryMode: MemoryMode): Boolean
// Acquire unroll memory for the given blockId
def acquireUnrollMemory(blockId: BlockId, numBytes: Long, memoryMode: MemoryMode): Boolean
// Acquire Spark execution memory for the given task attempt
private[memory] def acquireExecutionMemory(numBytes: Long,
taskAttemptId: Long,
memoryMode: MemoryMode): Long
// Release numBytes of this task's execution memory, on-heap or off-heap
private[memory] def releaseExecutionMemory(
numBytes: Long,
taskAttemptId: Long,
memoryMode: MemoryMode): Unit = synchronized {
memoryMode match {
case MemoryMode.ON_HEAP => onHeapExecutionMemoryPool.releaseMemory(numBytes, taskAttemptId)
case MemoryMode.OFF_HEAP => offHeapExecutionMemoryPool.releaseMemory(numBytes, taskAttemptId)
}
}
// Release all of this task's execution memory, on-heap and off-heap
private[memory] def releaseAllExecutionMemoryForTask(taskAttemptId: Long): Long = synchronized {
onHeapExecutionMemoryPool.releaseAllMemoryForTask(taskAttemptId) +
offHeapExecutionMemoryPool.releaseAllMemoryForTask(taskAttemptId)
}
// Release the given amount of storage memory
def releaseStorageMemory(numBytes: Long, memoryMode: MemoryMode): Unit = synchronized {
memoryMode match {
case MemoryMode.ON_HEAP => onHeapStorageMemoryPool.releaseMemory(numBytes)
case MemoryMode.OFF_HEAP => offHeapStorageMemoryPool.releaseMemory(numBytes)
}
}
// Release all storage memory, on-heap and off-heap
final def releaseAllStorageMemory(): Unit = synchronized {
onHeapStorageMemoryPool.releaseAllMemory()
offHeapStorageMemoryPool.releaseAllMemory()
}
// Release unroll memory (a special form of storage memory), on-heap or off-heap
final def releaseUnrollMemory(numBytes: Long, memoryMode: MemoryMode): Unit = synchronized {
releaseStorageMemory(numBytes, memoryMode)
}
// Execution memory in use, on-heap plus off-heap
final def executionMemoryUsed: Long = synchronized {
onHeapExecutionMemoryPool.memoryUsed + offHeapExecutionMemoryPool.memoryUsed
}
// Storage memory in use, on-heap plus off-heap
final def storageMemoryUsed: Long = synchronized {
onHeapStorageMemoryPool.memoryUsed + offHeapStorageMemoryPool.memoryUsed
}
// Execution memory used by the given task, on-heap plus off-heap
private[memory] def getExecutionMemoryUsageForTask(taskAttemptId: Long): Long = synchronized {
onHeapExecutionMemoryPool.getMemoryUsageForTask(taskAttemptId) +
offHeapExecutionMemoryPool.getMemoryUsageForTask(taskAttemptId)
}
// The MemoryMode Tungsten uses; ON_HEAP unless off-heap memory is explicitly enabled
final val tungstenMemoryMode: MemoryMode = {
if (conf.get(MEMORY_OFFHEAP_ENABLED)) {
require(conf.get(MEMORY_OFFHEAP_SIZE) > 0,
"spark.memory.offHeap.size must be > 0 when spark.memory.offHeap.enabled == true")
require(Platform.unaligned(),
"No support for unaligned Unsafe. Set spark.memory.offHeap.enabled to false.")
MemoryMode.OFF_HEAP
} else {
MemoryMode.ON_HEAP
}
}
val pageSizeBytes: Long = {
val minPageSize = 1L * 1024 * 1024 // 1MB
val maxPageSize = 64L * minPageSize // 64MB
val cores = if (numCores > 0) numCores else Runtime.getRuntime.availableProcessors()
// Because of rounding to next power of 2, we may have safetyFactor as 8 in worst case
val safetyFactor = 16
val maxTungstenMemory: Long = tungstenMemoryMode match {
case MemoryMode.ON_HEAP => onHeapExecutionMemoryPool.poolSize
case MemoryMode.OFF_HEAP => offHeapExecutionMemoryPool.poolSize
}
// size = Tungsten execution memory / cores / safetyFactor, rounded up to a power of 2
// and clamped into [1 MB, 64 MB]
val size = ByteArrayMethods.nextPowerOf2(maxTungstenMemory / cores / safetyFactor)
val default = math.min(maxPageSize, math.max(minPageSize, size))
conf.getSizeAsBytes("spark.buffer.pageSize", default)
}
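A quick standalone check of the formula, using a helper modeled on ByteArrayMethods.nextPowerOf2 (the 2 GB / 8 cores figures are invented examples):
object PageSizeModel extends App {
  def nextPowerOf2(n: Long): Long = {
    val h = java.lang.Long.highestOneBit(n)
    if (h == n) n else h << 1                     // round up to the next power of 2
  }
  val maxTungstenMemory = 2L * 1024 * 1024 * 1024 // example: 2 GB of execution memory
  val cores = 8
  val safetyFactor = 16
  val size = nextPowerOf2(maxTungstenMemory / cores / safetyFactor) // 16 MB
  val pageSize = math.min(64L << 20, math.max(1L << 20, size))      // clamp to [1 MB, 64 MB]
  println(s"page size = ${pageSize >> 20} MB")
}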
// Tungsten allocation: on-heap uses JVM long arrays; off-heap uses sun.misc.Unsafe to allocate memory directly
private[memory] final val tungstenMemoryAllocator: MemoryAllocator = {
tungstenMemoryMode match {
case MemoryMode.ON_HEAP => MemoryAllocator.HEAP
case MemoryMode.OFF_HEAP => MemoryAllocator.UNSAFE
}
}
ExecutionMemoryPool
This class is the execution-memory implementation of the abstract MemoryPool class.
private[memory] class ExecutionMemoryPool(
lock: Object,
memoryMode: MemoryMode
) extends MemoryPool(lock) with Logging {
// The pool name, used in log messages
private[this] val poolName: String = memoryMode match {
case MemoryMode.ON_HEAP => "on-heap execution"
case MemoryMode.OFF_HEAP => "off-heap execution"
}
@GuardedBy("lock")
private val memoryForTask = new mutable.HashMap[Long, Long]()
//task的内存使用量
//总的内存使用量,所有运行任务的总和
override def memoryUsed: Long = lock.synchronized {
memoryForTask.values.sum
}
// Memory usage of a single task
def getMemoryUsageForTask(taskAttemptId: Long): Long = lock.synchronized {
memoryForTask.getOrElse(taskAttemptId, 0L)
}
// Acquire execution memory for a task
private[memory] def acquireMemory(
numBytes: Long,
taskAttemptId: Long,
maybeGrowPool: Long => Unit = (additionalSpaceNeeded: Long) => Unit,
computeMaxPoolSize: () => Long = () => poolSize): Long = lock.synchronized {
assert(numBytes > 0, s"invalid number of bytes requested: $numBytes")
if (!memoryForTask.contains(taskAttemptId)) {
memoryForTask(taskAttemptId) = 0L
// This will later cause waiting tasks to wake up and check numTasks again
lock.notifyAll()
}
while (true) {
val numActiveTasks = memoryForTask.keys.size
val curMem = memoryForTask(taskAttemptId)
// If execution is short of free memory, try to borrow from storage first
maybeGrowPool(numBytes - memoryFree)
// The execution pool may have grown by borrowing from storage, so recompute its ceiling:
// execution ceiling = Spark memory - min(storageUsed, initial storage region)
val maxPoolSize = computeMaxPoolSize()
val maxMemoryPerTask = maxPoolSize / numActiveTasks // upper bound per task: 1/N of the pool
val minMemoryPerTask = poolSize / (2 * numActiveTasks) // lower bound per task: 1/(2N) of the pool
// The most this request may be granted, capped so the task stays within its 1/N share
val maxToGrant = math.min(numBytes, math.max(0, maxMemoryPerTask - curMem))
// The actual grant: the smaller of that cap and execution's free memory
val toGrant = math.min(maxToGrant, memoryFree)
// If the grant falls short and the task would still hold less than its 1/(2N) minimum,
// block until other tasks release memory, then retry
if (toGrant < numBytes && curMem + toGrant < minMemoryPerTask) {
logInfo(s"TID $taskAttemptId waiting for at least 1/2N of $poolName pool to be free")
lock.wait()
} else {
memoryForTask(taskAttemptId) += toGrant // record the grant for this task
return toGrant
}
}
0L // Never reached
}
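The net effect: with N active tasks, a task blocks until it can hold at least 1/(2N) of the pool, and is never granted more than 1/N of the (possibly grown) pool. A standalone illustration of those bounds, assuming for simplicity that maxPoolSize equals poolSize:
object TaskShareModel extends App {
  val poolSize = 1200L // MB, example figure
  for (n <- Seq(1, 2, 4)) {
    val maxPerTask = poolSize / n       // upper bound: 1/N
    val minPerTask = poolSize / (2 * n) // blocking threshold: 1/(2N)
    println(s"N=$n tasks: each holds between $minPerTask and $maxPerTask MB")
  }
}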
// Release numBytes of the task's memory; if the task holds less than numBytes, release everything it holds
def releaseMemory(numBytes: Long, taskAttemptId: Long): Unit = lock.synchronized {
val curMem = memoryForTask.getOrElse(taskAttemptId, 0L)
var memoryToFree = if (curMem < numBytes) {
logWarning(
s"Internal error: release called on $numBytes bytes but task only has $curMem bytes " +
s"of memory from the $poolName pool")
curMem
} else {
numBytes
}
if (memoryForTask.contains(taskAttemptId)) {
memoryForTask(taskAttemptId) -= memoryToFree
if (memoryForTask(taskAttemptId) <= 0) {
memoryForTask.remove(taskAttemptId)
}
}
lock.notifyAll() // Notify waiters in acquireMemory() that memory has been freed
}
// Release all memory held by the task and return the amount released
def releaseAllMemoryForTask(taskAttemptId: Long): Long = lock.synchronized {
val numBytesToFree = getMemoryUsageForTask(taskAttemptId)
releaseMemory(numBytesToFree, taskAttemptId)
numBytesToFree
}
}
StorageMemoryPool
This class is the storage-memory implementation of the abstract MemoryPool class.
private[memory] class StorageMemoryPool(
lock: Object,
memoryMode: MemoryMode
) extends MemoryPool(lock) with Logging {
// The pool name: "on-heap storage" or "off-heap storage"
private[this] val poolName: String = memoryMode match {
case MemoryMode.ON_HEAP => "on-heap storage"
case MemoryMode.OFF_HEAP => "off-heap storage"
}
@GuardedBy("lock")
private[this] var _memoryUsed: Long = 0L // this subclass maintains its own usage counter
// Implements the parent's abstract memoryUsed
override def memoryUsed: Long = lock.synchronized {
_memoryUsed
}
private var _memoryStore: MemoryStore = _
def memoryStore: MemoryStore = {
if (_memoryStore == null) {
throw new IllegalStateException("memory store not initialized yet")
}
_memoryStore
}
/**
* Set the [[MemoryStore]] used by this manager to evict cached blocks.
* This must be set after construction due to initialization ordering constraints.
*/
final def setMemoryStore(store: MemoryStore): Unit = {
_memoryStore = store
}
// Acquire numBytes of storage memory for caching the given block
def acquireMemory(blockId: BlockId, numBytes: Long): Boolean = lock.synchronized {
// If free memory covers the request, the shortfall is 0; otherwise cached blocks must make room
val numBytesToFree = math.max(0, numBytes - memoryFree)
// numBytesToFree tells the overload below how many bytes of cached blocks to evict
acquireMemory(blockId, numBytes, numBytesToFree)
}
// Acquire numBytesToAcquire, first evicting numBytesToFree bytes of cached blocks if needed
def acquireMemory(
blockId: BlockId,
numBytesToAcquire: Long,
numBytesToFree: Long): Boolean = lock.synchronized {
assert(numBytesToAcquire >= 0)
assert(numBytesToFree >= 0)
assert(memoryUsed <= poolSize)
// When numBytesToFree > 0, evict cached blocks to make room
if (numBytesToFree > 0) {
memoryStore.evictBlocksToFreeSpace(Some(blockId), numBytesToFree, memoryMode)
}
// Eviction may have freed memory; check whether the request now fits
val enoughMemory = numBytesToAcquire <= memoryFree
if (enoughMemory) {
// It fits: record the usage
_memoryUsed += numBytesToAcquire
}
enoughMemory // whether the block will be cached
}
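For example (invented figures): caching a 500 MB block with only 200 MB free yields numBytesToFree = 300 MB, so evictBlocksToFreeSpace is asked to evict at least 300 MB of cached blocks before the usage counter is updated:
object EvictionArithmetic extends App {
  // Standalone arithmetic for the two overloads above (MB figures are examples)
  val poolSize = 1000L
  val memoryUsed = 800L
  val memoryFree = poolSize - memoryUsed                  // 200
  val numBytes = 500L
  val numBytesToFree = math.max(0, numBytes - memoryFree) // 300: amount that must be evicted
  println(s"must evict at least $numBytesToFree MB before caching the block")
}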
// Release the given amount of storage memory
def releaseMemory(size: Long): Unit = lock.synchronized {
if (size > _memoryUsed) {
logWarning(s"Attempted to release $size bytes of storage " +
s"memory when we only have ${_memoryUsed} bytes")
_memoryUsed = 0
} else {
_memoryUsed -= size
}
}
// Release all storage memory
def releaseAllMemory(): Unit = lock.synchronized {
_memoryUsed = 0
}
// Free up spaceToFree bytes so the pool can be shrunk; returns the number of bytes actually freed
def freeSpaceToShrinkPool(spaceToFree: Long): Long = lock.synchronized {
val spaceFreedByReleasingUnusedMemory = math.min(spaceToFree, memoryFree) // the smaller of the target and the free memory
val remainingSpaceToFree = spaceToFree - spaceFreedByReleasingUnusedMemory
// remainingSpaceToFree is 0 when free memory alone covers the request
if (remainingSpaceToFree > 0) { // free memory alone is not enough; evict cached blocks
val spaceFreedByEviction =
memoryStore.evictBlocksToFreeSpace(None, remainingSpaceToFree, memoryMode)
spaceFreedByReleasingUnusedMemory + spaceFreedByEviction
} else {
spaceFreedByReleasingUnusedMemory // free memory was enough; no eviction needed
}
}
}
abstract MemoryPool
This is the abstract memory-pool class. It records the pool's size, which is fixed initially and can be adjusted dynamically afterwards.
The source:
private[memory] abstract class MemoryPool(lock: Object) {
@GuardedBy("lock")
private[this] var _poolSize: Long = 0 // the pool size in bytes
// The current pool size
final def poolSize: Long = lock.synchronized {
_poolSize
}
// The current free memory
final def memoryFree: Long = lock.synchronized {
_poolSize - memoryUsed
}
// Grow the pool
final def incrementPoolSize(delta: Long): Unit = lock.synchronized {
require(delta >= 0)
_poolSize += delta
}
// Shrink the pool; it can never shrink below the amount currently in use
final def decrementPoolSize(delta: Long): Unit = lock.synchronized {
require(delta >= 0)
require(delta <= _poolSize)
require(_poolSize - delta >= memoryUsed)
_poolSize -= delta
}
// Memory currently in use; subclasses must implement this
def memoryUsed: Long
}
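As a self-contained illustration of this contract (a toy pool mirroring the methods above, not Spark code), note how decrementPoolSize refuses to shrink the pool below the amount in use:
object MemoryPoolDemo extends App {
  class SimplePool {
    private var _poolSize: Long = 0L
    private var _memoryUsed: Long = 0L
    def poolSize: Long = _poolSize
    def memoryUsed: Long = _memoryUsed
    def memoryFree: Long = _poolSize - _memoryUsed
    def incrementPoolSize(delta: Long): Unit = { require(delta >= 0); _poolSize += delta }
    def decrementPoolSize(delta: Long): Unit = {
      require(delta >= 0 && delta <= _poolSize && _poolSize - delta >= _memoryUsed)
      _poolSize -= delta
    }
    def acquire(bytes: Long): Boolean =
      if (bytes <= memoryFree) { _memoryUsed += bytes; true } else false
  }
  val pool = new SimplePool
  pool.incrementPoolSize(100)
  assert(pool.acquire(60) && pool.memoryFree == 40)
  // pool.decrementPoolSize(50)  // would throw: 100 - 50 < 60 bytes in use
  pool.decrementPoolSize(40)     // fine: shrink by exactly the free amount
  println(s"poolSize=${pool.poolSize}, used=${pool.memoryUsed}, free=${pool.memoryFree}")
}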
The MemoryManager instance built from these classes is held as a single copy inside SparkEnv.