Spark 2.3 Source Code Analysis: In-Memory Collections

AppendOnlyMap

Overview

An implementation of a hash table that only supports appending data: keys are never removed, but the value for each key may change.

The hash table resolves collisions with quadratic probing, an open-addressing scheme, so its only internal data structure is a single array.

The table size is always a power of 2, and it can hold at most 0.7 * 2^29 (375,809,638) elements.

For memory locality, keys and values are stored in the same array; specifically, the element order is key0, value0, key1, value1, key2, value2, ...

AppendOnlyMap allows null as a key. The null key is handled separately (haveNullValue / nullValue), so that null entries in the data array can represent empty slots.

Apart from the missing delete operation, it provides everything you would expect from a map: insert, update, grow, lookup, and iterate:

  • Insert: implemented by update. Sets the value for a key.
  • Update: implemented by changeValue. Changes the value associated with a key.
  • Grow: implemented by growTable. Doubles the table's capacity and re-hashes every element.
  • Lookup: implemented by apply. Returns the value for a given key.
  • Iterate: implemented by destructiveSortedIterator. Returns an iterator over the map in the order of the given comparator. It sorts the map's data without using additional memory, but it destroys the validity of the map: the underlying array must not be used again afterwards.

In that it can return an iterator ordered by a comparator, AppendOnlyMap is similar to a SortedMap.
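
The following minimal sketch exercises the operations listed above. It uses only the public API shown in the source below (AppendOnlyMap is a @DeveloperApi class in org.apache.spark.util.collection); the usage context is illustrative, not taken from Spark itself:

import org.apache.spark.util.collection.AppendOnlyMap

val map = new AppendOnlyMap[String, Int]()
map.update("a", 1)                                            // insert
map.changeValue("a", (had, old) => if (had) old + 1 else 1)   // modify: "a" now maps to 2
println(map("a"))                                             // lookup via apply, prints 2
map.iterator.foreach { case (k, v) => println(s"$k -> $v") }  // plain, non-destructive iteration
// Sorted, destructive iteration: the map must not be used afterwards
val sorted = map.destructiveSortedIterator(Ordering[String])
sorted.foreach { case (k, v) => println(s"$k -> $v") }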

/**
 * :: DeveloperApi ::
 * A simple open hash table optimized for the append-only use case, where keys
 * are never removed, but the value for each key may be changed.
 *
 * This implementation uses quadratic probing with a power-of-2 hash table
 * size, which is guaranteed to explore all spaces for each key (see
 * http://en.wikipedia.org/wiki/Quadratic_probing).
 *
 * The map can support up to `375809638 (0.7 * 2 ^ 29)` elements.
 *
 * TODO: Cache the hash values of each key? java.util.HashMap does that.
 */
@DeveloperApi
class AppendOnlyMap[K, V](initialCapacity: Int = 64)
  extends Iterable[(K, V)] with Serializable {

  import AppendOnlyMap._

  require(initialCapacity <= MAXIMUM_CAPACITY,
    s"Can't make capacity bigger than ${MAXIMUM_CAPACITY} elements")
  require(initialCapacity >= 1, "Invalid initial capacity")

  private val LOAD_FACTOR = 0.7

  private var capacity = nextPowerOf2(initialCapacity)
  private var mask = capacity - 1
  private var curSize = 0
  private var growThreshold = (LOAD_FACTOR * capacity).toInt

  // Holds keys and values in the same array for memory locality; specifically, the order of
  // elements is key0, value0, key1, value1, key2, value2, etc.
  private var data = new Array[AnyRef](2 * capacity)

  // Treat the null key differently so we can use nulls in "data" to represent empty items.
  private var haveNullValue = false
  private var nullValue: V = null.asInstanceOf[V]

  // Triggered by destructiveSortedIterator; the underlying data array may no longer be used
  private var destroyed = false
  private val destructionMessage = "Map state is invalid from destructive sorting!"

  /** Get the value for a given key */
  def apply(key: K): V = {
    assert(!destroyed, destructionMessage)
    val k = key.asInstanceOf[AnyRef]
    if (k.eq(null)) {
      return nullValue
    }
    // Rehash the key's hashCode and AND it with the mask to get the probe position pos;
    // the key is stored at data(2 * pos) and its value at data(2 * pos + 1)
    var pos = rehash(k.hashCode) & mask
    var i = 1
    while (true) {
      // Keys occupy the even indices (0, 2, 4, ...) of data and values the odd ones,
      // so 2 * pos is the key's index in data and 2 * pos + 1 is the matching value's index
      val curKey = data(2 * pos)
      if (k.eq(curKey) || k.equals(curKey)) {
        // Found the key: return its value
        return data(2 * pos + 1).asInstanceOf[V]
      } else if (curKey.eq(null)) {
        return null.asInstanceOf[V]
      } else {
        // The key at this slot is not the one we are looking for, so keep probing
        // (open addressing with quadratic probing). The step grows by one on every
        // collision (delta = i, then i += 1), so the offsets from the original position
        // are the triangular numbers 1, 3, 6, 10, ...; with a power-of-2 table size this
        // probe sequence is guaranteed to eventually visit every slot.
        // pos + delta advances pos by delta slots; & mask wraps the index back into the array
        val delta = i
        pos = (pos + delta) & mask
        i += 1
      }
    }
    null.asInstanceOf[V]
  }

  /** Set the value for a key */
  def update(key: K, value: V): Unit = {
    assert(!destroyed, destructionMessage)
    val k = key.asInstanceOf[AnyRef]
    if (k.eq(null)) {
      if (!haveNullValue) {
        incrementSize()
      }
      nullValue = value
      haveNullValue = true
      return
    }
    // Rehash the key's hashCode and AND it with the mask to get the probe position
    var pos = rehash(key.hashCode) & mask
    var i = 1
    while (true) {
      val curKey = data(2 * pos)
      if (curKey.eq(null)) {
        data(2 * pos) = k
        data(2 * pos + 1) = value.asInstanceOf[AnyRef]
        incrementSize()  // Since we added a new key
        return
      } else if (k.eq(curKey) || k.equals(curKey)) {
        data(2 * pos + 1) = value.asInstanceOf[AnyRef]
        return
      } else {
        // Collision: keep probing (quadratic probing)
        val delta = i
        pos = (pos + delta) & mask
        i += 1
      }
    }
  }

  /**
   * Set the value for key to updateFunc(hadValue, oldValue), where oldValue will be the old value
   * for key, if any, or null otherwise. Returns the newly updated value.
   */
  def changeValue(key: K, updateFunc: (Boolean, V) => V): V = {
    assert(!destroyed, destructionMessage)
    val k = key.asInstanceOf[AnyRef]
    if (k.eq(null)) {
      if (!haveNullValue) {
        incrementSize()
      }
      nullValue = updateFunc(haveNullValue, nullValue)
      haveNullValue = true
      return nullValue
    }
    var pos = rehash(k.hashCode) & mask
    var i = 1
    while (true) {
      val curKey = data(2 * pos)
      // When driven by an Aggregator, updateFunc runs mergeValue if the key already
      // has a value, and createCombiner if it does not
      if (curKey.eq(null)) {
        // No value yet: updateFunc effectively runs the aggregator's createCombiner
        val newValue = updateFunc(false, null.asInstanceOf[V])
        // Store the key at data(2 * pos)
        data(2 * pos) = k
        // Store the value at data(2 * pos + 1)
        data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
        incrementSize()
        return newValue
      } else if (k.eq(curKey) || k.equals(curKey)) {
        // Key already present: updateFunc effectively runs the aggregator's mergeValue,
        // combining the old value into a new one
        val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V])
        data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
        return newValue
      } else {
        // Collision: keep probing (quadratic probing)
        val delta = i
        pos = (pos + delta) & mask
        i += 1
      }
    }
    null.asInstanceOf[V] // Never reached but needed to keep compiler happy
  }

  /** Iterator method from Iterable */
  override def iterator: Iterator[(K, V)] = {
    assert(!destroyed, destructionMessage)
    new Iterator[(K, V)] {
      var pos = -1

      /** Get the next value we should return from next(), or null if we're finished iterating */
      // Called by hasNext() and next() to locate the next key-value pair to return
      def nextValue(): (K, V) = {
        if (pos == -1) {    // Treat position -1 as looking at the null value
          if (haveNullValue) {
            return (null.asInstanceOf[K], nullValue)
          }
          pos += 1
        }
        while (pos < capacity) {
          if (!data(2 * pos).eq(null)) {
            // Return the (key, value) pair stored at this slot
            return (data(2 * pos).asInstanceOf[K], data(2 * pos + 1).asInstanceOf[V])
          }
          pos += 1
        }
        null
      }

      override def hasNext: Boolean = nextValue() != null

      override def next(): (K, V) = {
        val value = nextValue()
        if (value == null) {
          throw new NoSuchElementException("End of iterator")
        }
        pos += 1
        value
      }
    }
  }

  override def size: Int = curSize

  /** Increase table size by 1, rehashing if necessary */
  private def incrementSize() {
    curSize += 1
    if (curSize > growThreshold) {
      growTable()
    }
  }

  /**
   * Re-hash a value to deal better with hash functions that don't differ in the lower bits.
   */
  private def rehash(h: Int): Int = Hashing.murmur3_32().hashInt(h).asInt()

  /** Double the table's size and re-hash everything */
  protected def growTable() {
    // capacity < MAXIMUM_CAPACITY (2 ^ 29) so capacity * 2 won't overflow
    // The new capacity is twice the old one
    val newCapacity = capacity * 2
    require(newCapacity <= MAXIMUM_CAPACITY, s"Can't contain more than ${growThreshold} elements")
    // Allocate the new array; keys and values share it, so its length is 2 * newCapacity
    val newData = new Array[AnyRef](2 * newCapacity)
    // New mask for the new capacity
    val newMask = newCapacity - 1
    // Insert all our old values into the new array. Note that because our old keys are
    // unique, there's no need to check for equality here when we insert.
    var oldPos = 0
    while (oldPos < capacity) {
      if (!data(2 * oldPos).eq(null)) {
        // Read the key and value from the old data array
        val key = data(2 * oldPos)
        val value = data(2 * oldPos + 1)
        // Re-hash to find the key's slot in the new data array
        var newPos = rehash(key.hashCode) & newMask
        var i = 1
        var keepGoing = true
        while (keepGoing) {
          val curKey = newData(2 * newPos)
          if (curKey.eq(null)) {
            // Store the key and value at the new slot
            newData(2 * newPos) = key
            newData(2 * newPos + 1) = value
            keepGoing = false
          } else {
            // Collision: keep probing (quadratic probing)
            val delta = i
            newPos = (newPos + delta) & newMask
            i += 1
          }
        }
      }
      oldPos += 1
    }
    data = newData
    capacity = newCapacity
    mask = newMask
    growThreshold = (LOAD_FACTOR * newCapacity).toInt
  }

  // Returns n itself if n is already a power of 2, otherwise the next power of 2 above n
  // (e.g. nextPowerOf2(64) == 64, nextPowerOf2(70) == 128)
  private def nextPowerOf2(n: Int): Int = {
    val highBit = Integer.highestOneBit(n)
    if (highBit == n) n else highBit << 1
  }

  /**
   * Return an iterator of the map in sorted order. This provides a way to sort the map without
   * using additional memory, at the expense of destroying the validity of the map.
   */
  def destructiveSortedIterator(keyComparator: Comparator[K]): Iterator[(K, V)] = {
    destroyed = true
    // Pack KV pairs into the front of the underlying array
    // Compact the non-null key-value pairs to the front of the underlying array;
    // slots beyond the compacted prefix are no longer meaningful
    var keyIndex, newIndex = 0
    while (keyIndex < capacity) {
      if (data(2 * keyIndex) != null) {
        data(2 * newIndex) = data(2 * keyIndex)
        data(2 * newIndex + 1) = data(2 * keyIndex + 1)
        newIndex += 1
      }
      keyIndex += 1
    }
    assert(curSize == newIndex + (if (haveNullValue) 1 else 0))
   
    // Sort the compacted prefix [0, newIndex) of the data array in place
    new Sorter(new KVArraySortDataFormat[K, AnyRef]).sort(data, 0, newIndex, keyComparator)

    // Return an iterator over the sorted array (yielding the null-key entry first, if present)
    new Iterator[(K, V)] {
      var i = 0
      var nullValueReady = haveNullValue
      def hasNext: Boolean = (i < newIndex || nullValueReady)
      def next(): (K, V) = {
        if (nullValueReady) {
          nullValueReady = false
          (null.asInstanceOf[K], nullValue)
        } else {
          val item = (data(2 * i).asInstanceOf[K], data(2 * i + 1).asInstanceOf[V])
          i += 1
          item
        }
      }
    }
  }

  /**
   * Return whether the next insert will cause the map to grow
   */
  def atGrowThreshold: Boolean = curSize == growThreshold
}

// Companion object
private object AppendOnlyMap {
  val MAXIMUM_CAPACITY = (1 << 29)
}

SizeTrackingAppendOnlyMap

A subclass of AppendOnlyMap that keeps track of its estimated size in bytes.
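
A minimal sketch of how the size tracking is typically consumed. It assumes the snippet is compiled under the org.apache.spark package (the class is private[spark]) and that estimateSize() is the SizeTracker method callers such as ExternalSorter consult after each update when deciding whether to spill:

import org.apache.spark.util.collection.SizeTrackingAppendOnlyMap

val map = new SizeTrackingAppendOnlyMap[String, Long]
map.update("a", 1L)
map.changeValue("a", (had, old) => if (had) old + 1L else 1L)
// update/changeValue have already called afterUpdate(), so the sampled estimate is current
val estimatedBytes: Long = map.estimateSize()   // a sampled estimate, not an exact measurement
println(s"~$estimatedBytes bytes")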

/**
 * An append-only map that keeps track of its estimated size in bytes.
 */
private[spark] class SizeTrackingAppendOnlyMap[K, V]
  extends AppendOnlyMap[K, V] with SizeTracker
{
  override def update(key: K, value: V): Unit = {
    super.update(key, value)
    super.afterUpdate()
  }

  override def changeValue(key: K, updateFunc: (Boolean, V) => V): V = {
    val newValue = super.changeValue(key, updateFunc)
    super.afterUpdate()
    newValue
  }

  override protected def growTable(): Unit = {
    super.growTable()
    resetSamples()
  }
}

WritablePartitionedPairCollection

A common interface for size-tracking collections of key-value pairs that provide the following:

    1. Each key-value pair has an associated partition.
    2. A memory-efficient sorted iterator is supported.
    3. A WritablePartitionedIterator is supported, for writing the contents directly as bytes.

In practice these size-tracking collections are PartitionedAppendOnlyMap (a subclass of SizeTrackingAppendOnlyMap) and PartitionedPairBuffer (which mixes in SizeTracker). A sketch of how a caller drains such a collection follows.
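
The hedged sketch below shows one way a caller might drain such a collection through destructiveSortedWritablePartitionedIterator; it is a simplified, hypothetical helper (roughly what ExternalSorter does in its spill/write paths) and assumes it is compiled under the org.apache.spark package, because these types are private[spark]:

import java.util.Comparator

import org.apache.spark.storage.DiskBlockObjectWriter
import org.apache.spark.util.collection.WritablePartitionedPairCollection

// Hypothetical helper: write out the whole collection, one partition at a time.
def drain[K, V](
    collection: WritablePartitionedPairCollection[K, V],
    keyComparator: Option[Comparator[K]],
    writer: DiskBlockObjectWriter): Unit = {
  val it = collection.destructiveSortedWritablePartitionedIterator(keyComparator)
  while (it.hasNext()) {
    val partitionId = it.nextPartition()
    // Records come out grouped by partition ID, so this inner loop covers one partition
    while (it.hasNext() && it.nextPartition() == partitionId) {
      it.writeNext(writer)   // writes the key and value, then advances the cursor
    }
    // the real spill code commits a file segment per partition at this point
  }
}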

/**
 * A common interface for size-tracking collections of key-value pairs that
 *
 *  - Have an associated partition for each key-value pair.
 *  - Support a memory-efficient sorted iterator
 *  - Support a WritablePartitionedIterator for writing the contents directly as bytes.
 */
private[spark] trait WritablePartitionedPairCollection[K, V] {
  /**
   * Insert a key-value pair with a partition into the collection
   */
  def insert(partition: Int, key: K, value: V): Unit

  /**
   * Iterate through the data in order of partition ID and then the given comparator. This may
   * destroy the underlying collection.
   */
  // Abstract method, implemented by the concrete collections below
  // (PartitionedAppendOnlyMap and PartitionedPairBuffer)
  def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)]

  /**
   * Iterate through the data and write out the elements instead of returning them. Records are
   * returned in order of their partition ID and then the given comparator.
   * This may destroy the underlying collection.
   * It calls partitionedDestructiveSortedIterator to sort the collection's elements with the
   * given comparator, and wraps the resulting iterator so that its elements are written out
   * to a disk file instead of being returned to the caller.
   */
  def destructiveSortedWritablePartitionedIterator(keyComparator: Option[Comparator[K]])
    : WritablePartitionedIterator = {
    // The abstract method partitionedDestructiveSortedIterator(), implemented by subclasses,
    // is invoked here: an instance of the template method pattern
    val it = partitionedDestructiveSortedIterator(keyComparator)
    // Create an anonymous implementation of the WritablePartitionedIterator trait
    new WritablePartitionedIterator {
      // The current element of the iteration, or null once the iterator is exhausted
      private[this] var cur = if (it.hasNext) it.next() else null
      
      // Write the current element with the DiskBlockObjectWriter, then advance to the next one
      def writeNext(writer: DiskBlockObjectWriter): Unit = {
        writer.write(cur._1._2, cur._2)
        cur = if (it.hasNext) it.next() else null
      }

      def hasNext(): Boolean = cur != null

      def nextPartition(): Int = cur._1._1
    }
  }
}

// Companion object: its fields and methods play the role of static members
// of the same-named trait
private[spark] object WritablePartitionedPairCollection {
  /**
   * A comparator for (Int, K) pairs that orders them by only their partition ID.
   */
  def partitionComparator[K]: Comparator[(Int, K)] = new Comparator[(Int, K)] {
    override def compare(a: (Int, K), b: (Int, K)): Int = {
      a._1 - b._1
    }
  }

  /**
   * A comparator for (Int, K) pairs that orders them both by their partition ID and a key ordering.
   * In effect it decorates the given keyComparator so that the partition ID is compared first.
   */
  def partitionKeyComparator[K](keyComparator: Comparator[K]): Comparator[(Int, K)] = {
    new Comparator[(Int, K)] {
      override def compare(a: (Int, K), b: (Int, K)): Int = {
        // Compare partition IDs first; if they differ, return that result directly
        val partitionDiff = a._1 - b._1
        if (partitionDiff != 0) {
          partitionDiff
        } else {
          // Same partition: fall back to comparing the keys
          keyComparator.compare(a._2, b._2)
        }
      }
    }
  }
}
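
A small, hedged example of the two comparators (again assuming the code sits under the org.apache.spark package, since the object is private[spark]); it just sorts an in-memory array of (partition ID, key) pairs:

import org.apache.spark.util.collection.WritablePartitionedPairCollection._

val pairs: Array[(Int, String)] = Array((2, "a"), (0, "c"), (0, "b"), (1, "a"))

// Order by partition ID only: (0, "c"), (0, "b"), (1, "a"), (2, "a") -- keys left untouched
java.util.Arrays.sort(pairs, partitionComparator[String])

// Order by partition ID, then by key: (0, "b"), (0, "c"), (1, "a"), (2, "a")
java.util.Arrays.sort(pairs, partitionKeyComparator[String](Ordering[String]))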

/**
 * Iterator that writes elements to a DiskBlockObjectWriter instead of returning them. Each element
 * has an associated partition.
 */
private[spark] trait WritablePartitionedIterator {
  def writeNext(writer: DiskBlockObjectWriter): Unit

  def hasNext(): Boolean

  def nextPartition(): Int
}

PartitionedAppendOnlyMap

An implementation of WritablePartitionedPairCollection that wraps a map whose keys are (partition ID, K) tuples.

Where the parent class AppendOnlyMap stores elements as key0, value0, key1, value1, key2, value2, ...,

in PartitionedAppendOnlyMap the layout is, more concretely,

(partitionId, k)0, value0, (partitionId, k)1, value1, (partitionId, k)2, value2, ...


Accordingly, its iterator yields elements of type ((Int, K), V). A usage sketch follows.
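
A minimal sketch of map-side aggregation with PartitionedAppendOnlyMap. The class is private[spark], so assume the snippet is compiled under the org.apache.spark package; the word-count style update function mirrors what an Aggregator's createCombiner/mergeValue would do:

import org.apache.spark.util.collection.PartitionedAppendOnlyMap

val map = new PartitionedAppendOnlyMap[String, Long]
val update = (hadValue: Boolean, oldValue: Long) => if (hadValue) oldValue + 1L else 1L

map.changeValue((0, "a"), update)   // createCombiner-like path: no value yet
map.changeValue((0, "a"), update)   // mergeValue-like path: "a" in partition 0 now maps to 2
map.changeValue((1, "b"), update)

// Destructive iteration ordered by partition ID, then by key within each partition
val it = map.partitionedDestructiveSortedIterator(Some(Ordering[String]))
it.foreach { case ((partition, key), count) => println(s"p$partition $key=$count") }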

/**
 * Implementation of WritablePartitionedPairCollection that wraps a map in which the keys are tuples
 * of (partition ID, K)
 */
private[spark] class PartitionedAppendOnlyMap[K, V]
  extends SizeTrackingAppendOnlyMap[(Int, K), V] with WritablePartitionedPairCollection[K, V] {

  // Iterate through the data ordered by partition ID and then by the given comparator
  def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    // This class mixes in the WritablePartitionedPairCollection trait, so it can use the
    // partitionKeyComparator and partitionComparator methods from the companion object of
    // the same name.
    // Option.map(f): if the Option is defined, apply f to its value and return the result;
    // if it is None, return None. Here it means: if a keyComparator was supplied, decorate
    // it into a partitionKeyComparator.
    // getOrElse: if no keyComparator was supplied, fall back to partitionComparator,
    // which orders by partition ID only.
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    // Delegate the actual sorting to destructiveSortedIterator, inherited from AppendOnlyMap
    destructiveSortedIterator(comparator)
  }
  
  def insert(partition: Int, key: K, value: V): Unit = {
    // Delegate to AppendOnlyMap.update, using (partition, key) as the composite key
    update((partition, key), value)
  }
}

PartitionedPairBuffer

An append-only buffer of key-value pairs, where each pair has an associated partition ID.

The buffer is backed by a single growable array that holds keys and values side by side, so it can be sorted conveniently with KVArraySortDataFormat; specifically, the element order is key0, value0, key1, value1, key2, value2, ...

The buffer can hold at most 1073741819 elements.

Compared with PartitionedAppendOnlyMap, the buffer only provides insert, grow, and iterate:

  • Insert: implemented by insert. The element is appended at the end of the array; the key stored next to the value is a (partition ID, K) tuple.
  • Grow: implemented by growArray. Determines the new capacity and copies the contents of the old array into the new one.
  • Iterate: implemented by partitionedDestructiveSortedIterator.

PartitionedAppendOnlyMap and PartitionedPairBuffer compare as follows (a usage sketch follows the comparison):

  • Insert: PartitionedAppendOnlyMap uses quadratic probing to find an empty slot and only inserts there; PartitionedPairBuffer appends directly at the end of the array.
  • Grow: PartitionedAppendOnlyMap allocates a larger array and must re-hash every element, re-probing its position in the new array; PartitionedPairBuffer allocates a larger array and simply copies the old contents with System.arraycopy.
  • Iterate: for PartitionedAppendOnlyMap the sorted iteration is destructive to the underlying array; for PartitionedPairBuffer it is not really destructive.
  • Update: PartitionedAppendOnlyMap supports it (quadratic probing locates the key's slot and its value is replaced); PartitionedPairBuffer does not.
  • Lookup: PartitionedAppendOnlyMap supports it (quadratic probing locates the key's slot and returns its value); PartitionedPairBuffer does not.
  • Use case: ExternalSorter uses PartitionedAppendOnlyMap when map-side aggregation is needed, and PartitionedPairBuffer when it is not.
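
And a matching sketch for PartitionedPairBuffer (same caveat: the class is private[spark], so assume the snippet lives under org.apache.spark); duplicates are simply kept, since nothing is combined:

import org.apache.spark.util.collection.PartitionedPairBuffer

val buffer = new PartitionedPairBuffer[String, Int]
buffer.insert(1, "b", 2)
buffer.insert(0, "a", 1)
buffer.insert(0, "a", 3)   // a duplicate key is appended as-is, not merged

// No key comparator supplied, so records are ordered by partition ID only
val it = buffer.partitionedDestructiveSortedIterator(None)
it.foreach { case ((partition, key), value) => println(s"p$partition $key=$value") }
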
/**
 * Append-only buffer of key-value pairs, each with a corresponding partition ID, that keeps track
 * of its estimated size in bytes.
 *
 * The buffer can support up to 1073741819 elements.
 */
private[spark] class PartitionedPairBuffer[K, V](initialCapacity: Int = 64)
  extends WritablePartitionedPairCollection[K, V] with SizeTracker
{
  import PartitionedPairBuffer._

  require(initialCapacity <= MAXIMUM_CAPACITY,
    s"Can't make capacity bigger than ${MAXIMUM_CAPACITY} elements")
  require(initialCapacity >= 1, "Invalid initial capacity")

  // Basic growable array data structure. We use a single array of AnyRef to hold both the keys
  // and the values, so that we can sort them efficiently with KVArraySortDataFormat.
  private var capacity = initialCapacity
  private var curSize = 0
  private var data = new Array[AnyRef](2 * initialCapacity)

  /** Add an element into the buffer */
  def insert(partition: Int, key: K, value: V): Unit = {
    if (curSize == capacity) {
      growArray()
    }
    data(2 * curSize) = (partition, key.asInstanceOf[AnyRef])
    data(2 * curSize + 1) = value.asInstanceOf[AnyRef]
    curSize += 1
    afterUpdate()
  }

  /** Double the size of the array because we've reached capacity */
  private def growArray(): Unit = {
    if (capacity >= MAXIMUM_CAPACITY) {
      throw new IllegalStateException(s"Can't insert more than ${MAXIMUM_CAPACITY} elements")
    }
    val newCapacity =
      if (capacity * 2 > MAXIMUM_CAPACITY) { // Overflow
        MAXIMUM_CAPACITY
      } else {
        capacity * 2
      }
    val newArray = new Array[AnyRef](2 * newCapacity)
    System.arraycopy(data, 0, newArray, 0, 2 * capacity)
    data = newArray
    capacity = newCapacity
    resetSamples()
  }

  /** Iterate through the data in a given order. For this class this is not really destructive. */
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
}

// Companion object
private object PartitionedPairBuffer {
  val MAXIMUM_CAPACITY: Int = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 2
}

Reference: SortShuffleWriter in Spark Shuffle
