/**
* Append-only buffer of key-value pairs, each with a corresponding partition ID, that keeps track
* of its estimated size in bytes.
*
* The buffer can support up to `1073741823 (2 ^ 30 - 1)` elements.
*/
private[spark] class PartitionedPairBuffer[K, V](initialCapacity: Int = 64)
extends WritablePartitionedPairCollection[K, V] with SizeTracker
{
import PartitionedPairBuffer._
// fields
// Reject out-of-range capacities before any storage is allocated: upper bound first, then
// lower bound.
require(
  initialCapacity <= MAXIMUM_CAPACITY,
  s"Can't make capacity bigger than ${MAXIMUM_CAPACITY} elements")
require(initialCapacity >= 1, "Invalid initial capacity")

// Growable backing store. Keys and values live in one flat array of AnyRef so that the pairs
// can be sorted efficiently with KVArraySortDataFormat.

// Number of pairs the backing array can hold before it has to grow.
private var capacity: Int = initialCapacity

// Number of pairs inserted so far.
private var curSize: Int = 0

// Each pair occupies two consecutive slots, hence twice the capacity.
private var data: Array[AnyRef] = new Array[AnyRef](2 * initialCapacity)
/**
 * Appends one record to the end of the buffer, growing the backing array first when it is full.
 *
 * @param partition partition ID stored together with the key
 * @param key       key of the record
 * @param value     value of the record
 */
def insert(partition: Int, key: K, value: V): Unit = {
  // Make room first so the writes below always land inside the array.
  if (curSize == capacity) growArray()

  // Even slot holds the (partition, key) tuple; the odd slot right after it holds the value.
  val keySlot = 2 * curSize
  data(keySlot) = (partition, key.asInstanceOf[AnyRef])
  data(keySlot + 1) = value.asInstanceOf[AnyRef]
  curSize += 1

  // afterUpdate() is defined outside this block; presumably the size-tracking hook
  // (TODO confirm).
  afterUpdate()
}
/**
 * Enlarges the backing array after it has filled up: the capacity is doubled, but never
 * beyond MAXIMUM_CAPACITY. Existing contents are copied over unchanged.
 *
 * @throws IllegalStateException if the buffer is already at MAXIMUM_CAPACITY
 */
private def growArray(): Unit = {
  if (capacity >= MAXIMUM_CAPACITY) {
    throw new IllegalStateException(s"Can't insert more than ${MAXIMUM_CAPACITY} elements")
  }

  // A negative product means the Int multiplication overflowed; clamp in that case as well
  // as when the doubled value simply exceeds the limit.
  val doubled = capacity * 2
  val newCapacity =
    if (doubled >= 0 && doubled <= MAXIMUM_CAPACITY) doubled else MAXIMUM_CAPACITY

  // Two slots per pair, so both the allocation and the copy length are twice the pair count.
  val grown = new Array[AnyRef](2 * newCapacity)
  System.arraycopy(data, 0, grown, 0, 2 * capacity)
  data = grown
  capacity = newCapacity

  // resetSamples() is defined outside this block; presumably restarts size sampling after
  // the footprint jump (TODO confirm).
  resetSamples()
}
// partitionedDestructiveSortedIterator
/**
 * Sorts the stored pairs in place and then returns an iterator over them. For this class the
 * operation is not really destructive.
 *
 * @param keyComparator optional comparator for keys; when present it is passed to
 *                      `partitionKeyComparator`, otherwise `partitionComparator` is used
 * @return the buffer's `iterator`, obtained after the sort has run
 */
override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
  : Iterator[((Int, K), V)] = {
  // Explicit type so both branches are checked against the same comparator type.
  val ordering: Comparator[(Int, K)] = keyComparator match {
    case Some(keyOrdering) => partitionKeyComparator(keyOrdering)
    case None => partitionComparator
  }

  // Only the first curSize pairs are populated, so the sort range stops there.
  val sorter = new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef])
  sorter.sort(data, 0, curSize, ordering)
  iterator
}