AppendOnlyMap
概述
一个只可以添加数据的hash table的实现。它的key值永远不会删除,而每个key的value值可能会改变。
该hash table使用开放探测方法中的二次探测法保存数据,所以内部只有一个数组的数据结构。
该hash table的大小始终为2的幂次方,最多可以支持0.7 * 2 ^ 29个元素。
该hash table为了内存本地性,在同一个数组中保存key和value值;更明确的说,元素的顺序是key0, value0, key1, value1, key2, value2....
该AppendOnlyMap允许null作为key。null key的value不存放在data数组中,而是单独保存在nullValue字段里;如果从未给null key设置过值,查找它时返回null。
除了没有提供删除功能外,它提供了一个map应有的插入、修改、扩容、查找、迭代功能:
- 插入:update方法实现。设置key和value值。
- 修改:changeValue方法实现。通过updateFunc修改key的value值;如果key不存在,则用updateFunc生成的初始值插入该key。
- 扩容:growTable方法实现。将该table的容量扩大为原来的2倍,并对所有元素重新哈希。
- 查找:apply方法实现。用于获取给定key的value值。
- 迭代:destructiveSortedIterator方法实现。按照给定比较器的排序顺序返回该map的迭代器,该方法不需要使用额外的内存就能将map上的数据排序,但是会破坏map的有效性,底层的数组结构不能再被使用。
AppendOnlyMap可以基于比较器的排序顺序返回该map的迭代器,在这一点上与SortedMap类似。
/**
 * :: DeveloperApi ::
 * A simple open hash table optimized for the append-only use case, where keys
 * are never removed, but the value for each key may be changed.
 *
 * This implementation uses quadratic probing with a power-of-2 hash table
 * size, which is guaranteed to explore all spaces for each key (see
 * http://en.wikipedia.org/wiki/Quadratic_probing).
 *
 * The map can support up to `375809638 (0.7 * 2 ^ 29)` elements.
 *
 * A null key is supported; its value is stored in a dedicated field rather than in the table.
 *
 * TODO: Cache the hash values of each key? java.util.HashMap does that.
 *
 * @param initialCapacity requested number of slots, between 1 and MAXIMUM_CAPACITY inclusive;
 *                        it is rounded up to the next power of two
 */
@DeveloperApi
class AppendOnlyMap[K, V](initialCapacity: Int = 64)
  extends Iterable[(K, V)] with Serializable {

  import AppendOnlyMap._

  require(initialCapacity <= MAXIMUM_CAPACITY,
    s"Can't make capacity bigger than ${MAXIMUM_CAPACITY} elements")
  require(initialCapacity >= 1, "Invalid initial capacity")

  private val LOAD_FACTOR = 0.7

  private var capacity = nextPowerOf2(initialCapacity)
  // capacity is a power of two, so `hash & mask` is a valid slot index.
  private var mask = capacity - 1
  private var curSize = 0
  private var growThreshold = (LOAD_FACTOR * capacity).toInt

  // Holds keys and values in the same array for memory locality; specifically, the order of
  // elements is key0, value0, key1, value1, key2, value2, etc.
  private var data = new Array[AnyRef](2 * capacity)

  // Treat the null key differently so we can use nulls in "data" to represent empty items.
  private var haveNullValue = false
  private var nullValue: V = null.asInstanceOf[V]

  // Triggered by destructiveSortedIterator; the underlying data array may no longer be used
  private var destroyed = false
  private val destructionMessage = "Map state is invalid from destructive sorting!"

  /**
   * Get the value for a given key.
   *
   * @return the stored value, or null if the key is not present (for the null key, whatever is
   *         currently held in the dedicated null-value field)
   */
  def apply(key: K): V = {
    assert(!destroyed, destructionMessage)
    val k = key.asInstanceOf[AnyRef]
    if (k.eq(null)) {
      return nullValue
    }
    // Home slot: scrambled hash code masked down to the table size.
    var pos = rehash(k.hashCode) & mask
    var i = 1
    while (true) {
      // Slot `pos` keeps its key at index 2 * pos and its value at index 2 * pos + 1.
      val curKey = data(2 * pos)
      if (k.eq(curKey) || k.equals(curKey)) {
        return data(2 * pos + 1).asInstanceOf[V]
      } else if (curKey.eq(null)) {
        // An empty slot ends the probe sequence: the key is not in the table.
        return null.asInstanceOf[V]
      } else {
        // Collision with a different key. The step grows by one on every probe, so the
        // offsets from the home slot are 1, 3, 6, ...; `& mask` wraps around the table.
        val delta = i
        pos = (pos + delta) & mask
        i += 1
      }
    }
    null.asInstanceOf[V]
  }

  /** Set the value for a key, inserting the key if it is not already present. */
  def update(key: K, value: V): Unit = {
    assert(!destroyed, destructionMessage)
    val k = key.asInstanceOf[AnyRef]
    if (k.eq(null)) {
      if (!haveNullValue) {
        incrementSize()
      }
      nullValue = value
      haveNullValue = true
      return
    }
    var pos = rehash(key.hashCode) & mask
    var i = 1
    while (true) {
      val curKey = data(2 * pos)
      if (curKey.eq(null)) {
        // Empty slot: the key is new, so store it here.
        data(2 * pos) = k
        data(2 * pos + 1) = value.asInstanceOf[AnyRef]
        incrementSize() // Since we added a new key
        return
      } else if (k.eq(curKey) || k.equals(curKey)) {
        // Existing key: overwrite its value in place.
        data(2 * pos + 1) = value.asInstanceOf[AnyRef]
        return
      } else {
        // Occupied by a different key: move on to the next probe position.
        val delta = i
        pos = (pos + delta) & mask
        i += 1
      }
    }
  }

  /**
   * Set the value for key to updateFunc(hadValue, oldValue), where oldValue will be the old value
   * for key, if any, or null otherwise. Returns the newly updated value.
   */
  def changeValue(key: K, updateFunc: (Boolean, V) => V): V = {
    assert(!destroyed, destructionMessage)
    val k = key.asInstanceOf[AnyRef]
    if (k.eq(null)) {
      if (!haveNullValue) {
        incrementSize()
      }
      nullValue = updateFunc(haveNullValue, nullValue)
      haveNullValue = true
      return nullValue
    }
    var pos = rehash(k.hashCode) & mask
    var i = 1
    while (true) {
      val curKey = data(2 * pos)
      if (curKey.eq(null)) {
        // New key: ask updateFunc for the initial value, then store key and value.
        val newValue = updateFunc(false, null.asInstanceOf[V])
        data(2 * pos) = k
        data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
        incrementSize()
        return newValue
      } else if (k.eq(curKey) || k.equals(curKey)) {
        // Existing key: let updateFunc derive the new value from the old one.
        val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V])
        data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
        return newValue
      } else {
        // Occupied by a different key: move on to the next probe position.
        val delta = i
        pos = (pos + delta) & mask
        i += 1
      }
    }
    null.asInstanceOf[V] // Never reached but needed to keep compiler happy
  }

  /** Iterator method from Iterable */
  override def iterator: Iterator[(K, V)] = {
    assert(!destroyed, destructionMessage)
    new Iterator[(K, V)] {
      var pos = -1

      /** Get the next value we should return from next(), or null if we're finished iterating */
      def nextValue(): (K, V) = {
        if (pos == -1) { // Treat position -1 as looking at the null value
          if (haveNullValue) {
            return (null.asInstanceOf[K], nullValue)
          }
          pos += 1
        }
        // Skip over empty slots until an occupied one is found.
        while (pos < capacity) {
          if (!data(2 * pos).eq(null)) {
            return (data(2 * pos).asInstanceOf[K], data(2 * pos + 1).asInstanceOf[V])
          }
          pos += 1
        }
        null
      }

      override def hasNext: Boolean = nextValue() != null

      override def next(): (K, V) = {
        val value = nextValue()
        if (value == null) {
          throw new NoSuchElementException("End of iterator")
        }
        // Step past the entry just returned so the following call resumes after it.
        pos += 1
        value
      }
    }
  }

  override def size: Int = curSize

  /** Increase table size by 1, rehashing if necessary */
  private def incrementSize(): Unit = {
    curSize += 1
    if (curSize > growThreshold) {
      growTable()
    }
  }

  /**
   * Re-hash a value to deal better with hash functions that don't differ in the lower bits.
   */
  private def rehash(h: Int): Int = Hashing.murmur3_32().hashInt(h).asInt()

  /** Double the table's size and re-hash everything */
  protected def growTable(): Unit = {
    // capacity < MAXIMUM_CAPACITY (2 ^ 29) so capacity * 2 won't overflow
    val newCapacity = capacity * 2
    require(newCapacity <= MAXIMUM_CAPACITY, s"Can't contain more than ${growThreshold} elements")
    // Two array cells (key and value) per slot.
    val newData = new Array[AnyRef](2 * newCapacity)
    val newMask = newCapacity - 1
    // Insert all our old values into the new array. Note that because our old keys are
    // unique, there's no need to check for equality here when we insert.
    var oldPos = 0
    while (oldPos < capacity) {
      if (!data(2 * oldPos).eq(null)) {
        val key = data(2 * oldPos)
        val value = data(2 * oldPos + 1)
        // Recompute the home slot against the larger table, then probe for a free slot.
        var newPos = rehash(key.hashCode) & newMask
        var i = 1
        var keepGoing = true
        while (keepGoing) {
          val curKey = newData(2 * newPos)
          if (curKey.eq(null)) {
            newData(2 * newPos) = key
            newData(2 * newPos + 1) = value
            keepGoing = false
          } else {
            val delta = i
            newPos = (newPos + delta) & newMask
            i += 1
          }
        }
      }
      oldPos += 1
    }
    data = newData
    capacity = newCapacity
    mask = newMask
    growThreshold = (LOAD_FACTOR * newCapacity).toInt
  }

  /** Return n if it is already a power of two, otherwise the next larger power of two. */
  private def nextPowerOf2(n: Int): Int = {
    val highBit = Integer.highestOneBit(n)
    if (highBit == n) n else highBit << 1
  }

  /**
   * Return an iterator of the map in sorted order. This provides a way to sort the map without
   * using additional memory, at the expense of destroying the validity of the map.
   *
   * The null key's entry, if there is one, is returned first; the remaining entries follow in
   * the order given by keyComparator. Calling next() on the exhausted iterator throws
   * NoSuchElementException.
   */
  def destructiveSortedIterator(keyComparator: Comparator[K]): Iterator[(K, V)] = {
    destroyed = true
    // Pack KV pairs into the front of the underlying array
    var keyIndex, newIndex = 0
    while (keyIndex < capacity) {
      if (data(2 * keyIndex) != null) {
        data(2 * newIndex) = data(2 * keyIndex)
        data(2 * newIndex + 1) = data(2 * keyIndex + 1)
        newIndex += 1
      }
      keyIndex += 1
    }
    // The null key is counted in curSize but is not stored in the array.
    assert(curSize == newIndex + (if (haveNullValue) 1 else 0))

    // Sort the packed prefix of the array in place.
    new Sorter(new KVArraySortDataFormat[K, AnyRef]).sort(data, 0, newIndex, keyComparator)

    new Iterator[(K, V)] {
      var i = 0
      var nullValueReady = haveNullValue
      def hasNext: Boolean = (i < newIndex || nullValueReady)
      def next(): (K, V) = {
        if (nullValueReady) {
          nullValueReady = false
          (null.asInstanceOf[K], nullValue)
        } else if (i < newIndex) {
          val item = (data(2 * i).asInstanceOf[K], data(2 * i + 1).asInstanceOf[V])
          i += 1
          item
        } else {
          // Slots at or beyond newIndex hold leftovers from packing, not live entries.
          throw new NoSuchElementException("End of iterator")
        }
      }
    }
  }

  /**
   * Return whether the next insert will cause the map to grow
   */
  def atGrowThreshold: Boolean = curSize == growThreshold
}
// Companion object: constants shared by all AppendOnlyMap instances.
private object AppendOnlyMap {
  /** Largest number of slots a table may have (2 ^ 29). */
  val MAXIMUM_CAPACITY: Int = 1 << 29
}
SizeTrackingAppendOnlyMap
AppendOnlyMap的子类,可以追踪字节的预估大小。
/**
 * An append-only map that keeps track of its estimated size in bytes.
 *
 * Every mutation is forwarded to AppendOnlyMap and then reported to the SizeTracker mixin;
 * growing the table resets the tracker's samples.
 */
private[spark] class SizeTrackingAppendOnlyMap[K, V]
  extends AppendOnlyMap[K, V] with SizeTracker
{
  /** Store the value, then notify the size tracker of the update. */
  override def update(key: K, value: V): Unit = {
    super.update(key, value)
    super.afterUpdate()
  }

  /** Apply updateFunc through the parent map, notify the size tracker, and return the result. */
  override def changeValue(key: K, updateFunc: (Boolean, V) => V): V = {
    val updated = super.changeValue(key, updateFunc)
    super.afterUpdate()
    updated
  }

  /** Grow the table as usual, then reset the tracker's samples. */
  override protected def growTable(): Unit = {
    super.growTable()
    resetSamples()
  }
}
WritablePartitionedPairCollection
一个为保存key/value键值对的size-tracking collection提供以下功能的通用接口。它有以下功能:
1、每个键值对有一个与之关联的partition;
2、支持一个内存效率高的sorted iterator;
3、支持一个WritablePartitionedIterator接口,用于将内容直接写为字节
在本文涉及的代码中,实现该接口的size-tracking collection有两个:PartitionedAppendOnlyMap(SizeTrackingAppendOnlyMap的子类)和PartitionedPairBuffer。
/**
 * A common interface for size-tracking collections of key-value pairs that
 *
 *  - Have an associated partition for each key-value pair.
 *  - Support a memory-efficient sorted iterator
 *  - Support a WritablePartitionedIterator for writing the contents directly as bytes.
 */
private[spark] trait WritablePartitionedPairCollection[K, V] {
  /**
   * Insert a key-value pair with a partition into the collection
   */
  def insert(partition: Int, key: K, value: V): Unit

  /**
   * Iterate through the data in order of partition ID and then the given comparator. This may
   * destroy the underlying collection.
   *
   * Abstract: each concrete collection supplies its own implementation.
   */
  def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)]

  /**
   * Iterate through the data and write out the elements instead of returning them. Records are
   * returned in order of their partition ID and then the given comparator.
   * This may destroy the underlying collection.
   *
   * Built on top of partitionedDestructiveSortedIterator: the sorted records are handed, one
   * per writeNext call, to the DiskBlockObjectWriter supplied by the caller.
   */
  def destructiveSortedWritablePartitionedIterator(keyComparator: Option[Comparator[K]])
    : WritablePartitionedIterator = {
    val sorted = partitionedDestructiveSortedIterator(keyComparator)
    new WritablePartitionedIterator {
      // The record the next writeNext call will emit; null once `sorted` is exhausted.
      private[this] var cur: ((Int, K), V) = fetch()

      // Pull the next record from the sorted iterator, or null when none is left.
      private[this] def fetch(): ((Int, K), V) = if (sorted.hasNext) sorted.next() else null

      // Write the current record's key and value (not its partition ID), then advance.
      def writeNext(writer: DiskBlockObjectWriter): Unit = {
        val record = cur
        writer.write(record._1._2, record._2)
        cur = fetch()
      }

      def hasNext(): Boolean = cur != null

      // Partition ID of the record that the next writeNext call will emit.
      def nextPartition(): Int = cur._1._1
    }
  }
}
// Companion object: comparator factories for (partition ID, key) pairs.
private[spark] object WritablePartitionedPairCollection {
  /**
   * A comparator for (Int, K) pairs that orders them by only their partition ID.
   *
   * Uses Integer.compare rather than subtraction so the result has the right sign even when the
   * difference of the two IDs does not fit in an Int.
   */
  def partitionComparator[K]: Comparator[(Int, K)] = new Comparator[(Int, K)] {
    override def compare(a: (Int, K), b: (Int, K)): Int = {
      Integer.compare(a._1, b._1)
    }
  }

  /**
   * A comparator for (Int, K) pairs that orders them both by their partition ID and a key ordering.
   *
   * @param keyComparator ordering applied to the keys of pairs that share a partition ID
   */
  def partitionKeyComparator[K](keyComparator: Comparator[K]): Comparator[(Int, K)] = {
    new Comparator[(Int, K)] {
      override def compare(a: (Int, K), b: (Int, K)): Int = {
        // Partition ID decides first; overflow-safe, unlike a._1 - b._1.
        val partitionDiff = Integer.compare(a._1, b._1)
        if (partitionDiff != 0) {
          partitionDiff
        } else {
          // Same partition: fall back to the caller's key ordering.
          keyComparator.compare(a._2, b._2)
        }
      }
    }
  }
}
/**
 * Iterator that writes elements to a DiskBlockObjectWriter instead of returning them. Each element
 * has an associated partition.
 *
 * The notes on the members below describe the one implementation visible in this file (the
 * anonymous class created by destructiveSortedWritablePartitionedIterator).
 */
private[spark] trait WritablePartitionedIterator {
// Writes the current element's key and value to `writer`, then advances to the next element.
def writeNext(writer: DiskBlockObjectWriter): Unit
// True while there is still an element left to write.
def hasNext(): Boolean
// Partition ID of the element that the next writeNext call will write.
def nextPartition(): Int
}
PartitionedAppendOnlyMap
WritablePartitionedPairCollection的实现,它是map的包装器,该map的key值是一个(partition ID, K)元组。
如果说在父类AppendOnlyMap中,元素的存储格式为key0, value0, key1, value1, key2, value2....
则在PartitionedAppendonlyMap中,元素的存储格式可以更加具体为
元素的顺序是(partitionId, k)0, value0, (partitionId, k)1, value1, (partitionId, k)2, value2....
图片引自: here
所以,它的迭代器迭代的元素类型也是((Int, K), V)
/**
 * Implementation of WritablePartitionedPairCollection that wraps a map in which the keys are tuples
 * of (partition ID, K)
 */
private[spark] class PartitionedAppendOnlyMap[K, V]
  extends SizeTrackingAppendOnlyMap[(Int, K), V] with WritablePartitionedPairCollection[K, V] {

  /**
   * Iterate through the data in order of partition ID and then, when a key comparator is given,
   * by key. The sort is done by the inherited destructiveSortedIterator.
   */
  def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    // NOTE(review): partitionKeyComparator and partitionComparator are defined on the
    // WritablePartitionedPairCollection companion object; presumably they are brought into
    // scope by an import that is not visible here -- confirm.
    val comparator: Comparator[(Int, K)] = keyComparator match {
      // A key ordering was supplied: sort by partition ID, then by key.
      case Some(keyOrdering) => partitionKeyComparator(keyOrdering)
      // No key ordering: sort by partition ID only.
      case None => partitionComparator[K]
    }
    destructiveSortedIterator(comparator)
  }

  /** Insert or overwrite the value stored under the composite key (partition, key). */
  def insert(partition: Int, key: K, value: V): Unit = {
    val compositeKey = (partition, key)
    update(compositeKey, value)
  }
}
PartitionedPairBuffer
一个只支持追加的保存kv键值对的buffer。每个kv键值对都具有一个相应的partitionId。
该buffer底层有一个可扩容的数组结构。在该数组中同时保存key和value值,从而很方便地使用KVArraySortDataFormat进行排序。更明确的说,元素的顺序是key0, value0, key1, value1, key2, value2....
该buffer最多支持1073741819个元素。
相比PartitionedAppendOnlyMap,该buffer只提供了插入、扩容、迭代功能:
- 插入:insert方法实现。元素会被插入数组的末端。在该数组中同时保存元素的key和value值,元素的key值是一个(partition ID, K)元组。
- 扩容:growArray方法实现。确定新数组的capacity,然后将原数组的内容复制到新数组。
- 迭代:partitionedDestructiveSortedIterator方法实现。
PartitionedAppendOnlyMap和PartitionedPairBuffer的比较如下:
| | PartitionedAppendOnlyMap | PartitionedPairBuffer |
| --- | --- | --- |
| 插入 | 使用二次探测法确定元素在数组中的位置,找到为空的位置才插入 | 直接插入到数组末端 |
| 扩容 | 创建新容量的数组,需对原数组的所有元素进行重哈希,并用二次探测法确定在新数组中的下标位置 | 创建新容量的数组,然后调用System.arraycopy方法直接将原数组内容复制到新数组 |
| 迭代 | 迭代对底层数组是破坏性的 | 迭代对底层数组实际上不是破坏性的 |
| 修改 | 支持。二次探测法确定key在数组中的下标位置,并修改它的value值 | 不支持 |
| 查找 | 支持。二次探测法确定key在数组中的下标位置,并返回它的value值 | 不支持 |
| 用途 | ExternalSorter需要map端的聚合时 | ExternalSorter不需要map端的聚合时 |
/**
 * Append-only buffer of key-value pairs, each with a corresponding partition ID, that keeps track
 * of its estimated size in bytes.
 *
 * The buffer can support up to 1073741819 elements.
 *
 * @param initialCapacity number of pairs the buffer can hold before its first growth; must be
 *                        between 1 and MAXIMUM_CAPACITY inclusive
 */
private[spark] class PartitionedPairBuffer[K, V](initialCapacity: Int = 64)
  extends WritablePartitionedPairCollection[K, V] with SizeTracker
{
  import PartitionedPairBuffer._

  require(initialCapacity <= MAXIMUM_CAPACITY,
    s"Can't make capacity bigger than ${MAXIMUM_CAPACITY} elements")
  require(initialCapacity >= 1, "Invalid initial capacity")

  // Basic growable array data structure. We use a single array of AnyRef to hold both the keys
  // and the values, so that we can sort them efficiently with KVArraySortDataFormat.
  private var capacity = initialCapacity
  private var curSize = 0
  private var data = new Array[AnyRef](2 * initialCapacity)

  /** Add an element into the buffer */
  def insert(partition: Int, key: K, value: V): Unit = {
    if (curSize == capacity) {
      growArray()
    }
    // Pair number curSize lives at indices 2 * curSize (key) and 2 * curSize + 1 (value).
    val keySlot = 2 * curSize
    data(keySlot) = (partition, key.asInstanceOf[AnyRef])
    data(keySlot + 1) = value.asInstanceOf[AnyRef]
    curSize += 1
    afterUpdate()
  }

  /** Double the size of the array because we've reached capacity */
  private def growArray(): Unit = {
    if (capacity >= MAXIMUM_CAPACITY) {
      throw new IllegalStateException(s"Can't insert more than ${MAXIMUM_CAPACITY} elements")
    }
    // Double the capacity, but never go past MAXIMUM_CAPACITY.
    val grownCapacity = math.min(capacity * 2, MAXIMUM_CAPACITY)
    val grown = new Array[AnyRef](2 * grownCapacity)
    // Existing pairs keep their positions, so a straight copy of the old cells is enough.
    System.arraycopy(data, 0, grown, 0, 2 * capacity)
    data = grown
    capacity = grownCapacity
    resetSamples()
  }

  /** Iterate through the data in a given order. For this class this is not really destructive. */
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator: Comparator[(Int, K)] = keyComparator match {
      // A key ordering was supplied: sort by partition ID, then by key.
      case Some(keyOrdering) => partitionKeyComparator(keyOrdering)
      // No key ordering: sort by partition ID only.
      case None => partitionComparator[K]
    }
    // Sort the first curSize pairs of the backing array in place.
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator()
  }

  /** Walk the first curSize pairs of the backing array in their current order. */
  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var cursor = 0

    override def hasNext: Boolean = cursor < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val base = 2 * cursor
      val pair = (data(base).asInstanceOf[(Int, K)], data(base + 1).asInstanceOf[V])
      cursor += 1
      pair
    }
  }
}
// Companion object: constants for PartitionedPairBuffer.
private object PartitionedPairBuffer {
// Maximum number of pairs a buffer may hold. Each pair takes two cells of the backing array
// (key and value), hence half of ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.
val MAXIMUM_CAPACITY: Int = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 2
}