Dictionary
Dictionary
内部只有一个成员变量 _variantBuffer
,它的类型是 _VariantDictionaryBuffer
。
public struct Dictionary<Key: Hashable, Value> {
internal typealias _VariantBuffer = _VariantDictionaryBuffer<Key, Value>
![](https://user-gold-cdn.xitu.io/2018/7/6/1646ed8037e769fe?w=847&h=544&f=png&s=35984)
复制代码
主要有两种初始化方法。
public init() {
self = Dictionary<Key, Value>(_nativeBuffer: _NativeBuffer())
}
public init(minimumCapacity: Int) {
_variantBuffer = .native(_NativeBuffer(minimumCapacity: minimumCapacity))
}
#if _runtime(_ObjC)
public init(_immutableCocoaDictionary: _NSDictionary) {
_sanityCheck(
_isBridgedVerbatimToObjectiveC(Key.self) &&
_isBridgedVerbatimToObjectiveC(Value.self),
"Dictionary can be backed by NSDictionary buffer only when both key and value are bridged verbatim to Objective-C")
_variantBuffer = .cocoa(
_CocoaDictionaryBuffer(cocoaDictionary: _immutableCocoaDictionary))
}
#endif
}
复制代码
从两个初始化的情况来看, 一种是需要桥接到 Objective-C
,而另外一种是不需要桥接到 Objective-C
。
接下来具体看看 _VariantDictionaryBuffer
_VariantDictionaryBuffer
internal enum _VariantDictionaryBuffer<Key: Hashable, Value>: _HashBuffer {
internal typealias NativeBuffer = _NativeDictionaryBuffer<Key, Value>
#if _runtime(_ObjC)
internal typealias CocoaBuffer = _CocoaDictionaryBuffer
#endif
case native(NativeBuffer)
#if _runtime(_ObjC)
case cocoa(CocoaBuffer)
#endif
复制代码
_VariantDictionaryBuffer
其实是个 enum
,只有两个值 native
和 cocoa
,分别对应着 _NativeDictionaryBuffer
和 _CocoaDictionaryBuffer
。
_NativeDictionaryBuffer
_NativeDictionaryBuffer
中也只有一个成员变量 _storage
,它的类型是 _RawNativeDictionaryStorage
。
internal struct _NativeDictionaryBuffer<Key, Value> {
internal typealias RawStorage = _RawNativeDictionaryStorage
internal var _storage: RawStorage
复制代码
再来看看 _NativeDictionaryBuffer
内部的初始化代码。
internal init(minimumCapacity: Int) {
let bucketCount = _NativeDictionaryBuffer.bucketCount(
forCapacity: minimumCapacity,
maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse)
self.init(bucketCount: bucketCount)
}
internal var _hashContainerDefaultMaxLoadFactorInverse: Double {
return 1.0 / 0.75
}
internal static func bucketCount(
forCapacity capacity: Int,
maxLoadFactorInverse: Double
) -> Int {
return max(Int((Double(capacity) * maxLoadFactorInverse).rounded(.up)),
capacity + 1)
}
internal init(bucketCount: Int) {
_sanityCheck(bucketCount <= (Int.max >> 1) + 1)
let buckets = 1 &<< ((Swift.max(bucketCount, 2) - 1)._binaryLogarithm() + 1)
self.init(_exactBucketCount: buckets)
}
internal init(_exactBucketCount bucketCount: Int) {
let bitmapWordCount = _UnsafeBitMap.sizeInWords(forSizeInBits: bucketCount)
let storage = Builtin.allocWithTailElems_3(HashTypedStorage.self,
bitmapWordCount._builtinWordValue, UInt.self,
bucketCount._builtinWordValue, Key.self,
bucketCount._builtinWordValue, Value.self)
self.init(_exactBucketCount: bucketCount, storage: storage)
}
internal init(_exactBucketCount bucketCount: Int, storage: RawStorage) {
storage.bucketCount = bucketCount
storage.count = 0
self.init(_storage: storage)
let initializedEntries = _UnsafeBitMap(
storage: _initializedHashtableEntriesBitMapBuffer,
bitCount: bucketCount)
initializedEntries.initializeToZero()
let bitmapAddr = Builtin.projectTailElems(_storage, UInt.self)
let bitmapWordCount = _UnsafeBitMap.sizeInWords(forSizeInBits: bucketCount)
let keysAddr = Builtin.getTailAddr_Word(bitmapAddr,
bitmapWordCount._builtinWordValue, UInt.self, Key.self)
_storage.initializedEntries = initializedEntries
_storage.keys = UnsafeMutableRawPointer(keysAddr)
let valuesAddr = Builtin.getTailAddr_Word(keysAddr,
bucketCount._builtinWordValue, Key.self, Value.self)
_storage.values = UnsafeMutableRawPointer(valuesAddr)
let seed = _Hasher._seed
let perturbation = bucketCount
_storage.seed = (seed.0 ^ UInt64(truncatingIfNeeded: perturbation), seed.1)
}
}
复制代码
TypedStorage
在文档中的说明是,此类有两个作用,第一个作用是为了能够创建 _NativeDictionaryBuffer<AnyObject, AnyObject>
,但是创建出来的 Dictionary
只能使用索引和迭代器的功能,第二个作用是继承于 _RawNativeDictionaryStorage
,实现 deinit
方法,反初始化 key 和 value
。
Builtin.allocWithTailElems_3
初始化 _storage
,分配几段连续内存
- 为防止
key
是AnyObject
需要为其添加类型约束,所以需要为TypeStorage
分配一段约束。 - 分配
bitmapWordCount
个连续的内存块用来存储bitmap
(ps:bitmap,最主要的作用是能够快速的对key值进行查重操作) - 分配
bucketCount
个连续的内存块用来存储key
。 - 分配
bucketCount
个连续的内存块用来存储value
。
bucketCount
计算相当于是 minimumCapacity
的 1.0 / 0.75
倍。
initializedEntries
用于哈希算法中解决冲突问题的辅助 bitmap
下面会具体提到它的使用。
Builtin.getTailAddr_Word
获取到 key
和 value
的初始内存地址分别赋值给 _storage.key
和 value
,而 _RawNativeDictionaryStorage
内部定义了通用的方法和属性。
ps: 至于 seed
的作用,我一直没搞懂,希望有大牛能够指点一下。
追加空间申请
来看一下 _NativeDictionaryBuffer
中追加空间申请的实现思路。暂时会隐去 _CocoaDictionaryBuffer
的相关内容。
public mutating func reserveCapacity(_ minimumCapacity: Int) {
_variantBuffer.reserveCapacity(minimumCapacity)
}
//_VariantDictionaryBuffer
internal mutating func reserveCapacity(_ capacity: Int) {
_ = ensureUniqueNativeBuffer(withCapacity: capacity)
}
internal mutating func ensureUniqueNativeBuffer(
withCapacity minimumCapacity: Int
) -> (reallocated: Bool, capacityChanged: Bool) {
let bucketCount = NativeBuffer.bucketCount(
forCapacity: minimumCapacity,
maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse)
return ensureUniqueNativeBuffer(withBucketCount: bucketCount)
}
internal mutating func ensureUniqueNativeBuffer(
withBucketCount desiredBucketCount: Int
) -> (reallocated: Bool, capacityChanged: Bool) {
let n = _isNative
if n {
return ensureUniqueNativeBufferNative(withBucketCount: desiredBucketCount)
}
}
internal mutating func ensureUniqueNativeBufferNative(
withBucketCount desiredBucketCount: Int
) -> (reallocated: Bool, capacityChanged: Bool) {
let oldBucketCount = asNative.bucketCount
if oldBucketCount >= desiredBucketCount && isUniquelyReferenced() {
return (reallocated: false, capacityChanged: false)
}
let oldNativeBuffer = asNative
var newNativeBuffer = NativeBuffer(bucketCount: desiredBucketCount)
let newBucketCount = newNativeBuffer.bucketCount
for i in 0..<oldBucketCount {
if oldNativeBuffer.isInitializedEntry(at: i) {
if oldBucketCount == newBucketCount {
let key = oldNativeBuffer.key(at: i)
let value = oldNativeBuffer.value(at: i)
newNativeBuffer.initializeKey(key, value: value , at: i)
} else {
let key = oldNativeBuffer.key(at: i)
newNativeBuffer.unsafeAddNew(
key: key,
value: oldNativeBuffer.value(at: i))
}
}
}
newNativeBuffer.count = oldNativeBuffer.count
self = .native(newNativeBuffer)
return (reallocated: true,
capacityChanged: oldBucketCount != newBucketCount)
}
internal func initializeKey(_ k: Key, value v: Value, at i: Int) {
_sanityCheck(!isInitializedEntry(at: i))
defer { _fixLifetime(self) }
(keys + i).initialize(to: k)
(values + i).initialize(to: v)
_storage.initializedEntries[i] = true
}
复制代码
可以看到,capacity
不足时,Swift 将现存的 capacity
取其 1.0 / 0.75
倍,再去申请新的 buffer
。
ensureUniqueNativeBufferNative
确保 Dictionary
持有者的唯一性和追加空间申请。
- 如果现有容量大于等于所需要的容量,并且
Dictionary
只有一个持有者,则直接返回不进行任何操作。 - 如果持有者不唯一或者需要追加空间申请,则需要重新初始化一个
_NativeDictionaryBuffer
的实例newNativeBuffer
并分配desiredBucketCount
个连续的内存块。isInitializedEntry
是用于确认当前偏移量为i
的key
值是否已存在。- 如果不需要追加内存空间,也就是
Dictionary
的持有者不唯一,则会执行 写时复制 ,则初始化偏移量为i
的内存空间,并赋值key
和value
。 - 如果需要追加内存空间,由于原本的存储空间发生了改变,所以需要重新计算每个
key
应该插入的位置,则会调用unsafeAddNew
先找到一个合适的位置插入key
,之后再进行初始化的操作。
- 如果不需要追加内存空间,也就是
就上文中提到的 unsafeAddNew
,来看看 Swift
是如何解决哈希表 key
值冲突的问题。
internal func unsafeAddNew(key newKey: Key, value: Value) {
let (i, found) = _find(newKey, startBucket: _bucket(newKey))
initializeKey(newKey, value: value, at: i.offset)
}
internal func _find(_ key: Key, startBucket: Int)
-> (pos: Index, found: Bool) {
var bucket = startBucket
while true {
let isHole = !isInitializedEntry(at: bucket)
if isHole {
return (Index(offset: bucket), false)
}
if self.key(at: bucket) == key {
return (Index(offset: bucket), true)
}
bucket = _index(after: bucket)
}
}
复制代码
其实解决冲突主要的函数是 _find
。
- 通过
_bucket(newKey)
找到当前key
值对应的位置bucket
- 从
bucket
开始遍历,如果找到bucket
的key
与当前的key
相同则返回当前key
的位置bucket
- 否则找到 第一个未被占用的位置。
由此可见 Swift
在解决哈希表冲突的时候,使用的是线性探测法。
从删除的情况来看,Dictionary
采用的是 链地址法,解决 key
值之间的冲突。
_CocoaDictionaryBuffer
_CocoaDictionaryBuffer
中只有一个成员变量 cocoaDictionary
,它的类型是 _NSDictionary
。而 _NSDictionary
在 ShadowProtocols.swift
内部定义为一个协议,继承与 _NSDictionaryCore
主要用于桥接到 Objective-C
。
internal struct _CocoaDictionaryBuffer: _HashBuffer {
internal var cocoaDictionary: _NSDictionary
}
复制代码
而在 _CocoaDictionaryBuffer
结构体内部,只是实现了 _HashBuffer
协议相关的函数和计算属性。
追加空间申请
在追加空间申请时,同 _NativeDictionaryBuffer
不同的地方就在于 ensureUniqueNativeBuffer
内部实现。
internal mutating func ensureUniqueNativeBuffer(
withBucketCount desiredBucketCount: Int
) -> (reallocated: Bool, capacityChanged: Bool) {
let cocoaDictionary = cocoaBuffer.cocoaDictionary
var newNativeBuffer = NativeBuffer(bucketCount: desiredBucketCount)
let oldCocoaIterator = _CocoaDictionaryIterator(cocoaDictionary)
while let (key, value) = oldCocoaIterator.next() {
newNativeBuffer.unsafeAddNew(
key: _forceBridgeFromObjectiveC(key, Key.self),
value: _forceBridgeFromObjectiveC(value, Value.self))
}
newNativeBuffer.count = cocoaDictionary.count
self = .native(newNativeBuffer)
return (reallocated: true, capacityChanged: true)
}
public func _forceBridgeFromObjectiveC<T>(_ x: AnyObject, _: T.Type) -> T {
if _fastPath(_isClassOrObjCExistential(T.self)) {
return x as! T
}
var result: T?
_bridgeNonVerbatimFromObjectiveC(x, T.self, &result)
return result!
}
复制代码
从源码中,可以看出,在追加空间申请时,会将 _CocoaDictionaryBuffer
转换成 _NativeDictionaryBuffer
,接下来的操作就和 _NativeDictionaryBuffer
一样。
而_forceBridgeFromObjectiveC
的功能就是将 x
从Objective-C
桥接到 Swift
,来看看具体实现。
- 如果是
T
是class
x
的类型是T
或者T
子类,则函数会直接返回 `x。
- 否则 ,如果
T
是 遵从_ObjectiveCBridgeable
协议。- 如果
x
的类型不是T.ObjectiveType
或者其子类,则会crash。 - 否则,会返回
T._forceBridgeFromObjectiveC(x)
的结果。
- 如果
操作
接下来再来看看,一些比较常用操作的具体实现。
remove
internal mutating func removeValue(forKey key: Key) -> Value? {
if _fastPath(guaranteedNative) {
return nativeRemoveObject(forKey: key)
}
switch self {
case .native:
return nativeRemoveObject(forKey: key)
#if _runtime(_ObjC)
case .cocoa(let cocoaBuffer):
let anyObjectKey: AnyObject = _bridgeAnythingToObjectiveC(key)
if cocoaBuffer.maybeGet(anyObjectKey) == nil {
return nil
}
migrateDataToNativeBuffer(cocoaBuffer)
return nativeRemoveObject(forKey: key)
#endif
}
复制代码
_NativeDictionaryBuffer
internal mutating func nativeRemoveObject(forKey key: Key) -> Value? {
var idealBucket = asNative._bucket(key)
var (index, found) = asNative._find(key, startBucket: idealBucket)
if !found {
return nil
}
let bucketCount = asNative.bucketCount
let (_, capacityChanged) = ensureUniqueNativeBuffer(
withBucketCount: bucketCount)
let nativeBuffer = asNative
if capacityChanged {
idealBucket = nativeBuffer._bucket(key)
(index, found) = nativeBuffer._find(key, startBucket: idealBucket)
_sanityCheck(found, "key was lost during buffer migration")
}
let oldValue = nativeBuffer.value(at: index.offset)
nativeDelete(nativeBuffer, idealBucket: idealBucket,
offset: index.offset)
return oldValue
}
internal mutating func nativeDelete(
_ nativeBuffer: NativeBuffer, idealBucket: Int, offset: Int
) {
var nativeBuffer = nativeBuffer
nativeBuffer.destroyEntry(at: offset)
nativeBuffer.count -= 1
var hole = offset
var start = idealBucket
while nativeBuffer.isInitializedEntry(at: nativeBuffer._prev(start)) {
start = nativeBuffer._prev(start)
}
var lastInChain = hole
var b = nativeBuffer._index(after: lastInChain)
while nativeBuffer.isInitializedEntry(at: b) {
lastInChain = b
b = nativeBuffer._index(after: b)
}
while hole != lastInChain {
var b = lastInChain
while b != hole {
let idealBucket = nativeBuffer._bucket(nativeBuffer.key(at: b))
let c0 = idealBucket >= start
let c1 = idealBucket <= hole
if start <= hole ? (c0 && c1) : (c0 || c1) {
break // Found it
}
b = nativeBuffer._prev(b)
}
if b == hole {
break
}
nativeBuffer.moveInitializeEntry(
from: nativeBuffer,
at: b,
toEntryAt: hole)
hole = b
}
}
复制代码
- 检查
Dictionary
的持有者是否唯一,如果不唯一则进行写时复制。 - 找到
key
对应的index
。 destroyEntry
从内存中回收offset
偏移量的key
和value
,并且将offset
偏移量的位置在bitmap
中标记为未被占用。- 从
offset
左边取第一个被占用的bucket
,记为start
和 右边取最后一个被占用的bucket
,记为lastInChain
。 - 查找
[start, lastInChain]
不合理的元素,并进行调整。
解释下何为不合理的元素,举个简单例子:比如有一条哈希表 hashTable
为:
[1,2,3,4,5,6,7,8]
复制代码
插入 3
,采用线性探测法
[1,2,3,4,5,6,7,8,3]
复制代码
删除 5
[1,2,3,4,nil,6,7,8,3]
复制代码
但是发现 3
本应该是在 nil
之前的,此时 3
就是不合理的元素,所以需要交换 3
和 nil
的位置。
[1,2,3,4,3,6,7,8,nil]
复制代码
_CocoaDictionaryBuffer
public func _bridgeAnythingToObjectiveC<T>(_ x: T) -> AnyObject {
if _fastPath(_isClassOrObjCExistential(T.self)) {
return unsafeBitCast(x, to: AnyObject.self)
}
return _bridgeAnythingNonVerbatimToObjectiveC(x)
}
internal func maybeGet(_ key: Key) -> Value? {
return cocoaDictionary.objectFor(key)
}
internal mutating func migrateDataToNativeBuffer(
_ cocoaBuffer: _CocoaDictionaryBuffer
) {
let allocated = ensureUniqueNativeBuffer(
withCapacity: cocoaBuffer.count).reallocated
_sanityCheck(allocated, "failed to allocate native Dictionary buffer")
}
复制代码
_bridgeAnythingToObjectiveC
将任意值转换成AnyObject
。maybeGet
获取当前key
值的value
。migrateDataToNativeBuffer
,和追加申请空间的时候操作相同,将_CocoaDictionaryBuffer
转换成_NativeDictionaryBuffer
。- 调用
nativeRemoveObject
,操作和_NativeDictionaryBuffer
相同。
updateValue
internal mutating func updateValue(
_ value: Value, forKey key: Key
) -> Value? {
if _fastPath(guaranteedNative) {
return nativeUpdateValue(value, forKey: key)
}
switch self {
case .native:
return nativeUpdateValue(value, forKey: key)
#if _runtime(_ObjC)
case .cocoa(let cocoaBuffer):
migrateDataToNativeBuffer(cocoaBuffer)
return nativeUpdateValue(value, forKey: key)
#endif
}
}
复制代码
_NativeDictionaryBuffer
internal mutating func nativeUpdateValue(
_ value: Value, forKey key: Key
) -> Value? {
var (i, found) = asNative._find(key, startBucket: asNative._bucket(key))
let minBuckets = found
? asNative.bucketCount
: NativeBuffer.bucketCount(
forCapacity: asNative.count + 1,
maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse)
let (_, capacityChanged) = ensureUniqueNativeBuffer(
withBucketCount: minBuckets)
if capacityChanged {
i = asNative._find(key, startBucket: asNative._bucket(key)).pos
}
let oldValue: Value? = found ? asNative.value(at: i.offset) : nil
if found {
asNative.setKey(key, value: value, at: i.offset)
} else {
asNative.initializeKey(key, value: value, at: i.offset)
asNative.count += 1
}
return oldValue
}
复制代码
- 如果当前
key
存在,调用setKey
,通过偏移量改变value
。 - 如果不存在
- 校验添加新的
key
之后是否超出原有的容量。- 如果有,则追加空间申请,重新计算当前的
i
。
- 如果有,则追加空间申请,重新计算当前的
- 校验添加新的
- 初始化偏移量为
i.offset
的内存块,并初始化key
和value
。
_CocoaDictionaryBuffer
_CocoaDictionaryBuffer
转换成_NativeDictionaryBuffer
,之后操作和_NativeDictionaryBuffer
相同。
总结
- 从操作内存的函数来看,
Swift
在实现key
和value
的一一对应,采用的是内存偏移量的方式。 - 字典中,哈希表解决冲突,采用了线性探测法。
- 字典的完整结构为