.NET Framework 4.8 Dictionary<TKey, TValue> HashTable算法:
1、将key(任意二进制数据)通过hash算法得到一个hashCode
2、创建一个bucket数组,hashCode对bucket数组长度取余的结果作为下标,hashCode等数据保存到此下标的bucket中
这里要考虑:不同数据可能得到相同hashCode,不同hashCode取余结果可能相同,那么就会保存在同一个bucket中;
对于这种情况,采用链表存储这个bucket中的多个hashCode,这也就是拉链法。
要点:
- 不以hashCode作为下标直接创建一个数组,是因为hashCode的取值范围很大(4字节),直接按hashCode建数组会浪费大量空间;因此创建一个长度略大于数据总量的bucket数组,保证够用又不浪费空间。
- 取值过程:key通过hash算法得到一个hashCode,hashCode对bucket数组长度取余,找到对应下标的bucket,比较hashCode和key,相同则返回value,不同则根据next继续比较链表中的下一项;查找过程只需遍历该bucket对应的链表,而不必遍历全部数据,所以效率高。
- bucket数组长度取大于数据量的质数。长度取质数是因为质数的因子最少:hashCode对质数取余时,除了恰好是bucket长度整数倍的值以外,其余的值都能较均匀地分散到各个bucket中,从而减少冲突。 参考:https://www.lagou.com/lgeduarticle/6335.html
- Resize会重新分配数据到bucket中,所以预先设置容量,可以减少扩容产生的性能消耗
.NET Framework 4.8 Dictionary<TKey, TValue>源码分析
// Allocates the bucket and entry arrays for the requested capacity and resets the free list.
// capacity: requested number of elements; the actual array length is chosen by HashHelpers.GetPrime.
private void Initialize(int capacity) {
// Both arrays share one length taken from HashHelpers.GetPrime
// (presumably a prime no smaller than capacity -- TODO confirm against HashHelpers).
int primeSize = HashHelpers.GetPrime(capacity);
int[] freshBuckets = new int[primeSize];
// -1 marks an empty bucket: no entry index has been stored in it yet.
int slot = 0;
while (slot < freshBuckets.Length) {
freshBuckets[slot] = -1;
slot++;
}
buckets = freshBuckets;
entries = new Entry[primeSize];
// -1 means there is no released entry slot available for reuse.
freeList = -1;
}
// Adds a key/value pair, or -- when add is false -- overwrites the value stored for an existing key.
// key:   must not be null; a null key is reported through ThrowHelper.ThrowArgumentNullException.
// value: the value to store for key.
// add:   true  -> an already-present key is reported through ThrowHelper.ThrowArgumentException
//                 (Argument_AddingDuplicate);
//        false -> an already-present key has its value replaced in place.
private void Insert(TKey key, TValue value, bool add) {
if( key == null ) {
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
}
// Allocate the arrays lazily on first use.
if (buckets == null) Initialize(0);
// targetBucket is used as an index into buckets, so it must be non-negative;
// masking off the sign bit guarantees that hashCode is never negative.
int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
int targetBucket = hashCode % buckets.Length;
#if FEATURE_RANDOMIZED_STRING_HASHING
int collisionCount = 0;
#endif
// Walk the chain of entries that hang off this bucket; a negative index ends the chain.
for (int i = buckets[targetBucket]; i >= 0; i = entries[i].next) {
// Same key already present: throw when called in "add" mode, otherwise replace the value.
if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
if (add) {
ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
}
entries[i].value = value;
version++;
return;
}
#if FEATURE_RANDOMIZED_STRING_HASHING
// Counts how many non-matching entries were visited in this bucket's chain.
collisionCount++;
#endif
}
// Pick the entries slot that will hold the new pair.
int index;
if (freeCount > 0) {
// Reuse a slot released earlier by Remove: pop the head of the free list.
index = freeList;
freeList = entries[index].next;
freeCount--;
}
else {
if (count == entries.Length)
{
// entries is full. Resize() is presumably growing both arrays (not shown here -- confirm),
// so the bucket index is recomputed against the current buckets.Length.
Resize();
targetBucket = hashCode % buckets.Length;
}
// Take the next never-used slot at the end of the occupied region.
index = count;
count++;
}
// When the first item is added to a bucket:
//   entries[index].next    = buckets[targetBucket] = -1
//   buckets[targetBucket]  = index = position of the first item in entries
// When a second item lands in the same bucket:
//   entries[index].next    = buckets[targetBucket] = position of the first item in entries
//   buckets[targetBucket]  = index = position of the second item in entries
// The next fields therefore link together all items that belong to one bucket,
// with the most recently inserted item at the head of the chain.
entries[index].hashCode = hashCode;
entries[index].next = buckets[targetBucket];
entries[index].key = key;
entries[index].value = value;
buckets[targetBucket] = index;
version++;
#if FEATURE_RANDOMIZED_STRING_HASHING
#if FEATURE_CORECLR
// In case we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing
// in this case will be EqualityComparer<string>.Default.
// Note, randomized string hashing is turned on by default on coreclr so EqualityComparer<string>.Default will
// be using randomized string hashing
// Collision threshold exceeded: swap the comparer, then call Resize with the current entries.Length.
// NOTE(review): the length passed is unchanged, so this looks like a rehash with the new comparer
// rather than a capacity increase; the meaning of the second argument (true) should be confirmed against Resize.
if (collisionCount > HashHelpers.HashCollisionThreshold && comparer == NonRandomizedStringEqualityComparer.Default)
{
comparer = (IEqualityComparer<TKey>) EqualityComparer<string>.Default;
Resize(entries.Length, true);
}
#else
// Same collision handling for the non-CoreCLR build: only comparers recognized by
// HashHelpers.IsWellKnownEqualityComparer are replaced with a randomized counterpart.
if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(comparer))
{
comparer = (IEqualityComparer<TKey>) HashHelpers.GetRandomizedEqualityComparer(comparer);
Resize(entries.Length, true);
}
#endif // FEATURE_CORECLR
#endif
}
// Removes the entry stored under key, if there is one.
// key: must not be null; a null key is reported through ThrowHelper.ThrowArgumentNullException.
// Returns true when an entry was found and removed, false otherwise
// (including when the arrays have not been allocated yet).
public bool Remove(TKey key) {
if (key == null) {
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
}
// Nothing has been stored yet, so there is nothing to remove.
if (buckets == null) {
return false;
}
int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
int bucket = hashCode % buckets.Length;
// previous trails current along the bucket's chain; -1 means current is the chain head.
int previous = -1;
int current = buckets[bucket];
while (current >= 0) {
bool isMatch = entries[current].hashCode == hashCode && comparer.Equals(entries[current].key, key);
if (!isMatch) {
previous = current;
current = entries[current].next;
continue;
}
if (previous >= 0) {
// The match is not the chain head: make its predecessor skip over it.
entries[previous].next = entries[current].next;
}
else {
// The match is the chain head: the bucket now points at the second item.
buckets[bucket] = entries[current].next;
}
// Clear the slot and push it onto the free list so that a later Insert can reuse it.
entries[current].hashCode = -1;
entries[current].next = freeList;
entries[current].key = default(TKey);
entries[current].value = default(TValue);
freeList = current;
freeCount++;
version++;
return true;
}
return false;
}