GoLang之Map深度讲解

GoGo在努力

已于 2022-05-18 14:09:53 修改

阅读量1.2k

点赞数

分类专栏： GoLang底层　文章标签： golang、散列表、哈希算法

于 2022-05-18 00:05:08 首次发布

本文链接：https://blog.csdn.net/weixin_52690231/article/details/124832899

版权

GoLang底层专栏收录该内容

216 篇文章 139 订阅

订阅专栏

本文深入探讨了Go语言中Map的实现细节，包括Map查找过程、哈希函数的工作原理、Map插入与修改的逻辑以及扩容策略。文中详细解释了tophash的含义和作用，如何判断bucket是否为空，以及哈希计算中如何避免冲突。此外，还介绍了Map在达到负载因子时如何进行等量或翻倍扩容，并分析了扩容过程中数据迁移的状态标记。通过对Map内部机制的理解，有助于提升Go语言编程的效率和质量。

摘要由CSDN通过智能技术生成

文章目录

GoLang之Map深度讲解

GoLang之Map深度讲解

1.Map查找

// The constants below come from runtime/map.go.
const (
	// Maximum number of key/elem pairs a bucket can hold.
	bucketCntBits = 3
	// 1<<3 = 8 slots per bucket.
	bucketCnt     = 1 << bucketCntBits

	// Maximum average load of a bucket that triggers growth is 6.5.
	// Represent as loadFactorNum/loadFactorDen, to allow integer math.
	loadFactorNum = 13
	loadFactorDen = 2

	// Maximum key or elem size to keep inline (instead of mallocing per element).
	// Must fit in a uint8.
	// Fast versions cannot handle big elems - the cutoff size for
	// fast versions in cmd/compile/internal/gc/walk.go must be at most this elem.
	maxKeySize  = 128
	maxElemSize = 128

	// data offset should be the size of the bmap struct, but needs to be
	// aligned correctly. For amd64p32 this means 64-bit alignment
	// even though pointers are 32 bit.
	// dataOffset is the offset of the first key slot relative to the start
	// of a bmap (mapassign locates key i at dataOffset + i*keysize).
	dataOffset = unsafe.Offsetof(struct {
		// b stands in for the bucket header; it is a bmap value, so the
		// offset of v is the header size rounded up to int64 alignment.
		b bmap
		v int64
	}{}.v)

	// Possible tophash values. We reserve a few possibilities for special marks.
	// Each bucket (including its overflow buckets, if any) will have either all or none of its
	// entries in the evacuated* states (except during the evacuate() method, which only happens
	// during map writes and thus no one else can observe the map during that time).

	// emptyRest carries two meanings: the cell itself is free, and every
	// cell after it (higher indexes and overflow buckets) is free as well.
	// Consequences:
	//   - its value is 0, so a tophash array that has never been written
	//     already reads as emptyRest;
	//   - tophash[0] == emptyRest means the whole bucket is empty;
	//   - a scan can stop as soon as it meets emptyRest (mapassign breaks
	//     out of its bucket loop at that point) because nothing can follow.
	// NOTE(review): the delete path is not shown here; per the original
	// annotation it decides whether a removed cell can become emptyRest.
	emptyRest      = 0 // this cell is empty, and there are no more non-empty cells at higher indexes or overflows

	// emptyOne only says that this cell is free; nothing is implied about
	// the cells after it.
	// NOTE(review): per the original annotation, deletion first marks the
	// cell emptyOne and then checks whether it can be upgraded to
	// emptyRest — confirm against the delete path.
	emptyOne       = 1 // this cell is empty
	evacuatedX     = 2 // key/elem has been evacuated to the first (X) half of the new bucket array
	evacuatedY     = 3 // same as above, but evacuated to the second (Y) half of the new bucket array

	// Marks a cell that held no entry inside a bucket that has already
	// been evacuated.
	evacuatedEmpty = 4 // cell is empty, bucket is evacuated.
	minTopHash     = 5 // minimum tophash for a normal filled cell

	// flags
	// An iterator may be walking buckets.
	iterator     = 1 // there may be an iterator using buckets

	// An iterator may be walking oldbuckets.
	oldIterator  = 2 // there may be an iterator using oldbuckets

	// Set for the duration of a write; mapassign throws
	// "concurrent map writes" when it finds the bit already set.
	hashWriting  = 4 // a goroutine is writing to the map

	// The growth in progress keeps the bucket count unchanged.
	sameSizeGrow = 8 // the current map growth is to a new map of the same size

	// sentinel bucket ID for iterator checks
	noCheck = 1<<(8*goarch.PtrSize) - 1
)

evacuatedX && evacuatedY
这两个状态与扩容有关，记录元素被迁移到了新桶的部位X或Y;
如果是等量扩容迁移，旧桶的元素必然被迁移到X部；
如果是翻倍扩容迁移，旧桶元素可能迁移到X部，也可能迁移到Y部。当迁移到X部时，旧桶tophash置为evacuatedX；当迁移到Y部时，旧桶tophash置为evacuatedY。如下图：
举个例子说明：假设旧桶数组长度为4，翻倍扩容后新桶数组长度为8。此时要把旧桶1的元素迁到新桶，因为新桶长度增长了一倍，旧桶1的元素可能被迁移到新桶1（X部），也可能被迁移到新桶5（Y部，即1+4）。当元素迁移到新桶1时，把旧桶中对应的tophash置为evacuatedX；迁移到新桶5时，则置为evacuatedY。要注意被修改的是旧桶的tophash。

在这里插入图片描述

// tophash lives in runtime/map.go.
//
// tophash derives the one-byte value stored in a bucket's tophash array from
// a key's full hash by taking the most significant 8 bits of hash.
//
// Values below minTopHash are reserved as cell-state markers (empty,
// evacuated, ...). When the top byte falls into that reserved range it is
// shifted up by minTopHash, so the result is always >= minTopHash and can
// never be confused with a state marker.
func tophash(hash uintptr) uint8 {
	top := uint8(hash >> (goarch.PtrSize*8 - 8))
	if top >= minTopHash {
		// Already outside the reserved range: use the byte as is.
		return top
	}
	return top + minTopHash
}

// evacuated lives in runtime/map.go.
//
// evacuated reports whether bucket b has already been moved to the new
// bucket array. Only tophash[0] is inspected: the bucket counts as evacuated
// when that value lies strictly between emptyOne and minTopHash, i.e. it is
// one of evacuatedX (2), evacuatedY (3) or evacuatedEmpty (4).
func evacuated(b *bmap) bool {
	first := b.tophash[0]
	if first <= emptyOne {
		// emptyRest / emptyOne: plain empty cell, not an evacuation mark.
		return false
	}
	return first < minTopHash
}

2.哈希函数

hasher 是 map 使用的哈希函数：运行时会根据 key 的类型，把 hasher 设置为对应类型的哈希函数；计算哈希时还会传入哈希种子 hash0，用来增加哈希结果的随机性。

// The maptype struct is located in runtime/type.go.
//
// maptype is the runtime type descriptor of a map type. The map code reads
// it to learn how to hash keys and how keys, elems and buckets are laid out.
type maptype struct {
	typ    _type
	// key and elem describe the map's key and element types; mapassign uses
	// key for equality checks and typed copies, and elem when allocating
	// indirect elems.
	key    *_type
	elem   *_type
	bucket *_type // internal type representing a hash bucket
	// function for hashing keys (ptr to key, seed) -> hash
	hasher     func(unsafe.Pointer, uintptr) uintptr
	keysize    uint8  // size of key slot
	elemsize   uint8  // size of elem slot
	bucketsize uint16 // size of bucket
	// NOTE(review): presumably the bit set behind indirectkey(),
	// indirectelem() and needkeyupdate() — confirm in runtime/type.go.
	flags      uint32
}

3.Map插入、修改

// mapassign lives in runtime/map.go.
//
// mapassign finds or creates the slot for key in map h and returns a pointer
// to the slot's elem. It stores the key (and the tophash byte) itself but
// does not write the elem; the returned pointer is what the value is stored
// through.
//
// It panics when h is nil and throws "concurrent map writes" when the
// hashWriting flag shows that another write is in progress.
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	// Writing to a nil map is an error: panic with a descriptive message.
	if h == nil {
		panic(plainError("assignment to entry in nil map"))
	}
	// Instrumentation hooks, active only when the matching build mode is
	// enabled (race detector / msan / asan).
	if raceenabled {
		callerpc := getcallerpc()
		pc := abi.FuncPCABIInternal(mapassign)
		racewritepc(unsafe.Pointer(h), callerpc, pc)
		raceReadObjectPC(t.key, key, callerpc, pc)
	}
	if msanenabled {
		msanread(key, t.key.size)
	}
	if asanenabled {
		asanread(key, t.key.size)
	}
	// If the write flag is already set, some other goroutine is modifying
	// this map right now: abort.
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}
	// Hash the key, mixing in the per-map seed hash0.
	hash := t.hasher(key, uintptr(h.hash0))

	// Mark the map as being written. The bit was verified to be clear above,
	// so the XOR sets it. This happens only after hashing.
	// NOTE(review): upstream explains that hasher may panic, in which case
	// no write has happened yet — confirm.
	h.flags ^= hashWriting

	// Lazily allocate the first bucket on the first write.
	if h.buckets == nil {
		h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
	}

again:
	// Pick the bucket index from the hash.
	// NOTE(review): bucketMask presumably yields 1<<B - 1 — confirm.
	bucket := hash & bucketMask(h.B)
	// While a grow is in progress, first make sure the old bucket that maps
	// to this index has been migrated into the new bucket array.
	if h.growing() {
		growWork(t, h, bucket)
	}
	// Locate the bucket in the (new) bucket array and compute the tophash
	// byte for this key.
	b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
	top := tophash(hash)

	// inserti/insertk/elem remember the first free slot seen, in case the
	// key turns out not to be present.
	var inserti *uint8
	var insertk unsafe.Pointer
	var elem unsafe.Pointer
bucketloop:
	// Two nested loops: the outer one walks the bucket and its chain of
	// overflow buckets, the inner one walks the tophash entries of the
	// current bucket.
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			// tophash differs from the key's tophash byte: not our key.
			if b.tophash[i] != top {
				// Remember the first empty slot as the insertion candidate.
				if isEmpty(b.tophash[i]) && inserti == nil {
					inserti = &b.tophash[i]
					insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
					elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
				}
				// emptyRest means nothing is stored beyond this point, so
				// the key cannot exist: leave both loops and insert.
				if b.tophash[i] == emptyRest {
					break bucketloop
				}
				// Move on to the next tophash entry.
				continue
			}
			// tophash matches: load the stored key for a full comparison.
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey() {
				// The slot holds a pointer to the key, not the key itself.
				k = *((*unsafe.Pointer)(k))
			}
			// Same tophash byte but a different key: skip this entry and
			// keep scanning.
			if !t.key.equal(key, k) {
				continue
			}
			// The key already exists in the map: this is an update.
			// Overwrite the stored key only when the type requires it.
			if t.needkeyupdate() {
				typedmemmove(t.key, k, key)
			}
			elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
			goto done
		}
		// Continue with the next overflow bucket. Running out of buckets
		// without a match means this is an insert rather than an update.
		ovf := b.overflow(t)
		if ovf == nil {
			break
		}
		b = ovf
	}


	// Not found. If no grow is in progress and adding one entry would exceed
	// the load factor, or there are too many overflow buckets, start a grow
	// and redo the lookup from the `again` label, since growing invalidates
	// everything computed so far.
	if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
		hashGrow(t, h)
		goto again // Growing the table invalidates everything, so try again
	}
	// No free slot was seen: the bucket and all its overflow buckets are
	// full, so chain a new overflow bucket and use its first slot.
	if inserti == nil {
		// The current bucket and all the overflow buckets connected to it are full, allocate a new one.
		newb := h.newoverflow(t, b)
		inserti = &newb.tophash[0]
		insertk = add(unsafe.Pointer(newb), dataOffset)
		elem = add(insertk, bucketCnt*uintptr(t.keysize))
	}

	// Store the new key and tophash; for indirect keys/elems allocate the
	// backing object and store its pointer in the slot.
	if t.indirectkey() {
		kmem := newobject(t.key)
		*(*unsafe.Pointer)(insertk) = kmem
		insertk = kmem
	}
	if t.indirectelem() {
		vmem := newobject(t.elem)
		*(*unsafe.Pointer)(elem) = vmem
	}
	typedmemmove(t.key, insertk, key)
	*inserti = top
	// One more entry in the map.
	h.count++

done:
	// The write flag must still be set; if it is not, another writer
	// interfered.
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	// Clear the write flag.
	h.flags &^= hashWriting
	if t.indirectelem() {
		// The slot holds a pointer to the elem: return the elem's address.
		elem = *((*unsafe.Pointer)(elem))
	}
	return elem
}

4.扩容分析

扩容后：
1.如果是等量扩容，B保持不变；如果是翻倍扩容，h.B加1（新B=旧B+1），桶的数量变为原来的2倍；
2.noverflow设置为0，扩容后新桶中已使用的溢出桶为0；
3.oldbuckets指向原来的桶（旧桶）；
4.buckets指向新创建的桶（新桶中暂时还没有数据）；
5.nevacuate设置为0，表示如果数据迁移的话，应该从原桶（旧桶）中的第0个位置开始迁移；
6.extra.overflow设置为nil，因为新桶中还未使用溢出桶
7.extra.nextOverflow设置为新创建的桶中的第一个溢出桶的位置；
8.extra.oldoverflow设置为原桶（旧桶）已使用的所有溢出桶，即：h.extra.oldoverflow = h.extra.overflow。

// advanceEvacuationMark lives in runtime/map.go.
//
// advanceEvacuationMark moves h.nevacuate, the evacuation progress counter,
// forward past buckets that are already evacuated, looking at most 1024
// buckets ahead and never beyond newbit. Once the counter reaches newbit
// every old bucket has been evacuated, so the old bucket array and the old
// overflow list are released and the sameSizeGrow flag is cleared.
func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) {
	// The bucket at the current mark counts as done.
	h.nevacuate++

	// Scan limit: 1024 buckets ahead, capped at newbit.
	stop := newbit
	if ahead := h.nevacuate + 1024; ahead <= newbit {
		stop = ahead
	}

	// Skip over buckets that are already evacuated; stop at the first one
	// that still needs work. Everything before h.nevacuate is evacuated.
	for h.nevacuate != stop {
		if !bucketEvacuated(t, h, h.nevacuate) {
			break
		}
		h.nevacuate++
	}

	if h.nevacuate != newbit {
		// Some old buckets are still pending.
		return
	}

	// Growing is finished: drop the old buckets ...
	h.oldbuckets = nil
	// ... and the old overflow buckets, if extra is allocated at all.
	if h.extra != nil {
		h.extra.oldoverflow = nil
	}
	// Clear the same-size-grow marker.
	h.flags &^= sameSizeGrow
}

// tooManyOverflowBuckets sits with the other map helpers in runtime/map.go.
//
// tooManyOverflowBuckets reports whether noverflow overflow buckets are too
// many for a map with 1<<B buckets. The threshold is 1<<B, with B capped at
// 15 so that the threshold still fits in a uint16.
func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
	const maxShift = 15

	shift := B
	if shift > maxShift {
		shift = maxShift
	}
	limit := uint16(1) << shift
	return noverflow >= limit
}

// overLoadFactor sits with the other map helpers in runtime/map.go.
//
// overLoadFactor reports whether count entries spread over 1<<B buckets
// exceed the load factor loadFactorNum/loadFactorDen (6.5 per bucket). A map
// whose entries all fit in a single bucket (count <= bucketCnt) is never
// considered overloaded.
func overLoadFactor(count int, B uint8) bool {
	if count <= bucketCnt {
		return false
	}
	// Divide before multiplying, exactly as the threshold is defined.
	threshold := loadFactorNum * (bucketShift(B) / loadFactorDen)
	return uintptr(count) > threshold
}