在随机选取map中元素时,本想用map遍历的方式来返回,但是却并没有通过测试。
那么难道map的遍历并不是那么的随机吗?
以下代码参考go1.18
hiter是map遍历的结构,主要记录了当前遍历的元素、开始位置等来完成整个遍历过程
// A hash iteration structure.
// If you modify hiter, also change cmd/compile/internal/reflectdata/reflect.go
// and reflect/value.go to match the layout of this structure.
type hiter struct {
// 指向下一个遍历key的地址
key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/compile/internal/walk/range.go).
// 指向下一个遍历value的地址
elem unsafe.Pointer // Must be in second position (see cmd/compile/internal/walk/range.go).
// map类型
t *maptype
// map header
h *hmap
// 初始化时指向的bucket
buckets unsafe.Pointer // bucket ptr at hash_iter initialization time
// 当前遍历到的bmap
bptr *bmap // current bucket
overflow *[]*bmap // keeps overflow buckets of hmap.buckets alive
oldoverflow *[]*bmap // keeps overflow buckets of hmap.oldbuckets alive
// 开始桶
startBucket uintptr // bucket iteration started at
// 桶内偏移量
offset uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1)
// 是否从头遍历了
wrapped bool // already wrapped around from end of bucket array to beginning
B uint8
// 正在遍历的槽位
i uint8
// 正在遍历的桶位
bucket uintptr
// 用于扩容时进行检查
checkBucket uintptr
}
mapiterinit为开始遍历的方法,主要是确定初始遍历的位置
// mapiterinit initializes the hiter struct used for ranging over maps.
// The hiter struct pointed to by 'it' is allocated on the stack
// by the compilers order pass or on the heap by reflect_mapiterinit.
// Both need to have zeroed hiter since the struct contains pointers.
func mapiterinit(t *maptype, h *hmap, it *hiter) {
// 若map为空,则跳过遍历过程
it.t = t
if h == nil || h.count == 0 {
return
}
if unsafe.Sizeof(hiter{})/goarch.PtrSize != 12 {
throw("hash_iter size incorrect") // see cmd/compile/internal/reflectdata/reflect.go
}
it.h = h
// grab snapshot of bucket state
// 迭代器快照记录map桶信息
it.B = h.B
it.buckets = h.buckets
if t.bucket.ptrdata == 0 {
// Allocate the current slice and remember pointers to both current and old.
// This preserves all relevant overflow buckets alive even if
// the table grows and/or overflow buckets are added to the table
// while we are iterating.
h.createOverflow()
it.overflow = h.extra.overflow
it.oldoverflow = h.extra.oldoverflow
}
// decide where to start
// 开始bucket选择随机数的低B位
// 偏移量选择随机数高B位与桶数量,显然这个桶数量是不包括溢出桶的
r := uintptr(fastrand())
if h.B > 31-bucketCntBits {
r += uintptr(fastrand()) << 31
}
it.startBucket = r & bucketMask(h.B)
it.offset = uint8(r >> h.B & (bucketCnt - 1))
// iterator state
// 更新迭代器桶为初始桶
it.bucket = it.startBucket
// Remember we have an iterator.
// Can run concurrently with another mapiterinit().
// 标记可能有迭代正在使用桶和旧桶
if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
atomic.Or8(&h.flags, iterator|oldIterator)
}
mapiternext(it)
}
从上面的代码分析我们便可以看出随机选取的元素并不是真的随机,溢出桶并不包含在随机选择的范围里面
在具体的遍历过程,存在以下疑问
- 如果在扩容中,如何进行遍历?
- 如何保证不遗漏?
- 如何防止重复遍历?
func mapiternext(it *hiter) {
h := it.h
// 如果标记已经写入,则抛出并发迭代写入错误
if h.flags&hashWriting != 0 {
throw("concurrent map iteration and map write")
}
t := it.t
bucket := it.bucket
b := it.bptr
i := it.i
checkBucket := it.checkBucket
next:
if b == nil {
// 如果再次遇到开始bucket且是从头遍历的,则说明迭代结束,返回
if bucket == it.startBucket && it.wrapped {
// end of iteration
it.key = nil
it.elem = nil
return
}
// 如果正在迁移过程中,且老桶没被迁移,采用老桶
if h.growing() && it.B == h.B {
// Iterator was started in the middle of a grow, and the grow isn't done yet.
// If the bucket we're looking at hasn't been filled in yet (i.e. the old
// bucket hasn't been evacuated) then we need to iterate through the old
// bucket and only return the ones that will be migrated to this bucket.
oldbucket := bucket & it.h.oldbucketmask()
b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
// bucket未迁移,记录bucket
// checkBucket在当前map处于迁移而bucket未迁移时,为当前bucket
// 否则为noCheck
if !evacuated(b) {
checkBucket = bucket
} else {
b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))
checkBucket = noCheck
}
} else {
// map处于未迁移,或者bucket迁移完成,采用新桶
b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))
checkBucket = noCheck
}
// 推进到下一桶
bucket++
// 遍历到最后一个桶,要绕回0桶继续遍历
if bucket == bucketShift(it.B) {
bucket = 0
it.wrapped = true
}
i = 0
}
// 遍历桶内元素
for ; i < bucketCnt; i++ {
// 从offset槽开始
offi := (i + it.offset) & (bucketCnt - 1)
// 跳过空槽
if isEmpty(b.tophash[offi]) || b.tophash[offi] == evacuatedEmpty {
// TODO: emptyRest is hard to use here, as we start iterating
// in the middle of a bucket. It's feasible, just tricky.
continue
}
// 获取元素key、value
k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize))
if t.indirectkey() {
k = *((*unsafe.Pointer)(k))
}
e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.elemsize))
// 扩容迁移时过滤掉不属于当前指向新桶的旧桶元素
if checkBucket != noCheck && !h.sameSizeGrow() {
// Special case: iterator was started during a grow to a larger size
// and the grow is not done yet. We're working on a bucket whose
// oldbucket has not been evacuated yet. Or at least, it wasn't
// evacuated when we started the bucket. So we're iterating
// through the oldbucket, skipping any keys that will go
// to the other new bucket (each oldbucket expands to two
// buckets during a grow).
// 若key是有效的
if t.reflexivekey() || t.key.equal(k, k) {
// If the item in the oldbucket is not destined for
// the current new bucket in the iteration, skip it.
// 如果旧桶中的项在迭代中不打算用于当前的新桶,则跳过它。
hash := t.hasher(k, uintptr(h.hash0))
if hash&bucketMask(it.B) != checkBucket {
continue
}
} else {
// 对k!=k,也就是nil之类的,判断是否属于该新桶
// 不是,则跳过
// Hash isn't repeatable if k != k (NaNs). We need a
// repeatable and randomish choice of which direction
// to send NaNs during evacuation. We'll use the low
// bit of tophash to decide which way NaNs go.
// NOTE: this case is why we need two evacuate tophash
// values, evacuatedX and evacuatedY, that differ in
// their low bit.
if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
continue
}
}
}
// 如果当前桶未扩容迁移,或者是每次hash不一致的key,获取到key、value添加到迭代器中
if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
!(t.reflexivekey() || t.key.equal(k, k)) {
// This is the golden data, we can return it.
// OR
// key!=key, so the entry can't be deleted or updated, so we can just return it.
// That's lucky for us because when key!=key we can't look it up successfully.
it.key = k
if t.indirectelem() {
e = *((*unsafe.Pointer)(e))
}
it.elem = e
} else {
// 数据已经迁移情况下,处理键已被删除、更新或删除并重新插入的情况,定位数据,最后添加遍历key、value
// The hash table has grown since the iterator was started.
// The golden data for this key is now somewhere else.
// Check the current hash table for the data.
// This code handles the case where the key
// has been deleted, updated, or deleted and reinserted.
// NOTE: we need to regrab the key as it has potentially been
// updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
rk, re := mapaccessK(t, h, k)
if rk == nil {
continue // key has been deleted
}
it.key = rk
it.elem = re
}
// 迭代器记录进度
it.bucket = bucket
if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
it.bptr = b
}
it.i = i + 1
it.checkBucket = checkBucket
return
}
// 遍历溢出桶
b = b.overflow(t)
i = 0
goto next
}
通过以上代码分析,可以看出:
-
在扩容时遍历,
-
如果当前遍历的桶已经迁移好了,那么取新桶
-
如果仍然处于旧桶,则取旧桶。
但值得注意的是要过滤掉那些不属于该新桶的旧桶元素。因为旧桶在扩容迁移时会分为两块,当前指向的新桶只属于其中之一
-
-
bucket从初始桶逐渐递增,保证正常桶都能遍历到。此外也保证了完整遍历溢出桶,直到溢出桶为空
-
通过记录是否从头遍历的标志和起始bucket,以及在扩容过程中过滤不属于该新桶的元素来保证不会重复遍历
Ref
- https://zhuanlan.zhihu.com/p/597348765
- https://www.cnblogs.com/cnblogs-wangzhipeng/p/13292524.html
- https://qcrao.com/post/dive-into-go-map/