golang map真有那么随机吗？——map遍历研究

动态一时爽，重构火葬场

于 2024-01-25 22:45:00 发布

阅读量1k

点赞数 20

分类专栏： lang 文章标签： golang

本文链接：https://blog.csdn.net/iucool/article/details/135851213

版权

lang 专栏收录该内容

36 篇文章 0 订阅

订阅专栏

在随机选取map中元素时，本想用map遍历的方式来返回，但是却并没有通过测试。

那么难道map的遍历并不是那么的随机吗？

以下代码参考go1.18

hiter是map遍历的结构，主要记录了当前遍历的元素、开始位置等来完成整个遍历过程

// A hash iteration structure.
// If you modify hiter, also change cmd/compile/internal/reflectdata/reflect.go
// and reflect/value.go to match the layout of this structure.
//
// hiter carries the state of one map iteration between mapiterinit and the
// successive mapiternext calls: the entry most recently produced, the snapshot
// of the bucket array taken when iteration started, and the position from
// which the next call resumes.
type hiter struct {
	// Pointer to the key produced by the most recent mapiternext call.
	key         unsafe.Pointer // Must be in first position.  Write nil to indicate iteration end (see cmd/compile/internal/walk/range.go).
	// Pointer to the value produced by the most recent mapiternext call.
	elem        unsafe.Pointer // Must be in second position (see cmd/compile/internal/walk/range.go).
	// Type descriptor of the map being iterated.
	t           *maptype
	// Header of the map being iterated.
	h           *hmap
	// Bucket array that was current when the iterator was initialized.
	buckets     unsafe.Pointer // bucket ptr at hash_iter initialization time
	// Bucket (regular or overflow) whose slots are currently being scanned.
	bptr        *bmap          // current bucket
	overflow    *[]*bmap       // keeps overflow buckets of hmap.buckets alive
	oldoverflow *[]*bmap       // keeps overflow buckets of hmap.oldbuckets alive
	// Index of the randomly chosen bucket where iteration began.
	startBucket uintptr        // bucket iteration started at
	// Randomly chosen slot offset applied inside every bucket.
  offset      uint8          // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1)
	// Set once the bucket index has wrapped from the last bucket back to 0.
	wrapped     bool           // already wrapped around from end of bucket array to beginning
	// Copy of h.B taken by mapiterinit.
	B           uint8
	// Slot counter within the current bucket to resume from; mapiternext
	// adds offset to it (mod bucketCnt) to get the real slot index.
	i           uint8
	// Index of the next regular bucket to visit once the current bucket
	// chain (bptr and its overflow buckets) is exhausted.
	bucket      uintptr
	// While iterating an un-evacuated old bucket during a grow: the new
	// bucket index that entries must belong to; otherwise noCheck.
	checkBucket uintptr
}

mapiterinit为开始遍历的方法，主要是确定初始遍历的位置

// mapiterinit initializes the hiter struct used for ranging over maps.
// The hiter struct pointed to by 'it' is allocated on the stack
// by the compilers order pass or on the heap by reflect_mapiterinit.
// Both need to have zeroed hiter since the struct contains pointers.
//
// It records the map type and header, snapshots h.B and h.buckets, picks a
// random start bucket and a random intra-bucket slot offset, flags the map as
// having an iterator, and finally calls mapiternext to position the iterator
// on the first entry.
func mapiterinit(t *maptype, h *hmap, it *hiter) {
  it.t = t
	// A nil or empty map has nothing to iterate: return before setting up
	// any other iterator state.
	if h == nil || h.count == 0 {
		return
	}

	// Sanity check: hiter must be exactly 12 pointer-sized words.
	if unsafe.Sizeof(hiter{})/goarch.PtrSize != 12 {
		throw("hash_iter size incorrect") // see cmd/compile/internal/reflectdata/reflect.go
	}
	it.h = h

	// grab snapshot of bucket state
	// The iterator keeps its own copy of B and of the bucket array pointer.
	it.B = h.B
	it.buckets = h.buckets
	if t.bucket.ptrdata == 0 {
		// Allocate the current slice and remember pointers to both current and old.
		// This preserves all relevant overflow buckets alive even if
		// the table grows and/or overflow buckets are added to the table
		// while we are iterating.
		h.createOverflow()
		it.overflow = h.extra.overflow
		it.oldoverflow = h.extra.oldoverflow
	}

	// decide where to start
	// The start bucket is the low B bits of the random value r, so it is
	// always one of the 2^B regular buckets (never an overflow bucket).
	// The slot offset is taken from the bits just above those B bits,
	// masked with bucketCnt-1 (slots per bucket, not the bucket count).
	// When B+bucketCntBits exceeds 31, a second random value shifted left
	// by 31 is added so that r has enough bits.
	r := uintptr(fastrand())
	if h.B > 31-bucketCntBits {
		r += uintptr(fastrand()) << 31
	}
	it.startBucket = r & bucketMask(h.B)
	it.offset = uint8(r >> h.B & (bucketCnt - 1))

	// iterator state
	// The first bucket to visit is the start bucket.
	it.bucket = it.startBucket

	// Remember we have an iterator.
	// Can run concurrently with another mapiterinit().
	// Set both flags (iterator on buckets, iterator on oldbuckets) unless
	// they are already set.
	if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
		atomic.Or8(&h.flags, iterator|oldIterator)
	}

	mapiternext(it)
}

从上面的代码分析可以看出，遍历的起点并不是在所有元素中均匀随机选取的：随机的只是起始桶（startBucket，只会落在 2^B 个常规桶之一）和桶内的起始槽位（offset），溢出桶并不在随机选择的范围内，因此各个元素成为"第一个被遍历到的元素"的概率并不相等。

在具体的遍历过程，存在以下疑问

如果在扩容中，如何进行遍历？
如何保证不遗漏？
如何防止重复遍历？

// mapiternext advances the iterator to the next map entry and stores
// pointers to it in it.key and it.elem. When every bucket has been visited
// it sets it.key and it.elem to nil instead. It resumes from the position
// saved in it.bucket, it.bptr, it.i and it.checkBucket by the previous call.
func mapiternext(it *hiter) {
	h := it.h

	// A write in progress (hashWriting flag set) during iteration is a
	// fatal concurrent-use error.
	if h.flags&hashWriting != 0 {
		throw("concurrent map iteration and map write")
	}
	t := it.t
	bucket := it.bucket
	b := it.bptr
	i := it.i
	checkBucket := it.checkBucket

next:
	if b == nil {
		// Back at the start bucket after having wrapped around: every
		// bucket has been visited, so iteration is finished.
		if bucket == it.startBucket && it.wrapped {
			// end of iteration
			it.key = nil
			it.elem = nil
			return
		}

		// The map is growing and still has the size it had when the iterator
		// started: the data for this bucket may still live in the old bucket.
		if h.growing() && it.B == h.B {
			// Iterator was started in the middle of a grow, and the grow isn't done yet.
			// If the bucket we're looking at hasn't been filled in yet (i.e. the old
			// bucket hasn't been evacuated) then we need to iterate through the old
			// bucket and only return the ones that will be migrated to this bucket.
			oldbucket := bucket & it.h.oldbucketmask()
			b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
			// Old bucket not evacuated yet: scan it and remember the new
			// bucket index in checkBucket so entries can be filtered below.
			// Old bucket already evacuated: scan the new bucket, no filtering.
			if !evacuated(b) {
				checkBucket = bucket
			} else {
				b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))
				checkBucket = noCheck
			}
		} else {
			// Not in the grow situation above: read straight from the
			// iterator's snapshot of the bucket array.
			b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))
			checkBucket = noCheck
		}

		// Advance the index to the bucket that will be visited after this one.
		bucket++
		// Past the last bucket: wrap around to bucket 0 and record that we did.
		if bucket == bucketShift(it.B) {
			bucket = 0
			it.wrapped = true
		}
		i = 0
	}

	// Scan the slots of the current bucket.
	for ; i < bucketCnt; i++ {
		// Rotate the slot index by the random offset chosen in mapiterinit.
		offi := (i + it.offset) & (bucketCnt - 1)
		// Skip empty slots.
		if isEmpty(b.tophash[offi]) || b.tophash[offi] == evacuatedEmpty {
			// TODO: emptyRest is hard to use here, as we start iterating
			// in the middle of a bucket. It's feasible, just tricky.
			continue
		}
		// Compute the addresses of the key and the value stored in this slot.
		k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize))
		if t.indirectkey() {
			k = *((*unsafe.Pointer)(k))
		}
		e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.elemsize))

		// Scanning an old bucket during a size-doubling grow: drop entries
		// that belong to the other new bucket.
		if checkBucket != noCheck && !h.sameSizeGrow() {
			// Special case: iterator was started during a grow to a larger size
			// and the grow is not done yet. We're working on a bucket whose
			// oldbucket has not been evacuated yet. Or at least, it wasn't
			// evacuated when we started the bucket. So we're iterating
			// through the oldbucket, skipping any keys that will go
			// to the other new bucket (each oldbucket expands to two
			// buckets during a grow).
			// Key equals itself (k == k), so its hash is repeatable.
			if t.reflexivekey() || t.key.equal(k, k) {
				// If the item in the oldbucket is not destined for
				// the current new bucket in the iteration, skip it.
				hash := t.hasher(k, uintptr(h.hash0))
				if hash&bucketMask(it.B) != checkBucket {
					continue
				}
			} else {
				// k != k, i.e. a NaN key (not nil): decide which new bucket
				// it belongs to from tophash instead of the hash, and skip
				// it if that is not the bucket being checked.
				// Hash isn't repeatable if k != k (NaNs).  We need a
				// repeatable and randomish choice of which direction
				// to send NaNs during evacuation. We'll use the low
				// bit of tophash to decide which way NaNs go.
				// NOTE: this case is why we need two evacuate tophash
				// values, evacuatedX and evacuatedY, that differ in
				// their low bit.
				if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
					continue
				}
			}
		}

		// The slot has not been evacuated, or the key is not equal to
		// itself: the data found here can be returned directly.
		if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
			!(t.reflexivekey() || t.key.equal(k, k)) {
			// This is the golden data, we can return it.
			// OR
			// key!=key, so the entry can't be deleted or updated, so we can just return it.
			// That's lucky for us because when key!=key we can't look it up successfully.
			it.key = k
			if t.indirectelem() {
				e = *((*unsafe.Pointer)(e))
			}
			it.elem = e
		} else {
			// The slot has been evacuated: look the key up in the current
			// table to get its up-to-date key/value, or skip it if it is gone.
			// The hash table has grown since the iterator was started.
			// The golden data for this key is now somewhere else.
			// Check the current hash table for the data.
			// This code handles the case where the key
			// has been deleted, updated, or deleted and reinserted.
			// NOTE: we need to regrab the key as it has potentially been
			// updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
			rk, re := mapaccessK(t, h, k)
			if rk == nil {
				continue // key has been deleted
			}
			it.key = rk
			it.elem = re
		}

		// Save the position so the next call resumes right after this slot.
		it.bucket = bucket
		if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
			it.bptr = b
		}
		it.i = i + 1
		it.checkBucket = checkBucket
		return
	}

	// All slots of this bucket are done: continue with its overflow bucket.
	// If that is nil, the code at label next selects the following regular
	// bucket (or ends the iteration).
	b = b.overflow(t)
	i = 0
	goto next
}

通过以上代码分析，可以看出：

在扩容时遍历，
- 如果当前遍历的桶已经迁移好了，那么取新桶
- 如果对应的旧桶尚未迁移，则遍历旧桶。
  
  但值得注意的是，遍历旧桶时要过滤掉那些不属于当前新桶的元素。因为在翻倍扩容时，一个旧桶中的元素会被分流到两个新桶，当前遍历的新桶只是其中之一
为保证不遗漏：bucket序号从起始桶开始逐个递增，到达末尾后绕回0号桶继续，保证所有常规桶都能遍历到；此外每个桶的溢出桶链也会被完整遍历，直到没有后续溢出桶为止
为防止重复遍历：通过记录是否已经绕回（wrapped）的标志和起始bucket来判断遍历何时结束；在扩容过程中，则通过过滤不属于当前新桶的旧桶元素，保证同一元素不会被遍历两次