GoLang Map 实现分析

文章目录

概述

本文主题是通过dlv调试工具单步调试GoLang源码map数据结构的实现原理,加深对map的理解和运用。 Golang中map是一种kv存储结构,底层基于hash的实现;

  • 工具版本
Delve Debugger
Version: 1.8.2
Build: $Id: dbb493ec14d1e7753504d016b1e1ef1665b75b16 $

go version go1.17.10 darwin/amd64
  • 调试代码
package main

import "fmt"

func main() {
	m := write()
	fmt.Println(m["a"])
	delete(m, "a")
}

func write() map[string]int {
	m := make(map[string]int)
	m["a"] = 1
	m["b"] = 2
	m["c"] = 2
	return m
}

调试

  1. 进入调试并设置断点
$ dlv debug main.go 
(dlv) b main.main
Breakpoint 1 set at 0x10ac8aa for main.main() ./main.go:5
  1. 程序运行并阻塞在断点
(dlv) c
> main.main() ./main.go:5 (hits goroutine(1):1 total:1) (PC: 0x10ac8aa)
    1:	package main
    2:	
    3:	import "fmt"
    4:	
=>   5:	func main() {
    6:		m := write()
    7:		fmt.Println(m["a"])
    8:		delete(m, "a")
    9:	}
   10:	
  1. 进入write()函数并查看write()汇编指令
(dlv) s
> main.write() ./main.go:11 (PC: 0x10ac9aa)
    6:		m := write()
    7:		fmt.Println(m["a"])
    8:		delete(m, "a")
    9:	}
   10:	
=>  11:	func write() map[string]int {
   12:		m := make(map[string]int)
   13:		m["a"] = 1
   14:		m["b"] = 2
   15:		m["c"] = 2
   16:		return m
(dlv) disass
   TEXT main.write(SB) /Users/sun/work/workhelp/map/main.go
   	main.go:11	0x10ac9a0	493b6610		cmp rsp, qword ptr [r14+0x10]
   	main.go:11	0x10ac9a4	0f86b4000000		jbe 0x10aca5e
   	main.go:11	0x10ac9aa	4883ec50		sub rsp, 0x50
   	main.go:11	0x10ac9ae	48896c2448		mov qword ptr [rsp+0x48], rbp
   	main.go:11	0x10ac9b3	488d6c2448		lea rbp, ptr [rsp+0x48]
   	main.go:11	0x10ac9b8	48c744242000000000	mov qword ptr [rsp+0x20], 0x0
=>	main.go:12	0x10ac9c1	e81a07f6ff		call $runtime.makemap_small
   	main.go:12	0x10ac9c6	4889442428		mov qword ptr [rsp+0x28], rax
   	main.go:13	0x10ac9cb	4889c3			mov rbx, rax
   	main.go:13	0x10ac9ce	488d0d237d0100		lea rcx, ptr [rip+0x17d23]
   	main.go:13	0x10ac9d5	bf01000000		mov edi, 0x1
   	main.go:13	0x10ac9da	488d057fa60000		lea rax, ptr [rip+0xa67f]
   	main.go:13	0x10ac9e1	e8fa4ff6ff		call $runtime.mapassign_faststr
   	main.go:13	0x10ac9e6	4889442440		mov qword ptr [rsp+0x40], rax
   	main.go:13	0x10ac9eb	8400			test byte ptr [rax], al
   	main.go:13	0x10ac9ed	48c70001000000		mov qword ptr [rax], 0x1
   	main.go:14	0x10ac9f4	488b5c2428		mov rbx, qword ptr [rsp+0x28]
   	main.go:14	0x10ac9f9	488d0560a60000		lea rax, ptr [rip+0xa660]
   	main.go:14	0x10aca00	488d0df27c0100		lea rcx, ptr [rip+0x17cf2]
   	main.go:14	0x10aca07	bf01000000		mov edi, 0x1
   	main.go:14	0x10aca0c	e8cf4ff6ff		call $runtime.mapassign_faststr
   	main.go:14	0x10aca11	4889442438		mov qword ptr [rsp+0x38], rax
   	main.go:14	0x10aca16	8400			test byte ptr [rax], al
   	main.go:14	0x10aca18	48c70002000000		mov qword ptr [rax], 0x2
   	main.go:15	0x10aca1f	488b5c2428		mov rbx, qword ptr [rsp+0x28]
   	main.go:15	0x10aca24	488d0535a60000		lea rax, ptr [rip+0xa635]
   	main.go:15	0x10aca2b	488d0dc87c0100		lea rcx, ptr [rip+0x17cc8]
   	main.go:15	0x10aca32	bf01000000		mov edi, 0x1
   	main.go:15	0x10aca37	e8a44ff6ff		call $runtime.mapassign_faststr
   	main.go:15	0x10aca3c	4889442430		mov qword ptr [rsp+0x30], rax
   	main.go:15	0x10aca41	8400			test byte ptr [rax], al
   	main.go:15	0x10aca43	48c70002000000		mov qword ptr [rax], 0x2
   	main.go:16	0x10aca4a	488b442428		mov rax, qword ptr [rsp+0x28]
   	main.go:16	0x10aca4f	4889442420		mov qword ptr [rsp+0x20], rax
   	main.go:16	0x10aca54	488b6c2448		mov rbp, qword ptr [rsp+0x48]
   	main.go:16	0x10aca59	4883c450		add rsp, 0x50
   	main.go:16	0x10aca5d	c3			ret
   	main.go:11	0x10aca5e	6690			data16 nop
   	main.go:11	0x10aca60	e8bb1cfbff		call $runtime.morestack_noctxt
   	.:0		0x10aca65	e936ffffff		jmp $main.write
(dlv) si
> runtime.makemap_small() /usr/local/Cellar/go/1.17/src/runtime/map.go:292 (PC: 0x100d0e0)
Warning: debugging optimized function
   287:	}
   288:	
   289:	// makemap_small implements Go map creation for make(map[k]v) and
   290:	// make(map[k]v, hint) when hint is known to be at most bucketCnt
   291:	// at compile time and the map needs to be allocated on the heap.
=> 292:	func makemap_small() *hmap {
   293:		h := new(hmap)
   294:		h.hash0 = fastrand()
   295:		return h
   296:	}

跟踪到这里我们可以发现,map 经编译后底层结构是hmap; makemap_small和makemap函数都是创建hmap 的入口,区别是初始化的属性数量差异,makemap_small 只设置了 hash0属性,makemap 初始化大部分属性。 hmap 的结构定义如下:

(dlv) p h
*runtime.hmap {
	count: 0,                                                          // 元素数量
	flags: 0,                                                     
	B: 0,                                                                // 桶的个数 2^B
	noverflow: 0,                                                   // 溢出桶个数   hash 冲突产品
	hash0: 3724950317,                                      // hash seed       
	buckets: unsafe.Pointer(0x0),                        // buckets 数组指针
	oldbuckets: unsafe.Pointer(0x0),                   // 扩容时赋值的buckets数组
	nevacuate: 0,                                                 // 搬迁进度
	extra: *runtime.mapextra nil,                          // 扩容的指针
}
(dlv) si
=> 202:	func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
   203:		if h == nil {
   204:			panic(plainError("assignment to entry in nil map"))
   205:		}

mapassign_faststr 首先检验 h 是否是nil, 由上文可知,h = makemap_small, h != nil

=> 210:		if h.flags&hashWriting != 0 {
   211:				throw("concurrent map writes")
   212:			}

map 检查 h.flags 是否已经存在goroutine写入操作,如果存在就panic; 由此可见 第一点 map 是并发不安全的, 第二点 map 是有状态的,由h.flags 管理。

   213:		key := stringStructOf(&s)
   214:		hash := t.hasher(noescape(unsafe.Pointer(&s)), uintptr(h.hash0))
   215:	
   216:		// Set hashWriting after calling t.hasher for consistency with mapassign.
=> 217:		h.flags ^= hashWriting
   218:	
   219:		if h.buckets == nil {
   220:			h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
   221:		}
   222:	
(dlv) p key 
*runtime.stringStruct {str: unsafe.Pointer(0x10c46f8), len: 1}
(dlv) p hash
11401214700741223085
(dlv) 

通过这段逻辑 可知 hmap 对 key 进行处理 并生成 hash 值; 同时处理 h.buckets; 对此 我们可能疑惑 hmap 是如何完成 kv 存储,hash 冲突处理,存储扩容,垃圾回收等功能的; 我们先往下看,稍后作出解答


   224:		bucket := hash & bucketMask(h.B)
   225:		if h.growing() {
   226:			growWork_faststr(t, h, bucket)
   227:		}
   228:		b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
=> 229:    top := tophash(hash)

这里 可以看到一个新的数据结构 bmap; bmap 存储的具体结构;结合源代码仔细理解一下, 请添加图片描述

func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
    // h map 指针,s key
	if h == nil {
		panic(plainError("assignment to entry in nil map"))
	}
	// 竞态检查
	if raceenabled {
		callerpc := getcallerpc()
		racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr))
	}
	//检查是否是并发写入
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}
	//runtime.stringStruct 包装一下s, 应该对齐key; *runtime.stringStruct {str: unsafe.Pointer(0x10c46f9), len: 1}
	key := stringStructOf(&s)
	//根据hash seead 和key 生成hash值;  15254809650390487842
	hash := t.hasher(noescape(unsafe.Pointer(&s)), uintptr(h.hash0))

	// Set hashWriting after calling t.hasher for consistency with mapassign.
	//设置状态值,表示写入
	h.flags ^= hashWriting

	//没有存储数据空间则申请, makemap_small 创建出来的hmap中 buckets 为nil
	if h.buckets == nil {
		h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
	}

again:
	bucket := hash & bucketMask(h.B)
	if h.growing() {
		growWork_faststr(t, h, bucket)
	}
	b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
	// 211 = 15254809650390487842 的高8位
	top := tophash(hash)

	var insertb *bmap
	var inserti uintptr
	var insertk unsafe.Pointer

bucketloop:
	for {
	    // bucketCnt 常量8; 判断bmap中top等于211的值
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				if isEmpty(b.tophash[i]) && insertb == nil {
					insertb = b
					inserti = i
				}
				if b.tophash[i] == emptyRest {
					break bucketloop
				}
				continue
			}
			// 计算k 的地址
			k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
			if k.len != key.len {
				continue
			}
			// 判断key 是否存在且一致
			if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) {
				continue
			}
			// already have a mapping for key. Update it.
			inserti = i
			insertb = b
			// Overwrite existing key, so it can be garbage collected.
			// The size is already guaranteed to be set correctly.
			k.str = key.str
			goto done
		}
		//如果不在当前bmap中,那么从overflow中继续查找
		ovf := b.overflow(t)
		if ovf == nil {
			break
		}
		b = ovf
	}

	// Did not find mapping for key. Allocate new cell & add entry.

	// If we hit the max load factor or we have too many overflow buckets,
	// and we're not already in the middle of growing, start growing.
	// 扩容的2个条件,满足任何一个都将进行扩容操作; 负载因子 和 overflow 桶数量
	if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
		hashGrow(t, h)
		goto again // Growing the table invalidates everything, so try again
	}

	// 没有容纳空间,申请overflow 桶
	if insertb == nil {
		// The current bucket and all the overflow buckets connected to it are full, allocate a new one.
		insertb = h.newoverflow(t, b)
		inserti = 0 // not necessary, but avoids needlessly spilling inserti
	}
	insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks

	//插入key
	insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize)
	// store new key at insert position
	*((*stringStruct)(insertk)) = *key
	h.count++

done:
	// 插入value
	elem := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.elemsize))
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	// 去掉写标记
	h.flags &^= hashWriting
	return elem
}

总结一下map 的赋值操作, map 根据key的类型选择不同处理函数,编译阶段确定的; 第二步 对key进行hash 操作,将hash值的高8位 与bmap中tophash 比对,查询是否存在且类型和值是一致的; 第三步 就是查询可设置内容的位置,如果当前bmap 没有可用的,那么去overflow中查询;
在这里插入图片描述

     5:	func main() {
     6:		m := write()
     7:		fmt.Println(m["a"])
=>   8:		delete(m, "a")
     9:	}

赋值了解之后,看下删除是如何设置的; 在delete(m, “a”) si 命令进入底层函数 mapdelete_faststr

func mapdelete_faststr(t *maptype, h *hmap, ky string) {
	//竞态检查
	if raceenabled && h != nil {
		callerpc := getcallerpc()
		racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr))
	}
	// h 等于nil 或者 没有元素时就不需要 再删除了
	if h == nil || h.count == 0 {
		return
	}
	// 删除时不允许 并发写
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}
	// key hash 
	key := stringStructOf(&ky)
	hash := t.hasher(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))

	// Set hashWriting after calling t.hasher for consistency with mapdelete
	h.flags ^= hashWriting

	bucket := hash & bucketMask(h.B)
	if h.growing() {
		growWork_faststr(t, h, bucket)
	}
	b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
	bOrig := b
	// 高8位
	top := tophash(hash)
search:
	// 从当前bmap 寻找,否则从overflow 寻找
	for ; b != nil; b = b.overflow(t) {
		for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
			k := (*stringStruct)(kptr)
			if k.len != key.len || b.tophash[i] != top {
				continue
			}
			if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) {
				continue
			}
			// Clear key's pointer.
			k.str = nil
			e := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
			if t.elem.ptrdata != 0 {
				memclrHasPointers(e, t.elem.size)
			} else {
				memclrNoHeapPointers(e, t.elem.size)
			}
			// 标记空
			b.tophash[i] = emptyOne
			// If the bucket now ends in a bunch of emptyOne states,
			// change those to emptyRest states.
			if i == bucketCnt-1 {
				if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest {
					goto notLast
				}
			} else {
				if b.tophash[i+1] != emptyRest {
					goto notLast
				}
			}
			for {
				b.tophash[i] = emptyRest
				if i == 0 {
					if b == bOrig {
						break // beginning of initial bucket, we're done.
					}
					// Find previous bucket, continue at its last entry.
					c := b
					for b = bOrig; b.overflow(t) != c; b = b.overflow(t) {
					}
					i = bucketCnt - 1
				} else {
					i--
				}
				if b.tophash[i] != emptyOne {
					break
				}
			}
		notLast:
			h.count--
			// Reset the hash seed to make it more difficult for attackers to
			// repeatedly trigger hash collisions. See issue 25237.
			if h.count == 0 {
				h.hash0 = fastrand()
			}
			break search
		}
	}

	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	h.flags &^= hashWriting
}

请添加图片描述
总结一下, map 删除也是并发不安全的; 删除时 进行相同的查询逻辑,当找到key时 会置空标志;

参考

[1] https://yangxikun.com/golang/2019/10/07/golang-map.html
[2] https://github.com/cch123/golang-notes/blob/master/map.md

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

cugriver

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值