Go语言源码怎么阅读?
引言
本文记录笔者近期学习Go底层源码时所涉及到的部分笔记,在此记录以供自己复习使用。
未完待续。。
栈分配内存的阈值
cmd/compile/internal/ir/cfg.go
该文件定义了非逃逸的情况下栈上分配内存的阈值
// 定义显式声明的变量,在栈上能分配的最大容量(10MB),若大于该值,显式声明的变量将被分配在堆上
// maximum size variable which we will allocate on the stack.
// This limit is for explicit variable declarations like "var x T" or "x := ...".
// Note: the flag smallframes can update this value.
MaxStackVarSize = int64(10 * 1024 * 1024)
// 定义隐式声明的变量,在栈上能够分配的最大容量(64KB),若大于该值,隐式声明的变量将被分配在堆上
// maximum size of implicit variables that we will allocate on the stack.
// p := new(T) allocating T on the stack
// p := &T{} allocating T on the stack
// s := make([]T, n) allocating [n]T on the stack
// s := []byte("...") allocating [n]byte on the stack
// Note: the flag smallframes can update this value.
MaxImplicitStackVarSize = int64(64 * 1024)
//MaxSmallArraySize 是被认为很小的数组的最大大小。小数组将直接使用一系列常量存储进行初始化。大型数组将通过从静态临时复制来初始化。选择 256 字节以最小化生成的代码 + statictmp 大小。
// MaxSmallArraySize is the maximum size of an array which is considered small.
// Small arrays will be initialized directly with a sequence of constant stores.
// Large arrays will be initialized by copying from a static temp.
// 256 bytes was chosen to minimize generated code + statictmp size.
MaxSmallArraySize = int64(256)
通道的底层结构体
通道循环队列结构
runtime/chan.go
Go中通道的底层结构体
// hchan is the runtime representation of a Go channel (excerpt from runtime/chan.go).
// The notes on send/receive behaviour below summarise the channel send/receive
// paths, which are not part of this excerpt.
type hchan struct {
// Number of elements currently stored in the queue.
qcount uint // total data in the queue
// Capacity of the circular queue, counted in elements (buf has dataqsiz slots).
dataqsiz uint // size of the circular queue
// Pointer to the buffer that holds the queued elements.
// On send (author's notes): if no receiver is waiting in recvq, the channel is
// buffered, and the buffer is not full, the element is written into the buffer.
// On receive (author's notes): if no sender is waiting in sendq, the channel is
// buffered, and the buffer holds data, the element is read from the buffer and
// copied to the receiving goroutine.
buf unsafe.Pointer // points to an array of dataqsiz elements
// Size of one element of the channel's element type.
elemsize uint16
// Whether the channel has been closed.
closed uint32
// Element type of the channel.
elemtype *_type // element type
// Index in buf used by the next send.
sendx uint // send index
// Index in buf used by the next receive.
recvx uint // receive index
// Queue of goroutines blocked on receive. Each waiter is a sudog, a wrapper
// around the goroutine that carries, among other things, the pointer to the
// element it is waiting to receive into.
// Author's notes: when the channel is unbuffered or its buffer is empty, the
// receiving goroutine's sudog is appended to the tail of recvq and the goroutine
// sleeps until it is woken to resume execution.
// On send (author's notes): if a receiver is waiting, the first waiter is taken
// from this list, the element is copied directly to it, and the blocked
// goroutine is then woken.
recvq waitq // list of recv waiters
// Queue of goroutines blocked on send.
// Author's notes: when the channel is unbuffered or its buffer is full, the
// sending goroutine's sudog is appended to the tail of sendq and the goroutine
// sleeps until it is woken to resume execution.
// On receive (author's notes): if a sender is waiting, the first waiter is taken
// from this list, its element is copied directly to the current goroutine, and
// the blocked sender is then woken, so the receiver does not need to sleep.
sendq waitq // list of send waiters
// lock protects all fields in hchan, as well as several
// fields in sudogs blocked on this channel.
//
// Do not change another G's status while holding this lock
// (in particular, do not ready a G), as this can deadlock
// with stack shrinking.
// Lock providing concurrency protection.
lock mutex
}
接口的底层结构体
runtime/runtime2.go
接口的底层结构体
![接口的底层结构体](https://raw.githubusercontent.com/Voryla/myimages/main/deepinJVM/20220331221546.png)
// iface is the runtime structure of an interface value whose interface type
// declares methods (excerpt from runtime/runtime2.go).
type iface struct {
// Records the interface's type information and the functions it provides.
tab *itab
// Pointer to the value held by the interface.
data unsafe.Pointer
}
itab
记录接口的类型
// itab records the type information for an interface value: the interface type
// (inter), the concrete type (_type), and the method table (fun).
type itab struct {
inter *interfacetype
_type *_type
// Type hash; a copy of _type.hash, as the trailing comment states.
hash uint32 // copy of _type.hash. Used for type switches.
// Padding for memory alignment.
_ [4]byte
// Function (method) table.
fun [1]uintptr // variable sized. fun[0]==0 means _type does not implement inter.
}
interfacetype
是对_type的简单包装
// interfacetype describes an interface type; it wraps a _type with the
// interface's package path and method list.
type interfacetype struct {
// Type descriptor.
typ _type
// Package path in which the interface is defined.
pkgpath name
// Exposed methods of the interface, stored as name and type offsets into the
// final executable. Author's notes: at run time these offsets are resolved to
// the method name and type via resolveNameOff and resolveTypeOff (not shown in
// this excerpt).
mhdr []imethod
}
切片的底层结构体
reflect/value.go
切片的底层结构体
// SliceHeader is the underlying representation of a slice (excerpt from
// reflect/value.go).
type SliceHeader struct {
// Pointer to the underlying array.
Data uintptr
// Length.
Len int
// Capacity.
Cap int
}
Map的底层结构体
Map的底层结构体位于runtime/map.go
// A header for a Go map.
type hmap struct {
// Note: the format of the hmap is also encoded in cmd/compile/internal/reflectdata/reflect.go.
// Make sure this stays in sync with the compiler's definition.
// Number of elements in the map.
count int // # live cells == size of map. Must be first (used by len() builtin)
// Current state of the map.
flags uint8
// The map has 2^B buckets.
B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
// Number of overflow buckets. Author's notes: when there are too many overflow
// buckets the map performs a same-size grow, gathering the scattered key/value
// pairs so that oversized overflow chains do not leak memory.
noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
hash0 uint32 // hash seed; it adds randomness to the hash function's results. Author's notes: it is chosen when the map is created and passed as an argument when the hash function is called.
// Pointer to the map's bucket array.
buckets unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
// Old bucket array kept while the map is growing. Author's notes: it is cleared
// once all data in the old buckets has been moved to the new buckets.
oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
// Evacuation progress marker used while growing: old buckets with an index
// below nevacuate have already been moved to the new bucket array. Author's
// notes: growWork evacuates not only the bucket being accessed but also the
// bucket that nevacuate points to.
nevacuate uintptr // progress counter for evacuation (buckets less than this have been evacuated)
// Optional extra data. Author's notes: this holds the map's overflow buckets.
extra *mapextra // optional fields
}
bmap
表示map中具体的一个bucket
同样位于runtime/map.go
// The bmap structure that represents a bucket lists only its first field at run
// time: a fixed-length array of 8 entries that stores, in order, the top 8 bits
// of each key's hash.
// The sizes of the key, the value, and the bucket are fixed at compile time, so
// at run time a particular element can be located with pointer arithmetic alone.
// After the tophash field the bucket stores the key array and then the value
// array. Keys and values are stored separately, rather than interleaved as
// key/value/key/value, to save the space alignment padding would otherwise take.
// Author's notes on a map access such as hash[key]: the bucket is located first,
// then its tophash array is scanned; when a matching hash is found, the
// corresponding key and value are reached by pointer addressing.
// A bucket for a Go map.
type bmap struct {
// tophash generally contains the top byte of the hash value
// for each key in this bucket. If tophash[0] < minTopHash,
// tophash[0] is a bucket evacuation state instead.
tophash [bucketCnt]uint8
// Followed by bucketCnt keys and then bucketCnt elems.
// NOTE: packing all the keys together and then all the elems together makes the
// code a bit more complicated than alternating key/elem/key/elem/... but it allows
// us to eliminate padding which would be needed for, e.g., map[int64]int8.
// Followed by an overflow pointer.
}
G调度函数
G调度函数位于:runtime/proc.go
// newproc creates a new goroutine that runs fn and puts it on the current P's
// local run queue.
//
// siz is the size of the arguments in bytes; the arguments themselves are read
// from the address one pointer past &fn.
func newproc(siz int32, fn *funcval) {
argp := add(unsafe.Pointer(&fn), sys.PtrSize)
// Get the pointer to the current G.
gp := getg()
// Return address of the caller, i.e. the return address pushed by the call
// instruction.
pc := getcallerpc()
systemstack(func() {
// Create the goroutine.
// fn: entry point of the new goroutine; argp: address of fn plus one pointer,
// i.e. the address of the arguments passed to the new goroutine; siz: size of
// the arguments in bytes; gp: parent goroutine (the one creating the new one);
// pc: return address after the goroutine has been created.
newg := newproc1(fn, argp, siz, gp, pc)
// The current P.
_p_ := getg().m.p.ptr()
// Put newg on the current P's local run queue.
runqput(_p_, newg, true)
// Once the main M has started (mainStarted), try to get another P running Gs.
if mainStarted {
// Author's notes: wakep starts an M and puts it in the spinning state (wakep
// itself is not shown in this excerpt).
wakep()
}
})
}
newproc
调用newproc1
完成一个G的创建
// newproc1 creates a new g that will run fn and returns it in the _Grunnable
// state.
//
// fn is the entry point of the new goroutine. argp is the address of fn plus one
// pointer, i.e. the address of the arguments passed to the new goroutine. narg
// is the size of those arguments in bytes. callergp is the parent goroutine (the
// one creating the new goroutine). callerpc is the return address after the
// goroutine has been created.
func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerpc uintptr) *g {
if goexperiment.RegabiDefer && narg != 0 {
// TODO: When we commit to GOEXPERIMENT=regabidefer,
// rewrite the comments for newproc and newproc1.
// newproc will no longer have a funny stack layout or
// need to be nosplit.
throw("go with non-empty frame")
}
_g_ := getg()
if fn == nil {
_g_.m.throwing = -1 // do not dump full stacks
throw("go of nil func value")
}
// Prevent the current M from being preempted.
// Why? The code that follows may keep the current P in a local variable on the
// g0 stack. If the M were preempted now and the P became associated with another
// M, then on resumption the stale P held in that local variable would be used
// and the data would be inconsistent.
acquirem() // disable preemption because it can be holding p in a local var
siz := narg
siz = (siz + 7) &^ 7
// We could allocate a larger initial stack if necessary.
// Not worth it: this is almost always an error.
// 4*PtrSize: extra space added below
// PtrSize: caller's LR (arm) or return address (x86, in gostartcall).
if siz >= _StackMin-4*sys.PtrSize-sys.PtrSize {
throw("newproc: function arguments too large for new goroutine")
}
_p_ := _g_.m.p.ptr()
// Try to obtain a free G.
newg := gfget(_p_)
if newg == nil {
// Neither the current P nor the scheduler has a free G: create one and add it
// to the global allgs.
newg = malg(_StackMin)
casgstatus(newg, _Gidle, _Gdead)
allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
}
if newg.stack.hi == 0 {
throw("newproc1: newg missing stack")
}
if readgstatus(newg) != _Gdead {
throw("newproc1: new g is not Gdead")
}
// Initialise the goroutine's stack.
totalSize := 4*sys.PtrSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame
totalSize += -totalSize & (sys.StackAlign - 1) // align to StackAlign
sp := newg.stack.hi - totalSize
spArg := sp
if usesLR {
// caller's LR
*(*uintptr)(unsafe.Pointer(sp)) = 0
prepGoExitFrame(sp)
spArg += sys.MinFrameSize
}
// If the entry function takes arguments, move them onto the new goroutine's
// stack: the arguments saved on the parent goroutine's stack are copied to
// newg's own stack.
if narg > 0 {
memmove(unsafe.Pointer(spArg), argp, uintptr(narg))
// This is a stack-to-stack copy. If write barriers
// are enabled and the source stack is grey (the
// destination is always black), then perform a
// barrier copy. We do this *after* the memmove
// because the destination stack may have garbage on
// it.
if writeBarrier.needed && !_g_.m.curg.gcscandone {
f := findfunc(fn.fn)
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
if stkmap.nbit > 0 {
// We're in the prologue, so it's always stack map index 0.
bv := stackmapdata(stkmap, 0)
bulkBarrierBitmap(spArg, spArg, uintptr(bv.n)*sys.PtrSize, 0, bv.bytedata)
}
}
}
memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
// g.sched saves the execution context; set g.sched.sp to the goroutine's stack
// pointer.
newg.sched.sp = sp
newg.stktopsp = sp
// Use the address of goexit plus PCQuantum as the saved pc so that it acts as
// the new goroutine's return address: once the goroutine's work is done,
// control goes to goexit to carry out the cleanup (author's notes;
// gostartcallfn is not shown in this excerpt).
newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
newg.sched.g = guintptr(unsafe.Pointer(newg))
gostartcallfn(&newg.sched, fn)
// Record the return address of the parent goroutine's newproc call.
newg.gopc = callerpc
newg.ancestors = saveAncestors(callergp)
// Set the new g's startpc to the start address of the goroutine's entry
// function.
newg.startpc = fn.fn
if _g_.m.curg != nil {
newg.labels = _g_.m.curg.labels
}
if isSystemGoroutine(newg, false) {
atomic.Xadd(&sched.ngsys, +1)
}
// Track initial transition?
newg.trackingSeq = uint8(fastrand())
if newg.trackingSeq%gTrackingPeriod == 0 {
newg.tracking = true
}
// Switch the goroutine's status to _Grunnable, which means this G may now be
// placed on a run queue.
casgstatus(newg, _Gdead, _Grunnable)
if _p_.goidcache == _p_.goidcacheend {
// Sched.goidgen is the last allocated id,
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
// At startup sched.goidgen=0, so main goroutine receives goid=1.
_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
_p_.goidcache -= _GoidCacheBatch - 1
_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
}
// Assign newg its unique id.
newg.goid = int64(_p_.goidcache)
_p_.goidcache++
if raceenabled {
newg.racectx = racegostart(callerpc)
}
if trace.enabled {
traceGoCreate(newg, newg.startpc)
}
releasem(_g_.m)
return newg
}
GMP底层结构体
GMP底层结构体位于:runtime/runtime2.go
GMP结构体代码量过多,笔者不再列出,这里列出schedt
结构体,该结构体记录了GMP模型的一些重要的全局变量
// schedt holds the global variables of the GMP scheduling model (excerpt from
// runtime/runtime2.go).
type schedt struct {
// accessed atomically. keep at top to ensure alignment on 32-bit systems.
goidgen uint64
lastpoll uint64 // time of last network poll, 0 if currently polling
pollUntil uint64 // time to which current poll is sleeping
lock mutex
// When increasing nmidle, nmidlelocked, nmsys, or nmfreed, be
// sure to call checkdead().
// All idle Ms.
midle muintptr // idle m's waiting for work
nmidle int32 // number of idle m's waiting for work
nmidlelocked int32 // number of locked m's waiting for work
mnext int64 // number of m's that have been created and next M ID
maxmcount int32 // maximum number of m's allowed (or die)
nmsys int32 // number of system m's not counted for deadlock
nmfreed int64 // cumulative number of freed m's
ngsys uint32 // number of system goroutines; updated atomically
// All idle Ps.
pidle puintptr // idle p's
npidle uint32
nmspinning uint32 // See "Worker thread parking/unparking" comment in proc.go.
// Global runnable queue, shared by all Ps.
runq gQueue
runqsize int32
// disable controls selective disabling of the scheduler.
//
// Use schedEnableUser to control this.
//
// disable is protected by sched.lock.
disable struct {
// user disables scheduling of user goroutines.
user bool
runnable gQueue // pending runnable Gs
n int32 // length of runnable
}
// Global cache of dead G's.
gFree struct {
lock mutex
stack gList // Gs with stacks
noStack gList // Gs without stacks
n int32
}
// Central cache of sudog structs.
sudoglock mutex
sudogcache *sudog
// Central pool of available defer structs of different sizes.
deferlock mutex
deferpool [5]*_defer
// freem is the list of m's waiting to be freed when their
// m.exited is set. Linked through m.freelink.
freem *m
gcwaiting uint32 // gc is waiting to run
stopwait int32
stopnote note
sysmonwait uint32
sysmonnote note
// While true, sysmon not ready for mFixup calls.
// Accessed atomically.
sysmonStarting uint32
// safepointFn should be called on each P at the next GC
// safepoint if p.runSafePointFn is set.
safePointFn func(*p)
safePointWait int32
safePointNote note
profilehz int32 // cpu profiling rate
procresizetime int64 // nanotime() of last change to gomaxprocs
totaltime int64 // ∫gomaxprocs dt up to procresizetime
// sysmonlock protects sysmon's actions on the runtime.
//
// Acquire and hold this mutex to block sysmon from interacting
// with the rest of the runtime.
sysmonlock mutex
_ uint32 // ensure timeToRun has 8-byte alignment
// timeToRun is a distribution of scheduling latencies, defined
// as the sum of time a G spends in the _Grunnable state before
// it transitions to _Grunning.
//
// timeToRun is protected by sched.lock.
timeToRun timeHistogram
}
互斥锁结构体
互斥锁结构体位于:sync/mutex.go
// Mutex is the mutual-exclusion lock structure (excerpt from sync/mutex.go).
type Mutex struct {
// State word. Bit layout as described in the author's notes:
// bit 0: whether the lock is currently held (1 locked, 0 unlocked); corresponds to mutexLocked
// bit 1: whether a G has been woken (1 yes, 0 no); corresponds to mutexWoken
// bit 2: whether the lock is in starvation mode (0 normal mode, 1 starvation mode); corresponds to mutexStarving
// bits 3-31: number of goroutines waiting to be woken
state int32
// Semaphore.
sema uint32
}
互斥锁的全局hash表
全局hash表中每个桶通过平衡二叉树+双向链表,记录了所有互斥锁的等待队列
// semTabSize is the number of buckets in semtable.
const semTabSize = 251
// semtable is the global semaphore hash table. Author's notes: locks are stored
// in it according to their address.
// Each entry pads its semaRoot out to cpu.CacheLinePadSize bytes.
var semtable [semTabSize]struct {
root semaRoot
pad [cpu.CacheLinePadSize - unsafe.Sizeof(semaRoot{})]byte
}