【go语言阻塞唤醒底层实现之gopark和goready】-CSDN博客

本文链接：https://blog.csdn.net/qq_37674060/article/details/127398756

go语言之阻塞唤醒底层实现

gopark
goready
- ready
  - runqput
  - wakep

看过channel和mutex的实现都知道，对于channel的阻塞和唤醒对应的底层实现分别是gopark和goready。然后mutex中阻塞和唤醒对应的分别是runtime_SemacquireMutex和runtime_Semrelease。
接下来先说一下gopark(阻塞)和goready(运行)这两个搭档

gopark

这个就是用来阻塞当前goroutine，不过这样说并不准确，因为是将当前goroutine和底层的M解绑，然后让M去运行别的G，然后还是看一下这个源码

这个注释比较多，主要是针对unlockf func(*g, unsafe.Pointer) bool。它的返回值可以理解为是否需要等待：当返回false的时候，会把当前goroutine重新恢复运行。另外需要注意，unlockf是在G已经被置为等待状态之后才调用的，所以如果unlockf返回false，就必须保证这个goroutine不会被外部唤醒(goready)。

// Puts the current goroutine into a waiting state and calls unlockf on the
// system stack.
//
// If unlockf returns false, the goroutine is resumed.
//
// unlockf must not access this G's stack, as it may be moved between
// the call to gopark and the call to unlockf.
//
// Note that because unlockf is called after putting the G into a waiting
// state, the G may have already been readied by the time unlockf is called
// unless there is external synchronization preventing the G from being
// readied. If unlockf returns false, it must guarantee that the G cannot be
// externally readied.
//
// Reason explains why the goroutine has been parked. It is displayed in stack
// traces and heap dumps. Reasons should be unique and descriptive. Do not
// re-use reasons, add new ones.
//
// lock is not interpreted here: it is stashed on the M and later handed to
// unlockf as its second argument by park_m. traceEv and traceskip are
// stashed the same way and used by park_m when emitting the park trace event.
func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) {
	if reason != waitReasonSleep {
		checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy
	}
	// Acquire the current M; it stays held until releasem below.
	mp := acquirem()
	// The goroutine currently running on this M, i.e. the one being parked.
	gp := mp.curg
	// Only a running goroutine (with or without the scan bit) may park.
	status := readgstatus(gp)
	if status != _Grunning && status != _Gscanrunning {
		throw("gopark: bad g status")
	}
	// Stash the park parameters; park_m reads them back once it is running
	// on g0.
	mp.waitlock = lock
	mp.waitunlockf = unlockf
	gp.waitreason = reason
	mp.waittraceev = traceEv
	mp.waittraceskip = traceskip
 
	// Release the M acquired above.
	releasem(mp)
 
	// Switch to the g0 stack and continue in park_m.
	// can't do anything that might move the G between Ms here.
	mcall(park_m)
}

mcall

这里mcall是汇编实现，大概逻辑就是首先把上下文保存在g->sched中，然后切换到g0栈去调用传进来的函数，这里是park_m。看一下go中的汇编实现，这里是基于asm_arm64

// void mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
//
// Outline: save the caller's SP/BP/PC into g->sched, make g0 the current g,
// adopt g0's saved stack, then call fn with the original g as its argument.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
	// The fn argument arrives in R0; keep it in R26 because R0 is reused
	// below, and the code pointer is later loaded through R26.
	MOVD	R0, R26				// context

	// Save caller state in g->sched
	// SP goes through R0 before being stored.
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	// The saved PC is LR, i.e. the return address into mcall's caller.
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)

	// Switch to m->g0 & its stack, call fn.
	// R3 remembers the g we came from; g becomes g->m->g0.
	MOVD	g, R3
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	// If the g we came from is already g0, branch to badmcall;
	// otherwise BNE skips over that branch.
	CMP	g, R3
	BNE	2(PC)
	B	runtime·badmcall(SB)

	// Adopt g0's saved stack pointer and frame pointer.
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP	// sp = m->g0->sched.sp
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R3, R0				// arg = g
	// Reserve 16 bytes on the g0 stack with a zero in the LR slot.
	MOVD	$0, -16(RSP)			// dummy LR
	SUB	$16, RSP
	MOVD	0(R26), R4			// code pointer
	BL	(R4)
	// fn must never return; reaching this point is an error.
	B	runtime·badmcall2(SB)

park_m

这里的park_m是在g0上操作的，看一下源码的实现

// park continuation on g0.
//
// gp is the goroutine being parked. The unlock callback, its lock argument
// and the trace parameters are read from the wait* fields of the current M.
func park_m(gp *g) {
	// _g_ is the g this code runs on (g0, per the comment above), not gp.
	_g_ := getg()

	if trace.enabled {
		traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
	}
 
	// Move gp from running to waiting.
	casgstatus(gp, _Grunning, _Gwaiting)
 
	// Detach gp from the current M.
	dropg()
  
	// Invoke the unlock callback, if one was supplied.
	if fn := _g_.m.waitunlockf; fn != nil {
		// ok reports whether gp should really stay parked.
		ok := fn(gp, _g_.m.waitlock)
		// Clear the stashed callback and lock on the M.
		_g_.m.waitunlockf = nil
		_g_.m.waitlock = nil
		// The callback declined the park: undo it.
		if !ok {
			if trace.enabled {
				traceGoUnpark(gp, 2)
			}
			// Put gp back into the runnable state ...
			casgstatus(gp, _Gwaiting, _Grunnable)
			// ... and resume it immediately on this M.
			execute(gp, true) // Schedule it back, never returns.
		}
	}
	// gp stays parked: pick another goroutine to run on this M.
	schedule()
}

execute

// Schedules gp to run on the current M.
// If inheritTime is true, gp inherits the remaining time in the
// current time slice. Otherwise, it starts a new time slice.
// Never returns.
//
// Write barriers are allowed because this is called immediately after
// acquiring a P in several places.
//
//go:yeswritebarrierrec
func execute(gp *g, inheritTime bool) {
	// _g_ is the g this code is currently running on.
	_g_ := getg()

	if goroutineProfile.active {
		// Make sure that gp has had its stack written out to the goroutine
		// profile, exactly as it was when the goroutine profiler first stopped
		// the world.
		tryRecordGoroutineProfile(gp, osyield)
	}

	// Assign gp.m before entering _Grunning so running Gs have an
	// M.
	// Bind gp and the current M to each other.
	_g_.m.curg = gp
	gp.m = _g_.m
  
	// Transition gp from runnable to running.
	casgstatus(gp, _Grunnable, _Grunning)
	gp.waitsince = 0
	gp.preempt = false
	gp.stackguard0 = gp.stack.lo + _StackGuard
	// A new time slice is counted as one more scheduler tick on the P.
	if !inheritTime {
		_g_.m.p.ptr().schedtick++
	}

	// Check whether the profiler needs to be turned on or off.
	hz := sched.profilehz
	if _g_.m.profilehz != hz {
		setThreadCPUProfiler(hz)
	}

	if trace.enabled {
		// GoSysExit has to happen when we have a P, but before GoStart.
		// So we emit it here.
		if gp.syscallsp != 0 && gp.sysblocktraced {
			traceGoSysExit(gp.sysexitticks)
		}
		traceGoStart()
	}
	// Resume gp from its saved context (gp.sched) via gogo.
	gogo(&gp.sched)
}

可以看出来，去掉一些无关的代码，execute函数的主要作用就是切换g的状态，然后根据g的sched也就是g的上下文，恢复当前g的运行。

gogo

gogo这个函数是用汇编实现的，看一下汇编的实现

/*
 *  go-routine
 */

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
//
// The exported entry loads the Gobuf pointer and its g, checks that g is
// usable, and tail-branches to the local gogo<> which does the restore.
TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
	// R5 = buf (the Gobuf argument), R6 = buf->g.
	MOVD	buf+0(FP), R5
	MOVD	gobuf_g(R5), R6
	// Dereference g so that a nil g is caught at this point.
	MOVD	0(R6), R4	// make sure g != nil
	B	gogo<>(SB)

// gogo<> expects R5 = Gobuf pointer and R6 = the g to switch to.
TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
	// Make R6 the current g.
	MOVD	R6, g
	BL	runtime·save_g(SB)

	// Restore SP, frame pointer, LR, return value and closure context
	// from the Gobuf.
	MOVD	gobuf_sp(R5), R0
	MOVD	R0, RSP
	MOVD	gobuf_bp(R5), R29
	MOVD	gobuf_lr(R5), LR
	MOVD	gobuf_ret(R5), R0
	MOVD	gobuf_ctxt(R5), R26
	// Zero the Gobuf fields that were just consumed (pc is left in place).
	MOVD	$0, gobuf_sp(R5)
	MOVD	$0, gobuf_bp(R5)
	MOVD	$0, gobuf_ret(R5)
	MOVD	$0, gobuf_lr(R5)
	MOVD	$0, gobuf_ctxt(R5)
	CMP	ZR, ZR // set condition codes for == test, needed by stack split
	// Jump to the saved PC; control does not come back here.
	MOVD	gobuf_pc(R5), R6
	B	(R6)

汇编也不是很懂，但是结合上下文看是通过g的sched把传入的g恢复运行

goready

首先看一下官方的实现

// goready makes gp ready to run by calling ready(gp, traceskip, true)
// inside systemstack. The hard-coded true is ready's next argument.
func goready(gp *g, traceskip int) {
	systemstack(func() {
		ready(gp, traceskip, true)
	})
}

这个systemstack就是系统栈，看一下官方的解释

// systemstack runs fn on a system stack.
// If systemstack is called from the per-OS-thread (g0) stack, or
// if systemstack is called from the signal handling (gsignal) stack,
// systemstack calls fn directly and returns.
// Otherwise, systemstack is being called from the limited stack
// of an ordinary goroutine. In this case, systemstack switches
// to the per-OS-thread stack, calls fn, and switches back.
// It is common to use a func literal as the argument, in order
// to share inputs and outputs with the code around the call
// to system stack:
//
//	... set up y ...
//	systemstack(func() {
//		x = bigcall(y)
//	})
//	... use x ...
//
// This declaration has no Go body; the implementation is supplied
// elsewhere (NOTE(review): presumably per-architecture assembly, not
// shown here).
//
//go:noescape
func systemstack(fn func())

简单说就是如果已经在g0栈上或者信号处理栈(gsignal)上，那么直接调用传入的函数并返回；否则说明当前是在普通goroutine的有限栈上，那么就先切换到当前线程的系统栈也就是g0栈上，调用完传入的函数之后再切换回来。
所以这个函数其实就是可以理解为切换到g0栈上去调用传入的函数。

ready

看一下官方的实现

// Mark gp ready to run.
//
// gp must be in _Gwaiting (the scan bit is ignored by the check); otherwise
// ready throws. traceskip is passed to traceGoUnpark when tracing is on, and
// next is forwarded unchanged to runqput.
func ready(gp *g, traceskip int, next bool) {
	if trace.enabled {
		traceGoUnpark(gp, traceskip)
	}
	// Read gp's current status.
	status := readgstatus(gp)

	// Mark runnable.
	_g_ := getg()
	mp := acquirem() // disable preemption because it can be holding p in a local var
	if status&^_Gscan != _Gwaiting {
		dumpgstatus(gp)
		throw("bad g->status in ready")
	}

	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
	// Transition gp from waiting to runnable.
	casgstatus(gp, _Gwaiting, _Grunnable)
 
	// Queue gp on the run queue of the P bound to the current M.
	runqput(_g_.m.p.ptr(), gp, next)
	// Try to wake an idle P so that the new work can be picked up.
	wakep()
	// Release the M acquired above.
	releasem(mp)
}

runqput

看一下官方的实现，主要作用就是把传入的g放到p的本地可运行队列里：next为true时放到p的runnext(下一个待运行的位置)，并把被替换下来的g放到runq的队尾；next为false时直接放到runq的队尾；如果runq已满，则放到全局队列中

// runqput tries to put g on the local runnable queue.
// If next is false, runqput adds g to the tail of the runnable queue.
// If next is true, runqput puts g in the _p_.runnext slot.
// If the run queue is full, runqput puts g on the global queue.
// Executed only by the owner P.
func runqput(_p_ *p, gp *g, next bool) {
	// With scheduler randomization on, drop the runnext request half the
	// time.
	if randomizeScheduler && next && fastrandn(2) == 0 {
		next = false
	}

	if next {
	retryNext:
		// oldnext is the g currently in the runnext slot, about to be
		// displaced by gp.
		oldnext := _p_.runnext
		// Install gp with a CAS; retry from the load if the CAS fails.
		if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
			goto retryNext
		}
		// The slot was empty: nothing was displaced, so we are done.
		if oldnext == 0 {
			return
		}
		// Kick the old runnext out to the regular run queue.
		// From here on gp refers to the displaced g.
		gp = oldnext.ptr()
	}

retry:
	// If the local run queue still has a free slot, store gp at the tail.
	h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
	t := _p_.runqtail
	if t-h < uint32(len(_p_.runq)) {
		_p_.runq[t%uint32(len(_p_.runq))].set(gp)
		atomic.StoreRel(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
		return
	}
	// The local queue looked full: take the slow path (the header comment
	// says a full queue sends g to the global queue).
	if runqputslow(_p_, gp, h, t) {
		return
	}
	// the queue is not full, now the put above must succeed
	goto retry
}

wakep

这个方法主要是为了唤醒沉睡的p，因为上面有新的g加入，说明开始繁忙了，需要新的p加入干活了
先看看官方的实现

// wakep tries to bring one more P into service to execute G's.
// It is called when a G is made runnable (newproc, ready).
func wakep() {
	// No idle P exists, so there is nothing to wake.
	if atomic.Load(&sched.npidle) == 0 {
		return
	}
	// be conservative about spinning threads:
	// back off if some M is already spinning ...
	if atomic.Load(&sched.nmspinning) != 0 {
		return
	}
	// ... or if we lose the race to move nmspinning from 0 to 1.
	if !atomic.Cas(&sched.nmspinning, 0, 1) {
		return
	}
	// nmspinning was incremented by the CAS above; startm is told so via
	// spinning=true and is left to find an idle P itself (p == nil).
	startm(nil, true)
}

这个startm其实就是先寻找空闲的p，然后去和m绑定，如果没有空闲的m那么就创建一个，接下来看一下官方的实现。

// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P, if no idle P's does nothing.
// May run with m.p==nil, so write barriers are not allowed.
// If spinning is set, the caller has incremented nmspinning and startm will
// either decrement nmspinning or set m.spinning in the newly started M.
//
// Callers passing a non-nil P must call from a non-preemptible context. See
// comment on acquirem below.
//
// Must not have write barriers because this may be called without a P.
//
//go:nowritebarrierrec
func startm(_p_ *p, spinning bool) {
	// Disable preemption.
	//
	// Every owned P must have an owner that will eventually stop it in the
	// event of a GC stop request. startm takes transient ownership of a P
	// (either from argument or pidleget below) and transfers ownership to
	// a started M, which will be responsible for performing the stop.
	//
	// Preemption must be disabled during this transient ownership,
	// otherwise the P this is running on may enter GC stop while still
	// holding the transient P, leaving that P in limbo and deadlocking the
	// STW.
	//
	// Callers passing a non-nil P must already be in non-preemptible
	// context, otherwise such preemption could occur on function entry to
	// startm. Callers passing a nil P may be preemptible, so we must
	// disable preemption before acquiring a P from pidleget below.
	mp := acquirem()
	lock(&sched.lock)
	// Phase 1: make sure we hold a P. With no P supplied, try to take an
	// idle one; give up if there is none.
	if _p_ == nil {
		_p_, _ = pidleget(0)
		if _p_ == nil {
			unlock(&sched.lock)
			if spinning {
				// The caller incremented nmspinning, but there are no idle Ps,
				// so it's okay to just undo the increment and give up.
				if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
					throw("startm: negative nmspinning")
				}
			}
			releasem(mp)
			return
		}
	}
	// Phase 2: find an M to run the P. A nil result from mget means no M
	// is available, so a new one is created.
	nmp := mget()
	if nmp == nil {
		// No M is available, we must drop sched.lock and call newm.
		// However, we already own a P to assign to the M.
		//
		// Once sched.lock is released, another G (e.g., in a syscall),
		// could find no idle P while checkdead finds a runnable G but
		// no running M's because this new M hasn't started yet, thus
		// throwing in an apparent deadlock.
		//
		// Avoid this situation by pre-allocating the ID for the new M,
		// thus marking it as 'running' before we drop sched.lock. This
		// new M will eventually run the scheduler to execute any
		// queued G's.
		id := mReserveID()
		unlock(&sched.lock)

		var fn func()
		if spinning {
			// The caller incremented nmspinning, so set m.spinning in the new M.
			fn = mspinning
		}
		newm(fn, _p_, id)
		// Ownership transfer of _p_ committed by start in newm.
		// Preemption is now safe.
		releasem(mp)
		return
	}
	// Phase 3: an existing M was obtained. Sanity-check it, hand it the P
	// through nextp and wake it via its park note.
	unlock(&sched.lock)
	if nmp.spinning {
		throw("startm: m is spinning")
	}
	if nmp.nextp != 0 {
		throw("startm: m has p")
	}
	if spinning && !runqempty(_p_) {
		throw("startm: p has runnable gs")
	}
	// The caller incremented nmspinning, so set m.spinning in the new M.
	nmp.spinning = spinning
	nmp.nextp.set(_p_)
	notewakeup(&nmp.park)
	// Ownership transfer of _p_ committed by wakeup. Preemption is now
	// safe.
	releasem(mp)
}