How Go blocks and wakes goroutines under the hood: sync_runtime_SemacquireMutex and runtime_Semrelease
sync_runtime_SemacquireMutex and runtime_Semrelease are the two functions that Go's mutex uses to put the current goroutine to sleep and to wake a waiting one back up. Let's look at how they are implemented.
sync_runtime_SemacquireMutex
First, the official doc comment:
// SemacquireMutex is like Semacquire, but for profiling contended Mutexes.
// If lifo is true, queue waiter at the head of wait queue.
// skipframes is the number of frames to omit during tracing, counting from
// runtime_SemacquireMutex's caller.
func runtime_SemacquireMutex(s *uint32, lifo bool, skipframes int)
From the comment: s is the address of the semaphore word, which is later also used to pick a bucket in the balanced-tree table; lifo decides whether the waiter is queued at the head of the wait queue; skipframes is the number of stack frames to skip during tracing.
The actual implementation lives in runtime/sema.go and is wired up to the sync package at compile time via go:linkname.
const (
	semaBlockProfile semaProfileFlags = 1 << iota
	semaMutexProfile
)

//go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex
func sync_runtime_SemacquireMutex(addr *uint32, lifo bool, skipframes int) {
	semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes)
}
As their names suggest, semaBlockProfile and semaMutexProfile control whether this acquisition is sampled; they feed the block and mutex profiles exposed through pprof.
The real work happens in semacquire1, which is what the wrapper above calls.
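For illustration, here is a hypothetical minimal program that turns both profiles on from user code, so that the blockprofilerate and mutexprofilerate checks inside semacquire1 actually fire (the knobs and profile names are the standard runtime/pprof API):

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

func main() {
	// These knobs set the blockprofilerate / mutexprofilerate values that
	// semacquire1 consults before recording releasetime and acquiretime.
	runtime.SetBlockProfileRate(1)     // sample every blocking event
	runtime.SetMutexProfileFraction(1) // sample every contended mutex

	// ... run some contended sync.Mutex code here ...

	// Dump the resulting profiles; contended Lock calls show up here.
	pprof.Lookup("block").WriteTo(os.Stdout, 1)
	pprof.Lookup("mutex").WriteTo(os.Stdout, 1)
}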
semacquire1
func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes int) {
	// get the current g
	gp := getg()
	if gp != gp.m.curg {
		throw("semacquire not on the G stack")
	}

	// Easy case: try to take the semaphore directly.
	if cansemacquire(addr) {
		return
	}

	// Harder case:
	//	increment waiter count
	//	try cansemacquire one more time, return if succeeded
	//	enqueue itself as a waiter
	//	sleep
	//	(waiter descriptor is dequeued by signaler)
	// grab a sudog to represent this waiter
	s := acquireSudog()
	// find the semaRoot bucket for this address and reset the sudog fields
	root := semtable.rootFor(addr)
	t0 := int64(0)
	s.releasetime = 0
	s.acquiretime = 0
	s.ticket = 0
	if profile&semaBlockProfile != 0 && blockprofilerate > 0 {
		t0 = cputicks()
		s.releasetime = -1
	}
	if profile&semaMutexProfile != 0 && mutexprofilerate > 0 {
		if t0 == 0 {
			t0 = cputicks()
		}
		s.acquiretime = t0
	}
	// slow path: queue up and sleep
	for {
		// take the root lock (a runtime mutex; this can block the underlying M)
		lockWithRank(&root.lock, lockRankRoot)
		// Add ourselves to nwait to disable "easy case" in semrelease.
		// bump the waiter count
		atomic.Xadd(&root.nwait, 1)
		// Check cansemacquire to avoid missed wakeup.
		if cansemacquire(addr) {
			atomic.Xadd(&root.nwait, -1)
			unlock(&root.lock)
			break
		}
		// Any semrelease after the cansemacquire knows we're waiting
		// (we set nwait above), so go to sleep.
		// enqueue ourselves on the treap for this address
		root.queue(addr, s, lifo)
		// park the current g via gopark
		goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4+skipframes)
		// woken up: stop if we got a direct handoff (ticket != 0) or can take the semaphore
		if s.ticket != 0 || cansemacquire(addr) {
			break
		}
	}
	if s.releasetime > 0 {
		blockevent(s.releasetime-t0, 3+skipframes)
	}
	// return the sudog to the cache
	releaseSudog(s)
}
sudog was already covered in the channel write-up, so the interesting pieces here are cansemacquire, plus the runtime's own lock and unlock.
cansemacquire
This decides, based on the semaphore counter, whether the lock can be taken: if the counter is positive it is decremented with a CAS, otherwise the function fails.
// a plain CAS loop over the semaphore counter
func cansemacquire(addr *uint32) bool {
	for {
		v := atomic.Load(addr)
		if v == 0 {
			return false
		}
		if atomic.Cas(addr, v, v-1) {
			return true
		}
	}
}
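To make the semantics concrete, here is a hypothetical user-level equivalent written with sync/atomic (the name tryAcquire is mine, not the runtime's): the counter is only decremented if it is currently positive, and the CAS loop handles races between concurrent acquirers.

package main

import (
	"fmt"
	"sync/atomic"
)

// tryAcquire mirrors cansemacquire: it only succeeds if the counter is
// currently positive, and decrements it with a CAS loop so concurrent
// callers cannot both consume the same unit.
func tryAcquire(addr *uint32) bool {
	for {
		v := atomic.LoadUint32(addr)
		if v == 0 {
			return false // nothing to take, the caller must go the slow path
		}
		if atomic.CompareAndSwapUint32(addr, v, v-1) {
			return true
		}
		// CAS lost a race with another goroutine; reload and retry.
	}
}

func main() {
	var sema uint32 = 1
	fmt.Println(tryAcquire(&sema)) // true, sema is now 0
	fmt.Println(tryAcquire(&sema)) // false, a release would be needed first
}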
Next come the runtime's low-level lock and unlock. Note that this lock operates at the level of the underlying OS thread (the M), not the goroutine. semacquire1 calls lockWithRank, which simply forwards to lock2.
func lockWithRank(l *mutex, rank lockRank) {
	lock2(l)
}
func lock2(l *mutex) {
	// get the current g
	gp := getg()
	if gp.m.locks < 0 {
		throw("runtime·lock: lock count")
	}
	gp.m.locks++

	// Speculative grab for lock.
	// try to take the lock with a single CAS
	if atomic.Casuintptr(&l.key, 0, locked) {
		return
	}
	// lazily create this M's OS-level semaphore; on Darwin this ends up calling
	// pthread_mutex_init and pthread_cond_init
	semacreate(gp.m)

	// On uniprocessor's, no point spinning.
	// On multiprocessors, spin for ACTIVE_SPIN attempts.
	// single core: no point spinning, go straight to sleeping;
	// multiple cores: spin for a while before sleeping
	spin := 0
	if ncpu > 1 {
		spin = active_spin
	}
Loop:
	for i := 0; ; i++ {
		// see whether the lock looks free and try to take it
		v := atomic.Loaduintptr(&l.key)
		if v&locked == 0 {
			// Unlocked. Try to lock.
			if atomic.Casuintptr(&l.key, v, v|locked) {
				return
			}
			i = 0
		}
		// spin
		if i < spin {
			procyield(active_spin_cnt)
		} else if i < spin+passive_spin {
			osyield()
		} else {
			// Someone else has it.
			// l->waitm points to a linked list of M's waiting
			// for this lock, chained through m->nextwaitm.
			// Queue this M.
			// nextwaitm forms an intrusive linked list: stash the M currently
			// stored in l.key into our nextwaitm, then publish our own address
			// in l.key, so that unlock can follow l.key to find the M to wake up
			for {
				gp.m.nextwaitm = muintptr(v &^ locked)
				if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|locked) {
					break
				}
				v = atomic.Loaduintptr(&l.key)
				if v&locked == 0 {
					continue Loop
				}
			}
			// the lock was still held when we queued ourselves, so this M can sleep
			if v&locked != 0 {
				// Queued. Wait.
				// blocks the thread until semawakeup is called on this M
				semasleep(-1)
				i = 0
			}
		}
	}
}
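Note what l.key is doing here: its low bit is the locked flag and the remaining bits hold the address of the most recently queued M (M structs are pointer-aligned, so the low bit of their address is always zero). A hypothetical standalone sketch of that bit-packing, for illustration only:

package main

import (
	"fmt"
	"unsafe"
)

const locked uintptr = 1

type waiterM struct{ id int } // stand-in for the runtime's m struct

func main() {
	w := &waiterM{id: 7}

	// Pack: pointer | flag, as lock2 does with
	// uintptr(unsafe.Pointer(gp.m)) | locked.
	key := uintptr(unsafe.Pointer(w)) | locked

	// Unpack: mask off the flag to recover the pointer, as unlock2 does with
	// muintptr(v &^ locked).ptr(). (Round-tripping through uintptr like this
	// is not GC-safe in ordinary Go code; the runtime can get away with it.)
	fmt.Println(key&locked != 0)                              // true: lock held
	fmt.Println((*waiterM)(unsafe.Pointer(key &^ locked)).id) // 7: the queued waiter
}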
Now look at semasleep. The version shown here is the Darwin implementation, which sits on top of pthread mutexes and condition variables:
//go:nosplit
func semasleep(ns int64) int32 {
	var start int64
	if ns >= 0 {
		start = nanotime()
	}
	// lock this M's own pthread mutex
	mp := getg().m
	pthread_mutex_lock(&mp.mutex)
	for {
		// a wakeup has already been posted; consume it and return
		if mp.count > 0 {
			mp.count--
			pthread_mutex_unlock(&mp.mutex)
			return 0
		}
		// bounded wait: honor the caller's timeout
		if ns >= 0 {
			spent := nanotime() - start
			if spent >= ns {
				pthread_mutex_unlock(&mp.mutex)
				return -1
			}
			var t timespec
			t.setNsec(ns - spent)
			err := pthread_cond_timedwait_relative_np(&mp.cond, &mp.mutex, &t)
			if err == _ETIMEDOUT {
				pthread_mutex_unlock(&mp.mutex)
				return -1
			}
		} else {
			// release the mutex and wait to be signaled
			pthread_cond_wait(&mp.cond, &mp.mutex)
		}
	}
}
So at the thread level, locking boils down to: try to grab the lock with CAS (spinning a little on multi-core machines), and if that fails, publish your own M's address in the shared word l.key and block on the condition variable via pthread_cond_wait until someone wakes you.
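The spin escalation is worth spelling out. Below is a hypothetical user-level sketch of the same idea (procyield becomes a tight busy loop, osyield becomes runtime.Gosched, and instead of semasleep the sketch simply gives up); the names and constants are mine, not the runtime's:

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

// trySpinThenYield illustrates lock2's escalation: a few rounds of active
// spinning, then cooperative yields; a real lock would then queue itself
// and sleep, but this sketch just reports failure at that point.
func trySpinThenYield(flag *uint32) bool {
	const activeSpin, passiveSpin = 4, 1
	for i := 0; i < activeSpin+passiveSpin; i++ {
		if atomic.CompareAndSwapUint32(flag, 0, 1) {
			return true
		}
		if i < activeSpin {
			// "active" spin: burn a few cycles without giving up the thread,
			// standing in for procyield(active_spin_cnt)
			for j := 0; j < 30; j++ {
				_ = j
			}
		} else {
			// "passive" spin: let something else run, standing in for osyield()
			runtime.Gosched()
		}
	}
	return false // here lock2 would store its M in l.key and call semasleep(-1)
}

func main() {
	var flag uint32
	fmt.Println(trySpinThenYield(&flag)) // true: uncontended, the first CAS wins
	fmt.Println(trySpinThenYield(&flag)) // false: still held, a real lock would now sleep
}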
Now the unlock side, unlockWithRank, which forwards to unlock2:
func unlockWithRank(l *mutex) {
	unlock2(l)
}

// We might not be holding a p in this code.
//
//go:nowritebarrier
func unlock2(l *mutex) {
	gp := getg()
	var mp *m
	for {
		// load the shared word
		v := atomic.Loaduintptr(&l.key)
		// exactly locked means no M is queued behind us: clear it and leave
		if v == locked {
			if atomic.Casuintptr(&l.key, locked, 0) {
				break
			}
		} else {
			// Other M's are waiting for the lock.
			// Dequeue an M.
			// the key holds the address of a waiting M: pop it and wake it
			mp = muintptr(v &^ locked).ptr()
			if atomic.Casuintptr(&l.key, v, uintptr(mp.nextwaitm)) {
				// Dequeued an M. Wake it.
				// ultimately calls pthread_cond_signal to wake that thread
				semawakeup(mp)
				break
			}
		}
	}
	gp.m.locks--
	if gp.m.locks < 0 {
		throw("runtime·unlock: lock count")
	}
	if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
		gp.stackguard0 = stackPreempt
	}
}
//go:nosplit
func semawakeup(mp *m) {
	// lock the target M's pthread mutex
	pthread_mutex_lock(&mp.mutex)
	mp.count++
	if mp.count > 0 {
		// signal the thread sleeping in semasleep on this M
		pthread_cond_signal(&mp.cond)
	}
	// unlock
	pthread_mutex_unlock(&mp.mutex)
}
semaRoot: the balanced tree
Now for the balanced tree itself. semtable is an array of 251 buckets, each holding a semaRoot, and semaRoot's treap field is the root of a treap of sudogs, one node per distinct address that has goroutines blocked on it.
// A semaRoot holds a balanced tree of sudog with distinct addresses (s.elem).
// Each of those sudog may in turn point (through s.waitlink) to a list
// of other sudogs waiting on the same address.
// The operations on the inner lists of sudogs with the same address
// are all O(1). The scanning of the top-level semaRoot list is O(log n),
// where n is the number of distinct addresses with goroutines blocked
// on them that hash to the given semaRoot.
// See golang.org/issue/17953 for a program that worked badly
// before we introduced the second level of list, and
// BenchmarkSemTable/OneAddrCollision/* for a benchmark that exercises this.
type semaRoot struct {
	lock  mutex
	treap *sudog // root of balanced tree of unique waiters.
	nwait uint32 // Number of waiters. Read w/o the lock.
}

var semtable semTable

// Prime to not correlate with any user patterns.
const semTabSize = 251

type semTable [semTabSize]struct {
	root semaRoot
	pad  [cpu.CacheLinePadSize - unsafe.Sizeof(semaRoot{})]byte
}

func (t *semTable) rootFor(addr *uint32) *semaRoot {
	return &t[(uintptr(unsafe.Pointer(addr))>>3)%semTabSize].root
}
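So every semaphore address is hashed into one of 251 buckets simply by shifting away the low alignment bits of the address and taking it modulo the prime table size. Reproducing that computation outside the runtime, purely for illustration (bucketFor is my name for it):

package main

import (
	"fmt"
	"unsafe"
)

const semTabSize = 251

// bucketFor mirrors rootFor's index computation: drop the low alignment
// bits of the address and reduce it modulo the prime table size.
func bucketFor(addr *uint32) uintptr {
	return (uintptr(unsafe.Pointer(addr)) >> 3) % semTabSize
}

func main() {
	var a, b uint32
	// Different semaphore addresses usually land in different buckets, so
	// their waiters are protected by different semaRoot locks.
	fmt.Println(bucketFor(&a), bucketFor(&b))
}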
sync_runtime_Semrelease
Like the acquire side, this is hooked up to the sync package at compile time via go:linkname. If the acquire path above made sense, the release path, semrelease1, reads very similarly:
func semrelease1(addr *uint32, handoff bool, skipframes int) {
	// find the semaRoot bucket for this address
	root := semtable.rootFor(addr)
	// bump the counter so a later acquirer can take the easy path
	atomic.Xadd(addr, 1)

	// Easy case: no waiters?
	// This check must happen after the xadd, to avoid a missed wakeup
	// (see loop in semacquire).
	// nobody is waiting, nothing to wake
	if atomic.Load(&root.nwait) == 0 {
		return
	}

	// Harder case: search for a waiter and wake it.
	// lock the root
	lockWithRank(&root.lock, lockRankRoot)
	// re-check under the lock
	if atomic.Load(&root.nwait) == 0 {
		// The count is already consumed by another goroutine,
		// so no need to wake up another goroutine.
		unlock(&root.lock)
		return
	}
	// dequeue one waiter blocked on this address
	s, t0 := root.dequeue(addr)
	if s != nil {
		atomic.Xadd(&root.nwait, -1)
	}
	unlock(&root.lock)
	if s != nil { // May be slow or even yield, so unlock first
		acquiretime := s.acquiretime
		if acquiretime != 0 {
			mutexevent(t0-acquiretime, 3+skipframes)
		}
		if s.ticket != 0 {
			throw("corrupted semaphore ticket")
		}
		if handoff && cansemacquire(addr) {
			s.ticket = 1
		}
		// readyWithTime calls goready to make the waiter runnable again
		readyWithTime(s, 5+skipframes)
		if s.ticket == 1 && getg().m.locks == 0 {
			// Direct G handoff
			// readyWithTime has added the waiter G as runnext in the
			// current P; we now call the scheduler so that we start running
			// the waiter G immediately.
			// Note that waiter inherits our time slice: this is desirable
			// to avoid having a highly contended semaphore hog the P
			// indefinitely. goyield is like Gosched, but it emits a
			// "preempted" trace event instead and, more importantly, puts
			// the current G on the local runq instead of the global one.
			// We only do this in the starving regime (handoff=true), as in
			// the non-starving case it is possible for a different waiter
			// to acquire the semaphore while we are yielding/scheduling,
			// and this would be wasteful. We wait instead to enter starving
			// regime, and then we start to do direct handoffs of ticket and
			// P.
			// See issue 33747 for discussion.
			// yield so the handed-off waiter starts running right away
			goyield()
		}
	}
}
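Putting it all together: any contended sync.Mutex exercises this path. In the toy program below, goroutines that lose the fast path eventually park in semacquire1 via runtime_SemacquireMutex, and every Unlock of a contended mutex goes through semrelease1 to hand the semaphore to the next waiter:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var (
		mu      sync.Mutex
		wg      sync.WaitGroup
		counter int
	)
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				mu.Lock() // under contention, eventually blocks in semacquire1
				counter++
				mu.Unlock() // wakes the next waiter through semrelease1
			}
		}()
	}
	wg.Wait()
	fmt.Println(counter) // 8000: every increment was serialized by the semaphore
}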