In Go, the code that returns memory to the operating system is implemented mainly by sysUnusedOS.
On Linux it defaults to releasing memory via madvise with _MADV_FREE, under which reclamation is deferred: the kernel may not actually take the pages back until there is memory pressure.
func sysUnusedOS(v unsafe.Pointer, n uintptr) {
	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// Throw if the address or length is not a multiple of the physical page size.
		throw("unaligned sysUnused")
	}
	advise := atomic.Load(&adviseUnused)
	if debug.madvdontneed != 0 && advise != madviseUnsupported {
		advise = _MADV_DONTNEED
	}
	switch advise {
	case _MADV_FREE:
		// Try MADV_FREE first: it hints to the OS that these pages can be
		// reclaimed, but reclamation may be deferred until memory pressure.
		if madvise(v, n, _MADV_FREE) == 0 {
			break
		}
		// MADV_FREE is unsupported; fall back to MADV_DONTNEED.
		atomic.Store(&adviseUnused, _MADV_DONTNEED)
		fallthrough
	case _MADV_DONTNEED:
		// MADV_DONTNEED hints that these pages can be reclaimed. The kernel
		// is still free to delay freeing the pages until an appropriate
		// moment, but the calling process's resident set size (RSS) drops
		// immediately.
		if madvise(v, n, _MADV_DONTNEED) == 0 {
			break
		}
		// MADV_DONTNEED is unsupported too; record that madvise is unusable.
		atomic.Store(&adviseUnused, madviseUnsupported)
		fallthrough
	case madviseUnsupported:
		// Without madvise, remap the region with a fresh anonymous mapping
		// to release the memory.
		mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	}
	// If hard decommit is enabled (GODEBUG=harddecommit=1), also drop all
	// permissions on the region.
	if debug.harddecommit > 0 {
		p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
		if p != v || err != 0 {
			throw("runtime: cannot disable permissions in address space")
		}
	}
}
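The practical difference between the two advice values can be observed from user code. Below is a minimal, Linux-only sketch (my own illustration, not part of the runtime): run it normally and then with GODEBUG=madvdontneed=1. HeapReleased grows in both cases, but RSS only drops promptly in the MADV_DONTNEED case.

// Linux-only sketch: compare HeapReleased with VmRSS after forcing a
// scavenge. Under the default MADV_FREE, RSS may stay high until memory
// pressure; under GODEBUG=madvdontneed=1 it drops immediately.
package main

import (
	"fmt"
	"os"
	"runtime"
	"runtime/debug"
	"strconv"
	"strings"
)

// rssKiB parses VmRSS from /proc/self/status (Linux only).
func rssKiB() int {
	data, err := os.ReadFile("/proc/self/status")
	if err != nil {
		return -1
	}
	for _, line := range strings.Split(string(data), "\n") {
		if strings.HasPrefix(line, "VmRSS:") {
			n, _ := strconv.Atoi(strings.Fields(line)[1])
			return n
		}
	}
	return -1
}

func main() {
	buf := make([]byte, 256<<20) // 256 MiB
	for i := range buf {
		buf[i] = 1 // touch every page so it counts toward RSS
	}
	fmt.Println("RSS after alloc (KiB):", rssKiB())

	buf = nil
	debug.FreeOSMemory() // runs a GC, then scavenges everything

	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	fmt.Println("HeapReleased (KiB):", ms.HeapReleased>>10)
	fmt.Println("RSS after FreeOSMemory (KiB):", rssKiB())
}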
sysUnusedOS is called (via sysUnused) by scavengeOne, which searches a given memory chunk for contiguous free pages and tries to scavenge up to a requested number of bytes.
Note that scavenged memory remains available for allocation afterwards.
// ci: the chunk index to operate on.
// searchIdx: the page index within the chunk to start searching from.
// max: an upper bound on the number of bytes to scavenge.
//
//go:systemstack
func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintptr {
	// Compute the maximum number of pages to scavenge (rounding up).
	maxPages := max / pageSize
	if max%pageSize != 0 {
		maxPages++
	}
	// Compute the minimum number of pages to scavenge at a time.
	minPages := physPageSize / pageSize
	if minPages < 1 {
		minPages = 1
	}
	lock(p.mheapLock)
	// Only proceed if the chunk has at least minPages free pages.
	if p.summary[len(p.summary)-1][ci].max() >= uint(minPages) {
		// Find a candidate page range to scavenge.
		base, npages := p.chunkOf(ci).findScavengeCandidate(searchIdx, minPages, maxPages)
		if npages != 0 {
			// Compute the base address of the range.
			addr := chunkBase(ci) + uintptr(base)*pageSize
			// Mark the range as allocated so no other goroutine can grab it
			// while it is being scavenged.
			p.chunkOf(ci).allocRange(base, npages)
			p.update(addr, uintptr(npages), true, true)
			// With that done, it's safe to unlock.
			unlock(p.mheapLock)
			// Outside of tests, release the memory with sysUnused and update
			// the global statistics.
			if !p.test {
				pageTraceScav(getg().m.p.ptr(), 0, addr, uintptr(npages))
				sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
				// Update global accounting only when not in test, otherwise
				// the runtime's accounting will be wrong.
				nbytes := int64(npages * pageSize)
				gcController.heapReleased.add(nbytes)
				gcController.heapFree.add(-nbytes)
				stats := memstats.heapStats.acquire()
				atomic.Xaddint64(&stats.committed, -nbytes)
				atomic.Xaddint64(&stats.released, nbytes)
				memstats.heapStats.release()
			}
			// Relock the heap, because now we need to make these pages
			// available for allocation: free them back to the page allocator
			// and mark them as scavenged.
			lock(p.mheapLock)
			// Pull searchAddr back so the next allocation search starts no
			// later than this address.
			if b := (offAddr{addr}); b.lessThan(p.searchAddr) {
				p.searchAddr = b
			}
			// Free the npages pages starting at base back to the allocator.
			p.chunkOf(ci).free(base, npages)
			// Update the allocator's internal structures to reflect the
			// freed contiguous range.
			p.update(addr, uintptr(npages), true, false)
			// Mark the range as scavenged.
			p.chunkOf(ci).scavenged.setRange(base, npages)
			unlock(p.mheapLock)
			return uintptr(npages) * pageSize
		}
	}
	// Mark this chunk as having no free pages.
	p.scav.index.setEmpty(ci)
	unlock(p.mheapLock)
	return 0
}
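The page-rounding logic at the top of scavengeOne is easy to check with a tiny worked example (my own sketch, not runtime code), using the common case of an 8 KiB runtime page and a 4 KiB physical page:

package main

import "fmt"

const (
	pageSize     = 8 << 10 // the runtime's logical page size
	physPageSize = 4 << 10 // typical physical page size on linux/amd64
)

func pageBounds(max uintptr) (minPages, maxPages uintptr) {
	// Round max up to whole pages: a partial page still costs a full page.
	maxPages = max / pageSize
	if max%pageSize != 0 {
		maxPages++
	}
	// Scavenge at least one physical page's worth of runtime pages, and at
	// least one runtime page even when physPageSize < pageSize.
	minPages = physPageSize / pageSize
	if minPages < 1 {
		minPages = 1
	}
	return
}

func main() {
	minP, maxP := pageBounds(100 << 10) // ask to scavenge up to 100 KiB
	fmt.Println(minP, maxP)             // 1 13
}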
scavengeOne is in turn called by scavenge.
scavenge works chunk by chunk, starting from the highest addresses, and keeps going until the requested number of bytes (nbytes) has been scavenged or the heap is exhausted. If needed, it can also force scavenging by ignoring the huge-page heuristic.
func (p *pageAlloc) scavenge(nbytes uintptr, shouldStop func() bool, force bool) uintptr {
	released := uintptr(0)
	// Keep going until the requested nbytes have been released or the heap
	// is exhausted.
	for released < nbytes {
		// Find a scavenge candidate.
		ci, pageIdx := p.scav.index.find(force)
		if ci == 0 {
			break
		}
		// Perform the scavenging.
		systemstack(func() {
			released += p.scavengeOne(ci, pageIdx, nbytes-released)
		})
		if shouldStop != nil && shouldStop() {
			break
		}
	}
	return released
}
The following sections go through each caller that triggers returning memory to the system.
allocSpan
allocSpan allocates an mspan that owns npages pages of memory. It allocates and initializes the span according to the given allocation type (typ) and span class (spanclass), performs any needed scavenging, and updates statistics. It must be called on the system stack because it takes the heap lock and must remain safe with respect to garbage collection.
//go:systemstack
func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass) (s *mspan) {
	// Function-global state.
	gp := getg()
	base, scav := uintptr(0), uintptr(0)
	growth := uintptr(0)

	// On some platforms, stack allocations must be aligned to the physical
	// page size.
	needPhysPageAlign := physPageAlignedStacks && typ == spanAllocStack && pageSize < physPageSize

	// If the allocation is small enough, try the page cache!
	// The page cache does not support aligned allocations, so we cannot use
	// it if we need to provide a physical page aligned stack allocation.
	pp := gp.m.p.ptr()
	if !needPhysPageAlign && pp != nil && npages < pageCachePages/4 {
		c := &pp.pcache

		// If the cache is empty, refill it from the global page allocator.
		if c.empty() {
			lock(&h.lock)
			*c = h.pages.allocToCache()
			unlock(&h.lock)
		}

		// Try to allocate from the cache; on success, try to grab an mspan
		// without taking the heap lock.
		base, scav = c.alloc(npages)
		if base != 0 {
			s = h.tryAllocMSpan()
			if s != nil {
				goto HaveSpan
			}
			// We have a base but no mspan, so we need
			// to lock the heap.
		}
	}

	// For one reason or another, we couldn't get the
	// whole job done without the heap lock.
	lock(&h.lock)

	// If physical page alignment is required, over-allocate so the result
	// can be aligned afterwards.
	if needPhysPageAlign {
		// Overallocate by a physical page to allow for later alignment.
		extraPages := physPageSize / pageSize

		// Find a big enough region, but only allocate the aligned portion.
		// We can't simply allocate and then free the edges, because we would
		// have to account for scavenged memory, which is hard to do
		// mid-allocation. This skips updating searchAddr; even if searchAddr
		// is stale and higher than normal, the operation is still correct,
		// only slower. If the search fails, grow the heap and retry.
		base, _ = h.pages.find(npages + extraPages)
		if base == 0 {
			var ok bool
			growth, ok = h.grow(npages + extraPages)
			if !ok {
				unlock(&h.lock)
				return nil
			}
			base, _ = h.pages.find(npages + extraPages)
			if base == 0 {
				throw("grew heap, but no adequate free space found")
			}
		}
		// Align the base address and allocate the range.
		base = alignUp(base, physPageSize)
		scav = h.pages.allocRange(base, npages)
	}

	// If we still don't have a base address, try to acquire one now,
	// growing the heap if necessary.
	if base == 0 {
		// Try to acquire a base address.
		base, scav = h.pages.alloc(npages)
		if base == 0 {
			var ok bool
			growth, ok = h.grow(npages)
			if !ok {
				unlock(&h.lock)
				return nil
			}
			base, scav = h.pages.alloc(npages)
			if base == 0 {
				throw("grew heap, but no adequate free space found")
			}
		}
	}
	if s == nil {
		// We failed to get an mspan earlier, so grab
		// one now that we have the heap lock.
		s = h.allocMSpanLocked()
	}
	unlock(&h.lock)

HaveSpan:
	bytesToScavenge := uintptr(0)
	forceScavenge := false
	// If the GC CPU limiter is not engaged and memory usage exceeds the
	// configured memory limit (memoryLimit), compute how much to scavenge.
	if limit := gcController.memoryLimit.Load(); !gcCPULimiter.limiting() {
		// Assist with scavenging to maintain the memory limit by the amount
		// that we expect to page in.
		inuse := gcController.mappedReady.Load()
		// Be careful about overflow, especially with uintptrs. Even on 32-bit platforms
		// someone can set a really big memory limit that isn't maxInt64.
		// If in-use memory (inuse) plus the memory this allocation pages in
		// (scav) exceeds the limit, scavenge the difference and force it.
		if uint64(scav)+inuse > uint64(limit) {
			bytesToScavenge = uintptr(uint64(scav) + inuse - uint64(limit))
			forceScavenge = true
		}
	}
	// Check the GC-percent-based scavenge goal: if the heap grew past it,
	// raise bytesToScavenge to cover the overage.
	if goal := scavenge.gcPercentGoal.Load(); goal != ^uint64(0) && growth > 0 {
		// Scavenging inline deals with fragmentation-induced allocation
		// failures, preferring to scavenge the memory least likely to be
		// reused. The memory limit is not in play here, so we only need to
		// care about heap growth; the memory-limit check above handled the
		// rest.
		//
		// Check whether retained memory plus the growth exceeds the goal.
		if retained := heapRetained(); retained+uint64(growth) > goal {
			// The scavenging below runs with the heap lock released, since
			// it can be slow; other goroutines can keep allocating in the
			// meantime and use the memory we just grew into.
			//
			// Scavenge at most the overage (retained + growth - goal),
			// capped by the growth itself, and keep the larger of that and
			// any memory-limit-driven amount computed above.
			todo := growth
			if overage := uintptr(retained + uint64(growth) - goal); todo > overage {
				todo = overage
			}
			if todo > bytesToScavenge {
				bytesToScavenge = todo
			}
		}
	}
	// Return memory to the system if needed.
	var now int64
	if pp != nil && bytesToScavenge > 0 {
		start := nanotime()
		track := pp.limiterEvent.start(limiterEventScavengeAssist, start)

		// Scavenge, but back out if the limiter turns on.
		released := h.pages.scavenge(bytesToScavenge, func() bool {
			return gcCPULimiter.limiting()
		}, forceScavenge)

		mheap_.pages.scav.releasedEager.Add(released)

		// Finish up accounting.
		now = nanotime()
		if track {
			pp.limiterEvent.stop(limiterEventScavengeAssist, now)
		}
		scavenge.assistTime.Add(now - start)
	}

	// Initialize the span.
	h.initSpan(s, typ, spanclass, base, npages)

	// Commit and account for any scavenged memory that the span now owns.
	nbytes := npages * pageSize
	// If the span contains scavenged memory (scav), sysUsed commits those
	// pages again and heapReleased is reduced accordingly.
	if scav != 0 {
		// sysUsed all the pages that are actually available
		// in the span since some of them might be scavenged.
		sysUsed(unsafe.Pointer(base), nbytes, scav)
		gcController.heapReleased.add(-int64(scav))
	}
	// Update stats.
	gcController.heapFree.add(-int64(nbytes - scav))
	if typ == spanAllocHeap {
		gcController.heapInUse.add(int64(nbytes))
	}
	// Update consistent stats.
	stats := memstats.heapStats.acquire()
	atomic.Xaddint64(&stats.committed, int64(scav))
	atomic.Xaddint64(&stats.released, -int64(scav))
	switch typ {
	case spanAllocHeap:
		atomic.Xaddint64(&stats.inHeap, int64(nbytes))
	case spanAllocStack:
		atomic.Xaddint64(&stats.inStacks, int64(nbytes))
	case spanAllocPtrScalarBits:
		atomic.Xaddint64(&stats.inPtrScalarBits, int64(nbytes))
	case spanAllocWorkBuf:
		atomic.Xaddint64(&stats.inWorkBufs, int64(nbytes))
	}
	memstats.heapStats.release()

	pageTraceAlloc(pp, now, base, npages)
	return s
}
From the code above, allocSpan returns memory to the system only when:
- the GC CPU limiter is not engaged and memory usage exceeds the configured memory limit (memoryLimit), or
- retained memory plus the heap growth exceeds the scavenge goal.
As for the memory limit:
the default is maxInt64, which is effectively no limit at all.
It can be set with the GOMEMLIMIT environment variable or with debug.SetMemoryLimit().
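For reference, here is a minimal usage sketch of the API side (runtime/debug, available since Go 1.19):

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// Equivalent to running with GOMEMLIMIT=512MiB. The previous limit is
	// returned; by default it is math.MaxInt64 (effectively unlimited).
	prev := debug.SetMemoryLimit(512 << 20)
	fmt.Println("previous limit:", prev)

	// A negative input only reads the current limit without changing it.
	fmt.Println("current limit:", debug.SetMemoryLimit(-1))
}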
The scavenge goal (gcPercentGoal), on the other hand, is computed by the following code:
// Compute our scavenging goal.
goalRatio := float64(heapGoal) / float64(lastHeapGoal)
gcPercentGoal := uint64(float64(memstats.lastHeapInUse) * goalRatio)
// Add retainExtraPercent overhead to retainedGoal. This calculation
// looks strange but the purpose is to arrive at an integer division
// (e.g. if retainExtraPercent = 12.5, then we get a divisor of 8)
// that also avoids the overflow from a multiplication.
gcPercentGoal += gcPercentGoal / (1.0 / (retainExtraPercent / 100.0))
// Align it to a physical page boundary to make the following calculations
// a bit more exact.
gcPercentGoal = (gcPercentGoal + uint64(physPageSize) - 1) &^ (uint64(physPageSize) - 1)

// Represents where we are now in the heap's contribution to RSS in bytes.
//
// Guaranteed to always be a multiple of physPageSize on systems where
// physPageSize <= pageSize since we map new heap memory at a size larger than
// any physPageSize and released memory in multiples of the physPageSize.
//
// However, certain functions recategorize heap memory as other stats (e.g.
// stacks) and this happens in multiples of pageSize, so on systems
// where physPageSize > pageSize the calculations below will not be exact.
// Generally this is OK since we'll be off by at most one regular
// physical page.
heapRetainedNow := heapRetained()

// If we're already below our goal, or within one page of our goal, then indicate
// that we don't need the background scavenger for maintaining a memory overhead
// proportional to the heap goal.
if heapRetainedNow <= gcPercentGoal || heapRetainedNow-gcPercentGoal < uint64(physPageSize) {
	scavenge.gcPercentGoal.Store(^uint64(0))
} else {
	scavenge.gcPercentGoal.Store(gcPercentGoal)
}
Summarizing the computation above as a single formula (dropping the physical-page alignment step):

\text{gcPercentGoal} = \text{lastHeapInUse} \times \frac{\text{heapGoal}}{\text{lastHeapGoal}} \times \left(1 + \frac{\text{retainExtraPercent}}{100}\right)

(Note that dividing by 1/(retainExtraPercent/100) in the code is the same as multiplying by retainExtraPercent/100.)
From this formula:
- The larger the current heap goal (heapGoal), the larger gcPercentGoal, and the more likely the goal ends up disabled (stored as ^uint64(0) because retained memory is already below it).
- The smaller the previous heap goal (lastHeapGoal), the larger gcPercentGoal, and likewise the more likely it is disabled.
- The larger the heap in use at the end of the last cycle (lastHeapInUse), the larger gcPercentGoal, and likewise.
retainExtraPercent is a constant equal to 10.
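To make this concrete, here is a worked example with assumed numbers: lastHeapInUse = 100 MiB, heapGoal = 200 MiB, lastHeapGoal = 180 MiB, and retainExtraPercent = 10 give

\text{gcPercentGoal} = 100 \times \frac{200}{180} \times \left(1 + \frac{10}{100}\right) \approx 122.2\ \text{MiB}

so the background goal only drives scavenging while heapRetained() stays above roughly 122 MiB.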
In summary, what triggers returning memory to the system during span allocation?
- Memory usage exceeds the configured memory limit.
- Retained memory is large enough (retained memory includes pages that are allocated but unused, and pages that held data but have not yet been released).
- The current heap goal is relatively small, or the previous cycle's heap usage was small, so the computed scavenge goal is low.
debug.FreeOSMemory
Calling debug.FreeOSMemory explicitly scavenges every free page and attempts to return the memory to the system, via mheap.scavengeAll:
// scavengeAll acquires the heap lock (blocking any additional
// manipulation of the page allocator) and iterates over the whole
// heap, scavenging every free page available.
//
// Must run on the system stack because it acquires the heap lock.
//
//go:systemstack
func (h *mheap) scavengeAll() {
	// Disallow malloc or panic while holding the heap lock. We do
	// this here because this is a non-mallocgc entry-point to
	// the mheap API.
	gp := getg()
	gp.m.mallocing++

	// Force scavenge everything.
	released := h.pages.scavenge(^uintptr(0), nil, true)

	gp.m.mallocing--

	if debug.scavtrace > 0 {
		printScavTrace(0, released, true)
	}
}
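A convenient way to watch this path is GODEBUG=scavtrace=1, the flag checked by debug.scavtrace above. A minimal sketch (my own, not from the article's code):

// Run with GODEBUG=scavtrace=1: the debug.FreeOSMemory call below reaches
// scavengeAll, which prints a trace line for the forced scavenge via
// printScavTrace.
package main

import "runtime/debug"

var sink []byte

func main() {
	sink = make([]byte, 64<<20) // allocate 64 MiB so there is something to release
	sink = nil
	debug.FreeOSMemory()
}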
bgscavenge
bgscavenge is the Go runtime's background scavenger goroutine. Its job is to keep the application's RSS (resident set size) within a reasonable range.
func bgscavenge(c chan int) {
	// Initialize the scavenger and wire up its actual scavenging function.
	scavenger.init()

	// Signal that the background scavenger is up and running.
	c <- 1
	// Block until someone wakes the scavenger.
	scavenger.park()

	for {
		// Do the actual scavenging work; run returns the amount of memory
		// released and the time spent working.
		released, workTime := scavenger.run()
		// If nothing was released, park again and wait for the next wakeup.
		if released == 0 {
			scavenger.park()
			continue
		}
		mheap_.pages.scav.releasedBg.Add(released)
		scavenger.sleep(workTime)
	}
}
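This park/run/sleep structure is a classic self-pacing background worker. A user-level analogue (my own sketch, not runtime code) looks like this:

// A worker that blocks until woken, does a bounded batch of work, then
// sleeps in proportion to the work done so it never monopolizes a CPU.
package main

import (
	"fmt"
	"time"
)

type worker struct{ wakeCh chan struct{} }

// park blocks until someone calls wake, like scavenger.park().
func (w *worker) park() { <-w.wakeCh }

// wake coalesces: waking a worker that already has a pending wakeup is a
// no-op, roughly like waking a scavenger that isn't parked.
func (w *worker) wake() {
	select {
	case w.wakeCh <- struct{}{}:
	default:
	}
}

// run does one batch of work and reports how long it took, standing in for
// scavengerState.run's (released, workTime) pair.
func (w *worker) run() (units int, workTime time.Duration) {
	start := time.Now()
	for i := 0; i < 16; i++ {
		units++ // placeholder for one 64 KiB scavenge quantum
	}
	return units, time.Since(start)
}

func main() {
	w := &worker{wakeCh: make(chan struct{}, 1)}
	go func() {
		for {
			w.park()
			units, workTime := w.run()
			fmt.Println("did", units, "units in", workTime)
			// Sleep long enough to cap utilization, like scavenger.sleep().
			time.Sleep(10 * workTime)
		}
	}()
	w.wake()
	time.Sleep(100 * time.Millisecond)
}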
The run method is what actually performs the scavenging.
func (s *scavengerState) run() (released uintptr, worked float64) {
	// Check that run is being called from the scavenger's own goroutine.
	lock(&s.lock)
	if getg() != s.g {
		throw("tried to run scavenger from another goroutine")
	}
	unlock(&s.lock)

	// Keep working until at least minScavWorkTime has been spent scavenging.
	for worked < minScavWorkTime {
		// If something from outside tells us to stop early, stop.
		if s.shouldStop() {
			break
		}

		// Scavenge 64 KiB at a time. A smaller quantum makes the scavenger
		// more responsive to the scheduler (better for preemption); a larger
		// one amortizes the scavenging overhead better. The current value
		// assumes a cost of roughly 10 µs per physical page, giving a
		// worst-case latency of about 160 µs for 4 KiB physical pages,
		// biasing toward latency over throughput.
		const scavengeQuantum = 64 << 10

		// Accumulate the amount of time spent scavenging.
		r, duration := s.scavenge(scavengeQuantum)

		// On some platforms the elapsed time can come back as zero, either
		// because it is below the clock's minimum granularity or because of
		// clock bugs (end >= start failing). In that case, estimate the work
		// as 10 µs per physical page, ignoring the effect of huge pages;
		// otherwise use the measured duration. Then accumulate the amount of
		// memory released.
		const approxWorkedNSPerPhysicalPage = 10e3
		if duration == 0 {
			worked += approxWorkedNSPerPhysicalPage * float64(r/physPageSize)
		} else {
			// TODO(mknyszek): If duration is small compared to worked, it could be
			// rounded down to zero. Probably not a problem in practice because the
			// values are all within a few orders of magnitude of each other but maybe
			// worth worrying about.
			worked += float64(duration)
		}
		released += r

		// If less than a full quantum was released, the scavengable heap is
		// exhausted; stop.
		if r < scavengeQuantum {
			break
		}
		// When using fake time just do one loop.
		if faketime != 0 {
			break
		}
	}
	// Releasing less than one physical page would mean part of an in-use
	// physical page may have been released, which could corrupt memory, so
	// throw.
	if released > 0 && released < physPageSize {
		throw("released less than one physical page of memory")
	}
	return
}
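A quick worked check of these constants: one scavengeQuantum is 64 KiB, which with the 4 KiB physical pages assumed in the comment is 16 pages. So when the clock reports a zero duration, the loop credits 16 × 10 µs = 160 µs of work per quantum, which is exactly the worst-case latency figure quoted above.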
The key question is: when does the scavenger get woken up?
The wakeup entry point is scavenger.wake(), which is called from:
- scavengerState.timer.f: scavengerState.sleep() resets the timer, so the scavenger is woken when it fires.
- finishsweep_m() in mgcsweep.go: it runs before the GC mark phase to make sure all spans have been swept so that marking and reclamation can proceed, and it wakes the scavenger. Most wakeups come from here.
- sysmon() in proc.go.
Observed under a debugger, bgscavenge runs very frequently, with the wakeups coming almost entirely from finishsweep_m, so it is fair to say that memory is returned to the system on every GC cycle.
Putting it all together:
neither allocSpan nor debug.FreeOSMemory is a routine scavenging path; only bgscavenge runs on every GC cycle, returning memory to the system in 64 KiB quanta.
Ref
- Go 1.22.3 source code