Golang源码探索----GC的实现原理(2)

最新推荐文章于 2024-05-07 09:58:15 发布

Golang语言社区

最新推荐文章于 2024-05-07 09:58:15 发布

阅读量671

点赞数

文章标签：分布式 java python 算法 jvm

mcentral向mheap申请一个新的span会使用grow函数:

 1// grow allocates a new empty span from the heap and initializes it for c's size class.
 2func (c *mcentral) grow() *mspan {
 3    // Based on the mcentral's span class, compute the size of the span to request (in pages, i.e. bytes / 8K) and how many elements it can hold
 4    npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
 5    size := uintptr(class_to_size[c.spanclass.sizeclass()])
 6    n := (npages << _PageShift) / size
 7    // Request a new span from mheap, in units of pages (8K); the last two arguments are large=false, needzero=true
 8    s := mheap_.alloc(npages, c.spanclass, false, true)
 9    if s == nil {
10        return nil
11    }
12    p := s.base()
13    s.limit = p + size*n
14    // Allocate and initialize the span's allocBits and gcmarkBits
15    heapBitsForSpan(s.base()).initSpan(s)
16    return s
17}

mheap分配span的函数是alloc:

 1func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero bool) *mspan {
 2    // Run alloc_m on the g0 stack via systemstack, then zero the span if the caller asked for zeroed memory and the span is marked needzero
 3    // See the previous article in this series for an explanation of systemstack
 4    // Don't do any operations that lock the heap on the G stack.
 5    // It might trigger stack growth, and the stack growth code needs
 6    // to be able to allocate heap.
 7    var s *mspan
 8    systemstack(func() {
 9        s = h.alloc_m(npage, spanclass, large)
10    })
11    if s != nil {
12        if needzero && s.needzero != 0 {
13            memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
14        }
15        s.needzero = 0
16    }
17    return s
18}

alloc函数会在g0的栈空间中调用alloc_m函数:

  1// Allocate a new span of npage pages from the heap for GC'd memory
  2// and record its size class in the HeapMap and HeapMapCache.
  3func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
  4    _g_ := getg()
  5    if _g_ != _g_.m.g0 {
  6        throw("_mheap_alloc not on g0 stack")
  7    }
  8    // Lock mheap; this lock is a global lock
  9    lock(&h.lock)
 10    // To keep the heap from growing too fast, sweep and reclaim n pages before allocating n pages
 11    // This sweeps the busy lists first and then the busylarge list; see the reclaim and reclaimList functions for details
 12    // To prevent excessive heap growth, before allocating n pages
 13    // we need to sweep and reclaim at least n pages.
 14    if h.sweepdone == 0 {
 15        // TODO(austin): This tends to sweep a large number of
 16        // spans in order to find a few completely free spans
 17        // (for example, in the garbage benchmark, this sweeps
 18        // ~30x the number of pages its trying to allocate).
 19        // If GC kept a bit for whether there were any marks
 20        // in a span, we could release these free spans
 21        // at the end of GC and eliminate this entirely.
 22        if trace.enabled {
 23            traceGCSweepStart()
 24        }
 25        h.reclaim(npage)
 26        if trace.enabled {
 27            traceGCSweepDone()
 28        }
 29    }
 30    // Add the local statistics kept in mcache to the global statistics
 31    // transfer stats from cache to global
 32    memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
 33    _g_.m.mcache.local_scan = 0
 34    memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
 35    _g_.m.mcache.local_tinyallocs = 0
 36    // Call allocSpanLocked to allocate the span; allocSpanLocked requires that mheap is already locked
 37    s := h.allocSpanLocked(npage, &memstats.heap_inuse)
 38    if s != nil {
 39        // Record span info, because gc needs to be
 40        // able to map interior pointer to containing span.
 41        // Set the span's sweepgen = the global sweepgen
 42        atomic.Store(&s.sweepgen, h.sweepgen)
 43        // Put it on the global span list; sweepSpans has length 2
 44        // sweepSpans[h.sweepgen/2%2] holds the list of spans currently in use
 45        // sweepSpans[1-h.sweepgen/2%2] holds the list of spans waiting to be swept
 46        // Because sweepgen is increased by 2 on every GC, the two lists swap roles on every GC
 47        h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
 48        // Initialize the span's fields
 49        s.state = _MSpanInUse
 50        s.allocCount = 0
 51        s.spanclass = spanclass
 52        if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
 53            s.elemsize = s.npages << _PageShift
 54            s.divShift = 0
 55            s.divMul = 0
 56            s.divShift2 = 0
 57            s.baseMask = 0
 58        } else {
 59            s.elemsize = uintptr(class_to_size[sizeclass])
 60            m := &class_to_divmagic[sizeclass]
 61            s.divShift = m.shift
 62            s.divMul = m.mul
 63            s.divShift2 = m.shift2
 64            s.baseMask = m.baseMask
 65        }
 66        // update stats, sweep lists
 67        h.pagesInUse += uint64(npage)
 68        // NOTE: mcentral.grow above passes large=false, so spans requested through grow skip the branch below
 69        // When large is true, add the allocated span to the busy lists; a span with len(h.busy) or more pages (per the article, _MaxMHeapList = 128 pages = 8K*128 = 1M) goes on the busylarge list
 70        if large {
 71            memstats.heap_objects++
 72            mheap_.largealloc += uint64(s.elemsize)
 73            mheap_.nlargealloc++
 74            atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
 75            // Swept spans are at the end of lists.
 76            if s.npages < uintptr(len(h.busy)) {
 77                h.busy[s.npages].insertBack(s)
 78            } else {
 79                h.busylarge.insertBack(s)
 80            }
 81        }
 82    }
 83    // If a GC is in progress, heap_live has changed, so re-adjust the amount of mark-assist work required of Gs
 84    // See the analysis of the revise function later in the article for details
 85    // heap_scan and heap_live were updated.
 86    if gcBlackenEnabled != 0 {
 87        gcController.revise()
 88    }
 89    // Tracing
 90    if trace.enabled {
 91        traceHeapAlloc()
 92    }
 93    // h.spans is accessed concurrently without synchronization
 94    // from other threads. Hence, there must be a store/store
 95    // barrier here to ensure the writes to h.spans above happen
 96    // before the caller can publish a pointer p to an object
 97    // allocated from s. As soon as this happens, the garbage
 98    // collector running on another processor could read p and
 99    // look up s in h.spans. The unlock acts as the barrier to
100    // order these writes. On the read side, the data dependency
101    // between p and the index in h.spans orders the reads.
102    unlock(&h.lock)
103    return s
104}

继续查看allocSpanLocked函数:

 1// Allocates a span of the given size.  h must be locked.
 2// The returned span has been removed from the
 3// free list, but its state is still MSpanFree.
 4func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
 5    var list *mSpanList
 6    var s *mspan
 7    // Try to allocate from the free lists in mheap
 8    // Free spans with fewer than _MaxMHeapList (128 pages = 1M) pages are all kept in the free lists
 9    // Free spans with more than _MaxMHeapList pages are all kept in freelarge
10    // Try in fixed-size lists up to max.
11    for i := int(npage); i < len(h.free); i++ {
12        list = &h.free[i]
13        if !list.isEmpty() {
14            s = list.first
15            list.remove(s)
16            goto HaveSpan
17        }
18    }
19    // If nothing is found in the free lists, search freelarge
20    // If that also fails, request a new span from the arena, add it to freelarge, then search freelarge again
21    // Best fit in list of large spans.
22    s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us
23    if s == nil {
24        if !h.grow(npage) {
25            return nil
26        }
27        s = h.allocLarge(npage)
28        if s == nil {
29            return nil
30        }
31    }
32HaveSpan:
33    // Mark span in use.
34    if s.state != _MSpanFree {
35        throw("MHeap_AllocLocked - MSpan not free")
36    }
37    if s.npages < npage {
38        throw("MHeap_AllocLocked - bad npages")
39    }
40    // If the span has released pages (virtual-to-physical memory mapping removed), signal that these pages are about to be used and update the statistics
41    if s.npreleased > 0 {
42        sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift)
43        memstats.heap_released -= uint64(s.npreleased << _PageShift)
44        s.npreleased = 0
45    }
46    // If the span obtained has more pages than requested,
47    // split the remaining pages off into another span and put that span back on the free lists
48    if s.npages > npage {
49        // Trim extra and put it back in the heap.
50        t := (*mspan)(h.spanalloc.alloc())
51        t.init(s.base()+npage<<_PageShift, s.npages-npage)
52        s.npages = npage
53        p := (t.base() - h.arena_start) >> _PageShift
54        if p > 0 {
55            h.spans[p-1] = s
56        }
57        h.spans[p] = t
58        h.spans[p+t.npages-1] = t
59        t.needzero = s.needzero
60        s.state = _MSpanManual // prevent coalescing with s
61        t.state = _MSpanManual
62        h.freeSpanLocked(t, false, false, s.unusedsince)
63        s.state = _MSpanFree
64    }
65    s.unusedsince = 0
66    // Fill in the spans area, recording which page addresses map to which mspan object
67    p := (s.base() - h.arena_start) >> _PageShift
68    for n := uintptr(0); n < npage; n++ {
69        h.spans[p+n] = s
70    }
71    // Update the statistics
72    *stat += uint64(npage << _PageShift)
73    memstats.heap_idle -= uint64(npage << _PageShift)
74    //println("spanalloc", hex(s.start<<_PageShift))
75    if s.inList() {
76        throw("still in list")
77    }
78    return s
79}

继续查看allocLarge函数:

1// allocLarge allocates a span of at least npage pages from the treap of large spans.
2// Returns nil if no such span currently exists.
3func (h *mheap) allocLarge(npage uintptr) *mspan {
4    // Search the freelarge treap for the smallest span with >= npage pages; remove also takes it out of the treap.
5    return h.freelarge.remove(npage)
6}

freelarge的类型是mTreap, 调用remove函数会在树里面搜索一个至少npage且在树中的最小的span返回:

 1// remove searches for, finds, removes from the treap, and returns the smallest
 2// span that can hold npages. If no span has at least npages return nil.
 3// This is slightly more complicated than a simple binary tree search
 4// since if an exact match is not found the next larger node is
 5// returned.
 6// Put differently: a node whose npagesKey is >= npages and that has no left
 7// child able to hold npages (a smaller npagesKey) is the "best fit" node.
 8func (root *mTreap) remove(npages uintptr) *mspan {
 9    t := root.treap
10    for t != nil {
11        if t.spanKey == nil {
12            throw("treap node with nil spanKey found")
13        }
14        if t.npagesKey < npages { // this node is too small, continue with its right child
15            t = t.right
16        } else if t.left != nil && t.left.npagesKey >= npages { // the left child still fits, so a tighter fit may exist there
17            t = t.left
18        } else { // this node fits and its left child (if any) does not: take it
19            result := t.spanKey
20            root.removeNode(t)
21            return result
22        }
23    }
24    return nil
25}

向arena区域申请新span的函数是mheap类的grow函数:

 1// Try to add at least npage pages of memory to the heap,
 2// returning whether it worked.
 3//
 4// h must be locked.
 5func (h *mheap) grow(npage uintptr) bool {
 6    // Ask for a big chunk, to reduce the number of mappings
 7    // the operating system needs to track; also amortizes
 8    // the overhead of an operating system mapping.
 9    // Allocate a multiple of 64kB.
10    npage = round(npage, (64<<10)/_PageSize)
11    ask := npage << _PageShift
12    if ask < _HeapAllocChunk {
13        ask = _HeapAllocChunk
14    }
15    // Call mheap.sysAlloc to request the memory; on failure retry once with just the rounded request size
16    v := h.sysAlloc(ask)
17    if v == nil {
18        if ask > npage<<_PageShift {
19            ask = npage << _PageShift
20            v = h.sysAlloc(ask)
21        }
22        if v == nil {
23            print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
24            return false
25        }
26    }
27    // Create a new span and add it to the free lists
28    // Create a fake "in use" span and free it, so that the
29    // right coalescing happens.
30    s := (*mspan)(h.spanalloc.alloc())
31    s.init(uintptr(v), ask>>_PageShift)
32    p := (s.base() - h.arena_start) >> _PageShift
33    for i := p; i < p+s.npages; i++ {
34        h.spans[i] = s
35    }
36    atomic.Store(&s.sweepgen, h.sweepgen)
37    s.state = _MSpanInUse
38    h.pagesInUse += uint64(s.npages)
39    h.freeSpanLocked(s, false, true, 0)
40    return true
41}

继续查看mheap的sysAlloc函数:

  1// sysAlloc allocates the next n bytes from the heap arena. The
  2// returned pointer is always _PageSize aligned and between
  3// h.arena_start and h.arena_end. sysAlloc returns nil on failure.
  4// There is no corresponding free function.
  5func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
  6    // strandLimit is the maximum number of bytes to strand from
  7    // the current arena block. If we would need to strand more
  8    // than this, we fall back to sysAlloc'ing just enough for
  9    // this allocation.
 10    const strandLimit = 16 << 20
 11    // If the arena does not have enough room left (n > arena_end - arena_alloc), call sysReserve to reserve more space and then update arena_end
 12    // On linux sysReserve calls the mmap function:
 13    // mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
 14    if n > h.arena_end-h.arena_alloc {
 15        // If we haven't grown the arena to _MaxMem yet, try
 16        // to reserve some more address space.
 17        p_size := round(n+_PageSize, 256<<20)
 18        new_end := h.arena_end + p_size // Careful: can overflow
 19        if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxMem {
 20            // TODO: It would be bad if part of the arena
 21            // is reserved and part is not.
 22            var reserved bool
 23            p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved))
 24            if p == 0 {
 25                // TODO: Try smaller reservation
 26                // growths in case we're in a crowded
 27                // 32-bit address space.
 28                goto reservationFailed
 29            }
 30            // p can be just about anywhere in the address
 31            // space, including before arena_end.
 32            if p == h.arena_end {
 33                // The new block is contiguous with
 34                // the current block. Extend the
 35                // current arena block.
 36                h.arena_end = new_end
 37                h.arena_reserved = reserved
 38            } else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxMem && h.arena_end-h.arena_alloc < strandLimit {
 39                // We were able to reserve more memory
 40                // within the arena space, but it's
 41                // not contiguous with our previous
 42                // reservation. It could be before or
 43                // after our current arena_used.
 44                //
 45                // Keep everything page-aligned.
 46                // Our pages are bigger than hardware pages.
 47                h.arena_end = p + p_size
 48                p = round(p, _PageSize)
 49                h.arena_alloc = p
 50                h.arena_reserved = reserved
 51            } else {
 52                // We got a mapping, but either
 53                //
 54                // 1) It's not in the arena, so we
 55                // can't use it. (This should never
 56                // happen on 32-bit.)
 57                //
 58                // 2) We would need to discard too
 59                // much of our current arena block to
 60                // use it.
 61                //
 62                // We haven't added this allocation to
 63                // the stats, so subtract it from a
 64                // fake stat (but avoid underflow).
 65                //
 66                // We'll fall back to a small sysAlloc.
 67                stat := uint64(p_size)
 68                sysFree(unsafe.Pointer(p), p_size, &stat)
 69            }
 70        }
 71    }
 72    // When the reserved space is large enough, just map the range and advance arena_alloc
 73    if n <= h.arena_end-h.arena_alloc {
 74        // Keep taking from our reservation.
 75        p := h.arena_alloc
 76        sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys)
 77        h.arena_alloc += n
 78        if h.arena_alloc > h.arena_used {
 79            h.setArenaUsed(h.arena_alloc, true)
 80        }
 81        if p&(_PageSize-1) != 0 {
 82            throw("misrounded allocation in MHeap_SysAlloc")
 83        }
 84        return unsafe.Pointer(p)
 85    }
 86    // Handling for the case where reserving space failed
 87reservationFailed:
 88    // If using 64-bit, our reservation is all we have.
 89    if sys.PtrSize != 4 {
 90        return nil
 91    }
 92    // On 32-bit, once the reservation is gone we can
 93    // try to get memory at a location chosen by the OS.
 94    p_size := round(n, _PageSize) + _PageSize
 95    p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
 96    if p == 0 {
 97        return nil
 98    }
 99    if p < h.arena_start || p+p_size-h.arena_start > _MaxMem {
100        // This shouldn't be possible because _MaxMem is the
101        // whole address space on 32-bit.
102        top := uint64(h.arena_start) + _MaxMem
103        print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n")
104        sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys)
105        return nil
106    }
107    p += -p & (_PageSize - 1)
108    if p+n > h.arena_used {
109        h.setArenaUsed(p+n, true)
110    }
111    if p&(_PageSize-1) != 0 {
112        throw("misrounded allocation in MHeap_SysAlloc")
113    }
114    return unsafe.Pointer(p)
115}

以上就是分配对象的完整流程了, 接下来分析GC标记和回收对象的处理.

回收对象的处理

回收对象的流程

GO的GC是并行GC, 也就是GC的大部分处理和普通的go代码是同时运行的, 这让GO的GC流程比较复杂.
首先GC有四个阶段, 它们分别是:

Sweep Termination: 对未清扫的span进行清扫, 只有上一轮的GC的清扫工作完成才可以开始新一轮的GC
Mark: 扫描所有根对象, 和根对象可以到达的所有对象, 标记它们不被回收
Mark Termination: 完成标记工作, 重新扫描部分根对象(要求STW)
Sweep: 按标记结果清扫span

下图是比较完整的GC流程, 并按颜色对这四个阶段进行了分类:

在GC过程中会有两种后台任务(G), 一种是标记用的后台任务, 一种是清扫用的后台任务.
标记用的后台任务会在需要时启动, 可以同时工作的后台任务数量大约是P的数量的25%, 也就是go所讲的让25%的cpu用在GC上的根据.
清扫用的后台任务在程序启动时会启动一个, 进入清扫阶段时唤醒.

目前整个GC流程会进行两次STW(Stop The World), 第一次是Mark阶段的开始, 第二次是Mark Termination阶段.
第一次STW会准备根对象的扫描, 启动写屏障(Write Barrier)和辅助GC(mutator assist).
第二次STW会重新扫描部分根对象, 禁用写屏障(Write Barrier)和辅助GC(mutator assist).
需要注意的是, 不是所有根对象的扫描都需要STW, 例如扫描栈上的对象只需要停止拥有该栈的G.
从go 1.9开始, 写屏障的实现使用了Hybrid Write Barrier, 大幅减少了第二次STW的时间.

GC的触发条件

GC在满足一定条件后会被触发, 触发条件有以下几种:

gcTriggerAlways: 强制触发GC
gcTriggerHeap: 当前分配的内存达到一定值就触发GC
gcTriggerTime: 当一定时间没有执行过GC就触发GC
gcTriggerCycle: 要求启动新一轮的GC, 已启动则跳过, 手动触发GC的runtime.GC()会使用这个条件

触发条件的判断在gcTrigger的test函数.
其中gcTriggerHeap和gcTriggerTime这两个条件是自然触发的, gcTriggerHeap的判断代码如下:

return memstats.heap_live >= memstats.gc_trigger

heap_live的增加在上面对分配器的代码分析中可以看到, 当值达到gc_trigger就会触发GC, 那么gc_trigger是如何决定的?
gc_trigger的计算在gcSetTriggerRatio函数中, 公式是:

trigger = uint64(float64(memstats.heap_marked) * (1 + triggerRatio))

当前标记存活的大小乘以1+系数triggerRatio, 就是下次触发GC需要的分配量.
triggerRatio在每次GC后都会调整, 计算triggerRatio的函数是endCycle, 公式是:

 1const triggerGain = 0.5
 2// Goal heap growth ratio, 1.0 by default
 3goalGrowthRatio := float64(gcpercent) / 100
 4// Actual heap growth ratio, equal to total size / live (marked) size - 1
 5actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1
 6// Time spent in the GC mark phase (because endCycle is called during the Mark Termination phase)
 7assistDuration := nanotime() - c.markStartTime
 8// CPU utilization of the GC mark phase; the goal value is 0.25
 9utilization := gcGoalUtilization
10if assistDuration > 0 {
11    // assistTime is the total time Gs spent assisting the GC with marking
12    // (nanoseconds spent in mutator assists during this cycle)
13    // Extra CPU utilization = total mark-assist time / (mark phase duration * number of Ps)
14    utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs))
15}
16// Trigger error = goal growth ratio - old trigger ratio - CPU utilization / goal CPU utilization * (actual growth ratio - old trigger ratio)
17// Analysis of the parameters:
18// The larger the actual growth ratio, the smaller the trigger error; when it is below 0 the next GC triggers earlier
19// The larger the CPU utilization, the smaller the trigger error; when it is below 0 the next GC triggers earlier
20// The larger the old trigger ratio, the smaller the trigger error; when it is below 0 the next GC triggers earlier (NOTE(review): utilization starts at gcGoalUtilization, so the triggerRatio terms combine to triggerRatio*(utilization/gcGoalUtilization-1), which looks non-negative; this statement may be inverted, verify)
21triggerError := goalGrowthRatio - memstats.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-memstats.triggerRatio)
22// Adjust the trigger ratio by the error, applying only half of the error each time (gradual adjustment)
23triggerRatio := memstats.triggerRatio + triggerGain*triggerError

公式中的"目标Heap增长率"可以通过设置环境变量"GOGC"调整, 默认值是100, 增加它的值可以减少GC的触发.
设置"GOGC=off"可以彻底关掉GC.

gcTriggerTime的判断代码如下:

lastgc := int64(atomic.Load64(&memstats.last_gc_nanotime))
return lastgc != 0 && t.now-lastgc > forcegcperiod

forcegcperiod的定义是2分钟, 也就是2分钟内没有执行过GC就会强制触发.

三色的定义(黑, 灰, 白)

我看过的对三色GC的"三色"这个概念解释的最好的文章就是这一篇了, 强烈建议先看这一篇中的讲解.
"三色"的概念可以简单的理解为:

黑色: 对象在这次GC中已标记, 且这个对象包含的子对象也已标记
灰色: 对象在这次GC中已标记, 但这个对象包含的子对象未标记
白色: 对象在这次GC中未标记

在go内部对象并没有保存颜色的属性, 三色只是对它们的状态的描述,
白色的对象在它所在的span的gcmarkBits中对应的bit为0,
灰色的对象在它所在的span的gcmarkBits中对应的bit为1, 并且对象在标记队列中,
黑色的对象在它所在的span的gcmarkBits中对应的bit为1, 并且对象已经从标记队列中取出并处理.
gc完成后, gcmarkBits会移动到allocBits然后重新分配一个全部为0的bitmap, 这样黑色的对象就变为了白色.

写屏障(Write Barrier)

因为go支持并行GC, GC的扫描和go代码可以同时运行, 这样带来的问题是GC扫描的过程中go代码有可能改变了对象的依赖树,
例如开始扫描时发现根对象A和B, B拥有C的指针, GC先扫描A, 然后B把C的指针交给A, GC再扫描B, 这时C就不会被扫描到.
为了避免这个问题, go在GC的标记阶段会启用写屏障(Write Barrier).

启用了写屏障(Write Barrier)后, 当B把C的指针交给A时, GC会认为在这一轮的扫描中C的指针是存活的,
即使A可能会在稍后丢掉C, 那么C就在下一轮回收.
写屏障只针对指针启用, 而且只在GC的标记阶段启用, 平时会直接把值写入到目标地址.

go在1.9开始启用了混合写屏障(Hybrid Write Barrier), 伪代码如下:

1writePointer(slot, ptr):
2    shade(*slot)
3    if any stack is grey:
4        shade(ptr)
5    *slot = ptr

混合写屏障会同时标记指针写入目标的"原指针"和"新指针".

标记原指针的原因是, 其他运行中的线程有可能会同时把这个指针的值复制到寄存器或者栈上的本地变量,
因为复制指针到寄存器或者栈上的本地变量不会经过写屏障, 所以有可能会导致指针不被标记, 试想下面的情况:

[go] b = obj
[go] oldx = nil
[gc] scan oldx...
[go] oldx = b.x // 复制b.x到本地变量, 不经过写屏障
[go] b.x = ptr // 写屏障应该标记b.x的原值
[gc] scan b...
如果写屏障不标记原值, 那么oldx就不会被扫描到.

标记新指针的原因是, 其他运行中的线程有可能会转移指针的位置, 试想下面的情况:

[go] a = ptr
[go] b = obj
[gc] scan b...
[go] b.x = a // 写屏障应该标记b.x的新值
[go] a = nil
[gc] scan a...
如果写屏障不标记新值, 那么ptr就不会被扫描到.

混合写屏障可以让GC在并行标记结束后不需要重新扫描各个G的堆栈, 可以减少Mark Termination中的STW时间.
除了写屏障外, 在GC的过程中所有新分配的对象都会立刻变为黑色, 在上面的mallocgc函数中可以看到.

辅助GC(mutator assist)

为了防止heap增速太快, 在GC执行的过程中如果同时运行的G分配了内存, 那么这个G会被要求辅助GC做一部分的工作.
在GC的过程中同时运行的G称为"mutator", "mutator assist"机制就是G辅助GC做一部分工作的机制.

辅助GC做的工作有两种类型, 一种是标记(Mark), 另一种是清扫(Sweep).
辅助标记的触发可以查看上面的mallocgc函数, 触发时G会帮助扫描"工作量"个对象, 工作量的计算公式是:

debtBytes * assistWorkPerByte

意思是分配的大小乘以系数assistWorkPerByte, assistWorkPerByte的计算在函数revise中, 公式是:

 1// Scan work still expected = heap_scan - scan work already done
 2scanWorkExpected := int64(memstats.heap_scan) - c.scanWork
 3if scanWorkExpected < 1000 {
 4    scanWorkExpected = 1000
 5}
 6// Heap distance to the goal = heap size at which the GC is expected to finish (next_gc) - current heap size (heap_live)
 7// Note that next_gc is computed differently from gc_trigger: next_gc equals heap_marked * (1 + gcpercent / 100)
 8heapDistance := int64(memstats.next_gc) - int64(atomic.Load64(&memstats.heap_live))
 9if heapDistance <= 0 {
10    heapDistance = 1
11}
12// Scan work an assist must do per allocated byte = scan work still expected / heap distance to the goal
13c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance)
14c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected)

和辅助标记不一样的是, 辅助清扫申请新span时才会检查, 而辅助标记是每次分配对象时都会检查.
辅助清扫的触发可以看上面的cacheSpan函数, 触发时G会帮助回收"工作量"页的对象, 工作量的计算公式是:

spanBytes * sweepPagesPerByte // 不完全相同, 具体看deductSweepCredit函数

意思是分配的大小乘以系数sweepPagesPerByte, sweepPagesPerByte的计算在函数gcSetTriggerRatio中, 公式是:

 1// Current heap size
 2heapLiveBasis := atomic.Load64(&memstats.heap_live)
 3// Heap distance to the trigger = heap size at which the next GC triggers - current heap size
 4heapDistance := int64(trigger) - int64(heapLiveBasis)
 5heapDistance -= 1024 * 1024
 6if heapDistance < _PageSize {
 7    heapDistance = _PageSize
 8}
 9// Number of pages already swept
10pagesSwept := atomic.Load64(&mheap_.pagesSwept)
11// Number of pages not yet swept = pages in use - pages already swept
12sweepDistancePages := int64(mheap_.pagesInUse) - int64(pagesSwept)
13if sweepDistancePages <= 0 {
14    mheap_.sweepPagesPerByte = 0
15} else {
16    // Pages to sweep per allocated byte (of span) = pages not yet swept / heap distance to the trigger
17    mheap_.sweepPagesPerByte = float64(sweepDistancePages) / float64(heapDistance)
18}