深入理解高性能字节池 bytebufferpool

最新推荐文章于 2024-05-15 09:48:00 发布

AirGo.

最新推荐文章于 2024-05-15 09:48:00 发布

阅读量1.3k

点赞数

分类专栏： GoLang 文章标签： GoLang GC优化 bytebufferpool 高性能源码

码字不易，转载请附原链，搬砖繁忙回复不及时见谅，技术交流请加QQ群：909211071，或关注微信公众号：程序猿AirGo

本文链接：https://blog.csdn.net/why444216978/article/details/122016389

版权

GoLang 专栏收录该内容

53 篇文章 7 订阅

订阅专栏

搬砖繁忙回复不及时见谅，技术交流请加QQ群：909211071，或关注公众号：程序猿AirGo

为何需要字节池

在 Go 语言编程中，从 io.Reader 中读取数据时，我们都要创建一个字节切片 []byte 去存储。在高频调用或并发比较高的场景中，需要频繁地进行内存申请和释放，增大了 GC 的压力，所以这时候需要采用 “字节池” 来优化。

sync.Pool

Go 标准库中为我们提供了 sync.Pool，可以很简单地实现一个字节池：

// A minimal byte pool built on sync.Pool: whenever the pool has nothing to
// hand out, New allocates a fixed-size 256-byte slice.
pool := &sync.Pool{
    New: func() interface{} {
        return make([]byte, 256)
    },
}

但是这种方式实现的字节池有几个缺点：

每个资源大小都是固定的，有些场景不需要这么多的内存。
出现非常大的数据时，会导致 []byte 扩容，再放回池子中会占用很大内存。

为了解决上面的两个问题，就引出我们本文的主角：bytebufferpool。

bytebufferpool

这是一个大佬写的开源字节池，著名的 fasthttp 就使用了它，作者也是同一个，相比 sync.Pool 它提供了如下能力：

按照数据长度划分区间（分组）并统计各区间的使用次数，据此动态校准新建 []byte 的默认容量。
占用内存过大的 []byte 禁止放回池内。

下面我们就从源码角度去剖析一下它如何实现的，为了方便阅读，直接把解析以注释的形式标注在源码中了：

package bytebufferpool
 
import (
	"sort"
	"sync"
	"sync/atomic"
)
 
const (
	// NOTE(review): 1<<6 is 64 (bytes), not 64K; this looks like a cache-line
	// size rather than an L1 cache size — confirm against upstream.
	minBitSize = 6  // log2 of minSize, the upper bound of the smallest size class
	steps      = 20 // number of size classes

	minSize = 1 << minBitSize               // 64: lengths in (0, 64] fall into the first class
	maxSize = 1 << (minBitSize + steps - 1) // 33554432: the last class starts at (16777216, 33554432]; index clamps larger lengths into it

	calibrateCallsThreshold = 42000 // Put runs calibrate once a class counter exceeds this value
	maxPercentile           = 0.95  // calibrate stops raising maxSize once the classes visited (most-used first) exceed this fraction of all calls
)
 
// Pool represents byte buffer pool.
//
// Distinct pools may be used for distinct types of byte buffers.
// Properly determined byte buffer types with their own pools may help reducing
// memory waste.
//
// The uint64 fields are read and written with sync/atomic operations.
type Pool struct {
	calls       [steps]uint64 // per-size-class Put counters, indexed by index(len(b.B))
	calibrating uint64        // 1 while calibrate is running (claimed via CAS), 0 otherwise

	defaultSize uint64 // capacity Get uses for newly allocated buffers; calibrate sets it to the most-used class size
	maxSize     uint64 // largest capacity Put will keep in the pool; 0 means no limit is applied

	pool sync.Pool // holds the *ByteBuffer values that Put kept for reuse
}
 
// defaultPool is the pool used by the package-level Get and Put functions.
var defaultPool Pool
 
// Get hands out an empty byte buffer taken from the package-level default pool.
//
// Pass the buffer to Put once it is no longer needed, so that later calls can
// reuse it instead of allocating a new one.
func Get() *ByteBuffer {
	return defaultPool.Get()
}
 
// Get returns a byte buffer for the caller to fill.
//
// A buffer cached in the underlying sync.Pool is reused when one is
// available; otherwise a new zero-length buffer is allocated whose capacity
// equals the pool's current defaultSize. Hand the buffer back with Put after
// use in order to minimize GC overhead.
func (p *Pool) Get() *ByteBuffer {
	if cached := p.pool.Get(); cached != nil {
		return cached.(*ByteBuffer)
	}

	// Nothing cached: allocate a fresh buffer sized to the current default.
	initialCap := atomic.LoadUint64(&p.defaultSize)
	buf := new(ByteBuffer)
	buf.B = make([]byte, 0, initialCap)
	return buf
}
 
// Put gives a byte buffer back to the package-level default pool.
//
// ByteBuffer.B mustn't be touched once the buffer has been returned;
// otherwise data races will occur.
func Put(b *ByteBuffer) {
	defaultPool.Put(b)
}
 
// Put hands a buffer obtained via Get back to the pool.
//
// Each call is counted against the size class of the buffer's current length,
// and calibrate is invoked once that counter passes calibrateCallsThreshold.
// The buffer is reset and kept only if its capacity is within the pool's
// maxSize (or no maxSize is set); otherwise it is dropped.
//
// The buffer mustn't be accessed after returning to the pool.
func (p *Pool) Put(b *ByteBuffer) {
	// Record one use for the size class matching the buffer's length.
	class := index(len(b.B))
	uses := atomic.AddUint64(&p.calls[class], 1)
	if uses > calibrateCallsThreshold {
		p.calibrate()
	}

	// A zero limit means no cap is applied; otherwise buffers whose capacity
	// exceeds the limit are not kept.
	limit := int(atomic.LoadUint64(&p.maxSize))
	if limit != 0 && cap(b.B) > limit {
		return
	}

	b.Reset()
	p.pool.Put(b)
}
 
// calibrate recomputes defaultSize and maxSize from the per-class call
// counters, resetting every counter to zero as it is read.
//
// The calibrating flag is claimed with a compare-and-swap; a caller that
// fails to claim it returns immediately without doing any work.
func (p *Pool) calibrate() {
	if !atomic.CompareAndSwapUint64(&p.calibrating, 0, 1) {
		return
	}

	// Snapshot and zero each counter, pairing it with its class size.
	stats := make(callSizes, 0, steps)
	var total uint64
	for class := uint64(0); class < steps; class++ {
		n := atomic.SwapUint64(&p.calls[class], 0)
		total += n
		stats = append(stats, callSize{
			calls: n,
			size:  minSize << class,
		})
	}

	// Order the classes from most used to least used.
	sort.Sort(stats)

	// The most-used class becomes the default allocation size and the
	// starting point for the size limit.
	defaultSize := stats[0].size
	maxSize := defaultSize

	// Visit classes in usage order, raising maxSize to the largest class seen,
	// and stop once the visited classes account for more than maxPercentile
	// of all calls. Rarely used large classes are thereby left out of the
	// limit.
	threshold := uint64(float64(total) * maxPercentile)
	var seen uint64
	for _, st := range stats {
		if seen > threshold {
			break
		}
		seen += st.calls
		if st.size > maxSize {
			maxSize = st.size
		}
	}

	// Publish the new values, then release the calibrating flag.
	atomic.StoreUint64(&p.defaultSize, defaultSize)
	atomic.StoreUint64(&p.maxSize, maxSize)

	atomic.StoreUint64(&p.calibrating, 0)
}
 
// callSize pairs a size class with the number of calls recorded for it.
type callSize struct {
	calls uint64
	size  uint64
}

// callSizes implements sort.Interface, ordering entries by descending calls.
type callSizes []callSize

// Len reports the number of entries.
func (s callSizes) Len() int { return len(s) }

// Less reports whether entry i has strictly more calls than entry j, so that
// sorting puts the most-used entries first.
func (s callSizes) Less(i, j int) bool {
	return s[j].calls < s[i].calls
}

// Swap exchanges entries i and j.
func (s callSizes) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}
 
// index maps a buffer length n to the index of its size class.
//
// It right-shifts n-1 by minBitSize and counts the significant bits that
// remain: lengths up to 1<<minBitSize give 0, and each further doubling of the
// length gives the next index. The result is capped at steps-1.
func index(n int) int {
	rest := (n - 1) >> minBitSize
	class := 0
	for ; rest > 0; rest >>= 1 {
		class++
	}
	if class >= steps {
		return steps - 1
	}
	return class
}

我们可以看到，仅用了短短的一百多行代码，就实现了一个能够分组，且可动态调整池分组大小的字节池。