简单缓存池设计

__int256

已于 2023-10-27 11:51:37 修改

阅读量83

点赞数

分类专栏： CoreKV学习笔记文章标签：缓存 go 数据结构算法

于 2023-10-25 23:06:40 首次发布

本文链接：https://blog.csdn.net/qq_42917242/article/details/134041967

版权

CoreKV学习笔记专栏收录该内容

6 篇文章 1 订阅

订阅专栏

缓存池

什么是缓存池？我们设想一个情景：有大量的用户需要访问数据，然而这些数据非常多，RAM 存不下，只能存在磁盘上，每次取数据都需要从磁盘里读，这样就会非常缓慢，无法满足一些低延时服务的要求。
如果我们将拿出来的数据暂时存在ram中，ram存不下了就放弃掉一些数据，当我们再次访问到一些最近访问的数据时就可以快速的从缓存中取出了。

缓存 = HashMap + 淘汰算法
LRU与LFU都是设计缓存池的算法

LRU-最近最少使用算法

当缓存池存满时，放弃掉最久没有被访问的数据；每次访问或修改时，都将缓存中对应的元素移到最开头
采用双向链表的方式模拟实现
以下是力扣LRU题目

package main

import (
	"fmt"
)

// List is a node of the circular doubly linked list that backs LRUCache.
type List struct {
	key int   // cache key stored in this node
	val int   // value associated with key
	nxt *List // next node in the list
	pre *List // previous node in the list
}

// N is the number of List nodes held by the preallocated array data.
const N int = 1e5 + 5

// data is a fixed-size, package-level backing array of list nodes.
var data [N]List

// cnt is an index into data; presumably the next unused slot — confirm
// against the call sites.
var cnt int

// LRUCache is a fixed-capacity cache that evicts the least recently used key.
type LRUCache struct {
	cap   int           // maximum number of entries
	n     int           // current number of entries
	dummy *List         // sentinel node: dummy.nxt is the newest entry, dummy.pre the oldest
	tp    map[int]*List // key -> list node
}

// Constructor returns an empty LRUCache that holds at most capacity entries.
//
// The sentinel node is allocated on the heap instead of being taken from a
// shared fixed-size array, so creating caches can never run out of slots and
// every cache owns its own list.
func Constructor(capacity int) LRUCache {
	sentinel := &List{}
	// An empty circular list is a sentinel that points at itself.
	sentinel.pre = sentinel
	sentinel.nxt = sentinel
	return LRUCache{
		cap:   capacity,
		n:     0,
		dummy: sentinel,
		tp:    map[int]*List{},
	}
}

// Get returns the value stored under key and marks that entry as the most
// recently used one. It returns -1 when key is not cached.
func (c *LRUCache) Get(key int) int {
	node := c.tp[key]
	if node == nil {
		return -1
	}

	// Unlink the node from its current position.
	before, after := node.pre, node.nxt
	before.nxt = after
	after.pre = before

	// Re-insert it directly behind the sentinel (the most-recent position).
	head := c.dummy
	first := head.nxt
	node.pre, node.nxt = head, first
	first.pre = node
	head.nxt = node

	return node.val
}

// Put stores value under key and marks the entry as the most recently used
// one. When key is new and the cache is full, the least recently used entry is
// evicted first.
//
// Nodes are heap-allocated and the evicted node is recycled for the incoming
// entry, so the number of Put calls is not bounded by any preallocated pool.
// A cache whose capacity is zero or negative stores nothing.
func (c *LRUCache) Put(key int, value int) {
	if c.cap <= 0 {
		// Without this guard the sentinel itself would be "evicted".
		return
	}

	node := c.tp[key]
	if node != nil {
		// Existing key: refresh the value and unlink for re-insertion below.
		node.val = value
		node.pre.nxt = node.nxt
		node.nxt.pre = node.pre
	} else {
		if c.n >= c.cap {
			// Full: detach the oldest entry and reuse its node.
			node = c.dummy.pre
			delete(c.tp, node.key)
			node.pre.nxt = node.nxt
			node.nxt.pre = node.pre
			c.n--
		} else {
			node = &List{}
		}
		node.key, node.val = key, value
		c.tp[key] = node
		c.n++
	}

	// Link the node directly behind the sentinel (most-recent position).
	node.pre = c.dummy
	node.nxt = c.dummy.nxt
	c.dummy.nxt.pre = node
	c.dummy.nxt = node
}

// main replays a small LRU scenario against a cache of capacity 2 and prints
// the result of every lookup.
func main() {
	cache := Constructor(2)
	cache.Put(1, 1)
	cache.Put(2, 2)
	fmt.Println(cache.Get(1)) // prints 1
	cache.Put(3, 3)           // evicts key 2; the cache is now {1=1, 3=3}
	fmt.Println(cache.Get(2)) // prints -1 (not found)
	cache.Put(4, 4)           // evicts key 1; the cache is now {4=4, 3=3}
	fmt.Println(cache.Get(1)) // prints -1 (not found)
	fmt.Println(cache.Get(3)) // prints 3
	fmt.Println(cache.Get(4)) // prints 4
}

LFU-最不经常使用算法

LFU 算法是 LRU 算法的改进，我们想象LRU算法的不合理之处
如果之前某个数据出现了非常多次，但是后面出现了大量新数据，就会把之前出现的数据顶替掉
为了解决这个问题我们再记录其访问频率，将缓存池中的数据按照访问频率插入到对应位置，如果访问频率相同，再按照最近访问进行比较

LFU 的优点

对于热点数据命中率会提高

LFU 的缺点

对于突发的稀疏流量处理较差

实现

按层维护LRU，每次将一个节点从当前层拿出来后放入下一层

定义

// List is a doubly linked list node that also records how often its key has
// been used.
type List struct {
	key int
	val int
	cnt int // number of times the entry has been used
	nxt *List
	pre *List
}
// LFUCache keeps one LRU list per use count ("layer").
type LFUCache struct {
	cap     int
	n       int
	minFreq int           // smallest use count
	maxFreq int           // largest use count
	dummy   map[int]*List // sentinel node of each layer, keyed by use count
	tp      map[int]*List // key -> the node that currently holds it
}

完整代码

package main

import (
	"fmt"
)

// List is a node of a circular doubly linked list; cnt is the use count that
// selects the layer the node lives in.
type List struct {
	key int   // cache key
	val int   // cached value
	cnt int   // number of times the entry has been read or overwritten
	nxt *List // next node
	pre *List // previous node
}

// Remove unlinks the node from the list it is part of by joining its two
// neighbours. The node's own pre/nxt pointers are left untouched.
func (node *List) Remove() {
	prev, next := node.pre, node.nxt
	next.pre = prev
	prev.nxt = next
}

// N is a size constant; NOTE(review): nothing in the visible LFU listing
// references it, so it looks like a leftover from the array-based LRU version.
const N int = 1e5 + 5

// LFUCache is a fixed-capacity cache that keeps one circular list per use
// count and evicts from the lowest non-empty one.
type LFUCache struct {
	cap     int           // maximum number of entries
	n       int           // current number of entries
	minFreq int           // use count at which the eviction scan starts
	maxFreq int           // highest use count seen so far
	dummy   map[int]*List // use count -> sentinel node of that layer
	tp      map[int]*List // key -> node
}

// Constructor returns an empty LFUCache with the given capacity. Only the
// layer for use count 0 exists initially; its sentinel points at itself.
func Constructor(capacity int) LFUCache {
	base := new(List)
	base.pre, base.nxt = base, base

	return LFUCache{
		cap:   capacity,
		dummy: map[int]*List{0: base},
		tp:    make(map[int]*List),
	}
}
// update makes sure the layer for the current maxFreq has a sentinel, creating
// an empty self-linked one when it is missing.
func (c *LFUCache) update() {
	if c.dummy[c.maxFreq] != nil {
		return
	}
	sentinel := &List{}
	sentinel.nxt = sentinel
	sentinel.pre = sentinel
	c.dummy[c.maxFreq] = sentinel
}
// Get returns the value stored under key, or -1 when key is not cached. A hit
// raises the entry's use count by one and moves it to the front of the layer
// for the new count.
func (c *LFUCache) Get(key int) int {
	node := c.tp[key]
	if node == nil {
		return -1
	}

	node.Remove()
	node.cnt++
	if c.maxFreq < node.cnt {
		// First entry to reach this count: the layer may not exist yet.
		c.maxFreq = node.cnt
		c.update()
	}

	layer := c.dummy[node.cnt]
	node.pre, node.nxt = layer, layer.nxt
	layer.nxt.pre = node
	layer.nxt = node
	return node.val
}

// Put stores value under key.
//
// Overwriting an existing key counts as a use: the entry's use count goes up
// by one and it moves to the front of the layer for the new count. A new key
// starts with use count 0; if the cache is full, the oldest entry of the
// lowest non-empty layer is evicted first.
//
// A cache whose capacity is zero or negative stores nothing. Without that
// guard the layer-0 sentinel would be treated as the eviction victim and the
// entry counter would go negative.
func (c *LFUCache) Put(key int, value int) {
	if c.cap <= 0 {
		return
	}

	node := c.tp[key]
	if node != nil {
		node.val = value
		node.Remove()
		node.cnt++
		if node.cnt > c.maxFreq {
			c.maxFreq = node.cnt
			c.update()
		}
	} else {
		if c.n >= c.cap {
			// Skip empty layers until one with an entry is found.
			for c.n > 0 && c.dummy[c.minFreq].pre == c.dummy[c.minFreq] {
				c.minFreq++
			}
			victim := c.dummy[c.minFreq].pre
			delete(c.tp, victim.key)
			victim.Remove()
			c.n--
		}
		node = &List{key: key, val: value}
		c.tp[key] = node
		// The new entry sits in layer 0, so the next scan starts there.
		c.minFreq = 0
		c.n++
	}

	// Link the node at the front of the layer matching its use count.
	layer := c.dummy[node.cnt]
	node.pre = layer
	node.nxt = layer.nxt
	layer.nxt.pre = node
	layer.nxt = node
}

// main replays a small LFU scenario against a cache of capacity 2 and prints
// the result of every lookup.
func main() {
	cache := Constructor(2)
	cache.Put(1, 1)
	cache.Put(2, 2)
	fmt.Println(cache.Get(1)) // prints 1; key 1 now has use count 1
	cache.Put(3, 3)           // evicts key 2 (lowest use count); the cache is now {1=1, 3=3}
	fmt.Println(cache.Get(2)) // prints -1 (not found)
	fmt.Println(cache.Get(3)) // prints 3
	fmt.Println(cache.Get(1)) // prints 1
	fmt.Println(cache.Get(3)) // prints 3
	fmt.Println(cache.Get(4)) // prints -1 (never inserted)
}

W-TinyLFU 算法

现在我们总结一下 LRU 和 LFU 的缺点

LRU 容易忽略高频数据
LFU 难以应对突发流量，很久以前出现的老数据无法清除

那么我们将 LFU 和 LRU 的思想结合起来，就是 W-TinyLFU
对于老数据，我们可以加上时间维度，把他遗忘掉

窗口设计

将缓存分为 99% 的主缓存和 1% 的 LRU缓存（LRU就像是在主缓存上开了一个窗口所以叫 W-LRU）
新数据进入时，首先进入 LRU 窗口；在 LRU 窗口中被淘汰的数据，再根据 SLRU 策略放入主缓存

SLRU策略

SLRU 分为 Stage1(20%) 和 Stage2(80%) 两个区域，Stage1 中存储非高频数据，Stage2 中存储高频数据
Stage1 和 Stage2 分别是两个 LRU，当 Stage1 中的数据被再次访问到时，将会进入 Stage2
若 Stage2 缓存满了，则交换两个数据

Count-Min Sketch-CmSketch计数器

Sketch 计数器

考虑到每条数据的访问次数大概率很少，而且还有遗忘策略，不需要记录非常高的访问次数，我们可以用 $4$ 个 $bit$ 来统计它
对每条数据用哈希映射到块大小为 $4$ 的 bitMap 上，但是可能会发生哈希冲突，如果我们每次都让其自然增加，那么统计出的频率可能会大于实际频率
但是这个问题并不会造成很大的影响，因为我们并不需要精确知道每个数字的出现频率，只需要知道其大概分布即可
这就是 Sketch 计数器

代码

维护一个 byte 数组

// cmRow is one row of counters; every byte packs two 4-bit counters.
type cmRow []byte

// newCmRow returns a zeroed row with room for num 4-bit counters.
//
// Two counters share one byte, so the byte count is num/2 rounded up. Rounding
// down would leave the last counter of an odd-sized row (for example the only
// counter when num is 1) without backing storage.
func newCmRow(num int64) cmRow {
	return make(cmRow, (num+1)/2)
}

// get returns the 4-bit counter number n. Even counters live in the low nibble
// of byte n/2, odd counters in the high nibble.
func (r cmRow) get(n uint64) byte {
	packed := r[n/2]
	if n%2 == 1 {
		packed >>= 4
	}
	return packed & 0x0f
}
// increment adds one to the 4-bit counter number n. A counter that already
// holds 15 is left alone so it cannot overflow into its neighbour.
func (r cmRow) increment(n uint64) {
	idx := n / 2
	shift := (n % 2) * 4
	if current := (r[idx] >> shift) & 0x0f; current >= 15 {
		return
	}
	r[idx] += 1 << shift
}
// reset halves every counter in the row. The byte is shifted right by one and
// masked with 0x77 so the bit that drops out of the high nibble does not leak
// into the low nibble.
func (r cmRow) reset() {
	for i, packed := range r {
		r[i] = (packed >> 1) & 0x77
	}
}
// clear sets every counter in the row to zero.
func (r cmRow) clear() {
	for i := 0; i < len(r); i++ {
		r[i] = 0
	}
}

CM-Sketch 进一步提高 Sketch 计数器的精度

前面我们提到，其精度降低是因为哈希冲突。如果我们多用几个哈希函数，分别映射到不同的 bitMap 上，查询时对各个 bitMap 的结果取 $\min$ ，就可以有效降低误差（提高精度）

概率分析

一个计数器中某个数发生冲突，是指存在另一个数和它映射到了相同的区域。设 bitMap 存储了 $n$ 个数据，长度为 $m$ ，则冲突的概率约为
$p = \frac{n}{m}$
假设每个数的频率是完全随机的，设原本数的频率为 $x$ ，那么这个数增大超过 $i$ 的概率大概是
$\frac {(16 - x - (i - x)) p} {16}(i-x) [i > x]$
如果我们多开 $q$ 个哈希表，再对他们的值进行取 $min$ 操作，那么一个数想要增大就必须所有哈希表中冲突的数都大于某个数，根据全概率公式，其增大期望为
$\sum_{i = x}^{16} P^q(i)$
可以看到， $i$ 与 $x$ 的距离越远，哈希表的个数越多其 $E (x)$ 越趋近于某个数

代码

直接套用上面的 Sketch

// next2Power rounds x up to the nearest power of two; a value that already is
// a power of two is returned unchanged (binary 100 stays 100, 101 becomes 1000).
func next2Power(x int64) int64 {
	x--
	// Smear the highest set bit into every lower position.
	for shift := uint(1); shift <= 32; shift *= 2 {
		x |= x >> shift
	}
	return x + 1
}

// Size is the number of rows (and per-row seeds) in a cmSketch.
const Size = 4

// cmSketch is a Count-Min Sketch made of Size rows of 4-bit counters.
type cmSketch struct {
	rows [Size]cmRow  // one counter row per hash
	seed [Size]uint64 // per-row random value combined with the key hash
	mask uint64       // maps a hash result to a counter position inside a row
}

// newCmSketch returns a sketch with Size rows. The number of counters per row
// is num rounded up to a power of two, and each row gets its own random seed.
//
// num is clamped to at least 2: a smaller value would produce rows that cannot
// hold the positions selected by mask, so the first Increment or Estimate
// would index out of range.
func newCmSketch(num int64) *cmSketch {
	if num < 2 {
		num = 2
	}
	num = next2Power(num)
	sketch := &cmSketch{mask: uint64(num - 1)}
	rd := rand.New(rand.NewSource(time.Now().UnixNano()))
	for i := 0; i < Size; i++ {
		sketch.seed[i] = rd.Uint64()
		sketch.rows[i] = newCmRow(num)
	}
	return sketch
}
// rowIndex maps hashed to a counter position in row i.
//
// The seeded hash is run through a 64-bit mixing step before it is masked.
// Masking hashed^seed directly keeps only the low bits, and XOR with a constant
// preserves whether two keys agree on those bits, so two keys that collide in
// one row would collide in every row and the extra rows would add nothing.
func (s *cmSketch) rowIndex(i int, hashed uint64) uint64 {
	h := hashed ^ s.seed[i]
	h ^= h >> 33
	h *= 0xff51afd7ed558ccd
	h ^= h >> 33
	h *= 0xc4ceb9fe1a85ec53
	h ^= h >> 33
	return h & s.mask
}

// Increment adds one to the counter selected for hashed in every row.
func (s *cmSketch) Increment(hashed uint64) {
	for i := range s.rows {
		s.rows[i].increment(s.rowIndex(i, hashed))
	}
}

// Estimate returns the smallest counter value found for hashed across all
// rows. Collisions can only make a counter too large, so the minimum is the
// tightest available estimate.
func (s *cmSketch) Estimate(hashed uint64) int64 {
	lowest := byte(255)
	for i := range s.rows {
		if v := s.rows[i].get(s.rowIndex(i, hashed)); v < lowest {
			lowest = v
		}
	}
	return int64(lowest)
}
// Reset implements the forgetting policy by calling reset on every row.
func (s *cmSketch) Reset() {
	for i := range s.rows {
		s.rows[i].reset()
	}
}
// Clear calls clear on every row of the sketch.
func (s *cmSketch) Clear() {
	for i := range s.rows {
		s.rows[i].clear()
	}
}

遗忘策略

每次将所有频率右移一位即可

TinyLFU算法

TinyLFU 作为桥梁连结了窗口与主存
数据在 W-LRU 中被淘汰时需要进入 Stage1，如果 Stage1 满了，这时就要比较谁比较优秀，从Cm-Sketch 中拿到访问频率进行比较

布隆过滤器

考虑一种突发情况，大量数据只出现一次，因为我们存在遗忘策略，这种情况会导致 LRU 窗口和 Stage1 频繁交换数据，用布隆过滤器记录哪些数据至少出现了一次，出现过的再放入 Stage1

代码

定义

// Cache combines a window LRU, a segmented LRU, a frequency sketch and a bloom
// filter into one W-TinyLFU style cache.
type Cache struct {
	wLRU        *LRUCache        // window LRU that receives every new key first
	sLRU        *SLRU            // main area, split into stage 1 and stage 2
	cms         *cmSketch        // access-frequency estimator
	bloom       *Bloom           // records keys that were already evicted from the window once
	maxCnt, cnt int              // cnt counts Get/Put calls; reaching maxCnt resets cms and cnt
	tp          map[uint64]*List // key -> node; presumably shared with wLRU and sLRU — confirm in the constructor
}

// List is a node of a circular doubly linked list holding one cache entry.
type List struct {
	key   uint64      // hashed cache key
	val   interface{} // cached value
	stage int         // 0->wlru, 1->Stage1, 2->Stage2
	nxt   *List       // next node
	pre   *List       // previous node
}

// cnt is a package-level counter. NOTE(review): nothing in the visible
// W-TinyLFU code reads or writes it; it looks like a leftover — confirm.
var cnt int

// LRUCache is one LRU list with a fixed capacity.
type LRUCache struct {
	cap   int              // maximum number of entries
	n     int              // current number of entries
	dummy *List            // sentinel node: dummy.nxt is the newest entry, dummy.pre the oldest
	tp    map[uint64]*List // key -> node index
}

// SLRU is a segmented LRU made of two LRU lists, stage1 and stage2.
type SLRU struct {
	tp             map[uint64]*List // key -> node index handed to both stages by NewSLRU
	cap1, cap2     int              // capacities of stage1 and stage2
	stage1, stage2 *LRUCache        // the two segments
}

函数与接口

Cache

// update records an access to an entry that is already cached.
//
// The access is always counted in the frequency sketch. An entry that lives in
// stage 1 is additionally promoted to stage 2; if that pushes an entry out of
// stage 2, the displaced entry is demoted to stage 1, so the two effectively
// swap places.
//
// NOTE(review): hits on window (stage 0) and stage 2 entries only bump the
// sketch; their position in the list is not refreshed here.
func (c *Cache) update(list *List) {
	c.cms.Increment(list.key)
	if list.stage != 1 {
		return
	}

	// Take the entry out of stage 1 completely, including the key index.
	// Leaving the key indexed would make stage2.Put treat it as an existing
	// entry and skip both the stage change and the size accounting.
	list.pre.nxt = list.nxt
	list.nxt.pre = list.pre
	c.sLRU.stage1.n--
	delete(c.sLRU.stage1.tp, list.key)

	// TODO: link the existing node directly instead of allocating a new one.
	displaced := c.sLRU.stage2.Put(list.key, list.val, 2)
	if displaced != nil {
		// Demote the entry that lost its stage 2 slot, not the promoted one.
		c.sLRU.stage1.Put(displaced.key, displaced.val, 1)
	}
}
// Get looks up key. On a hit the access is recorded through update and the
// entry's *List node is returned (callers read the value from its val field).
// On a miss the result is an untyped nil, so `c.Get(k) == nil` works as
// expected; returning the nil *List directly would wrap it in a non-nil
// interface value.
//
// Every call also advances the operation counter; when it reaches maxCnt the
// frequency sketch is aged and the counter starts over.
func (c *Cache) Get(key uint64) interface{} {
	c.cnt++
	if c.cnt == c.maxCnt {
		c.cms.Reset()
		c.cnt = 0
	}

	node := c.tp[key]
	if node == nil {
		return nil
	}
	c.update(node)
	return node
}
// Put stores value under key.
//
// An already cached key gets its value replaced and is treated as accessed.
// A new key enters the window LRU. If that pushes another entry out of the
// window, the evicted entry is only recorded in the bloom filter the first
// time this happens; once the filter already knows its key, it is offered to
// the segmented LRU, which decides by estimated frequency.
//
// Every call also advances the operation counter; when it reaches maxCnt the
// frequency sketch is aged and the counter starts over.
func (c *Cache) Put(key uint64, value interface{}) {
	c.cnt++
	if c.cnt == c.maxCnt {
		c.cms.Reset()
		c.cnt = 0
	}

	if hit := c.tp[key]; hit != nil {
		hit.val = value
		c.update(hit)
		return
	}

	evicted := c.wLRU.Put(key, value, 0)
	if evicted == nil {
		return
	}
	if seen := c.bloom.CheckUint(evicted.key); !seen {
		c.bloom.InsertUint(evicted.key)
		return
	}
	c.sLRU.PutWithCompare(evicted.key, evicted.val, c.cms)
}

LRU

// Get returns the node stored under key after moving it to the front of this
// list, or nil when the key is not indexed.
func (c *LRUCache) Get(key uint64) *List {
	node := c.tp[key]
	if node == nil {
		return nil
	}

	// Unlink the node from its current position.
	before, after := node.pre, node.nxt
	before.nxt = after
	after.pre = before

	// Re-insert it directly behind the sentinel.
	head := c.dummy
	first := head.nxt
	node.pre, node.nxt = head, first
	first.pre = node
	head.nxt = node

	return node
}

// Put stores value under key at the front of this list and returns the entry
// that overflowed as a result, or nil when nothing overflowed.
//
//   - An indexed key gets its value replaced and moves to the front.
//   - A new key is inserted with stage st; if the list is full, the oldest
//     entry is unlinked, removed from the index and returned.
//   - A list whose capacity is zero or negative cannot hold anything, so the
//     incoming entry itself is returned as the overflow. Without this guard the
//     sentinel would be "evicted": key 0 would be deleted from the index and the
//     sentinel handed to the caller as a victim.
func (c *LRUCache) Put(key uint64, value interface{}, st int) *List {
	if c.cap <= 0 {
		return &List{key: key, val: value, stage: st}
	}

	if node := c.tp[key]; node != nil {
		node.val = value
		node.pre.nxt = node.nxt
		node.nxt.pre = node.pre
		c.linkFront(node)
		return nil
	}

	var overflow *List
	if c.n >= c.cap {
		overflow = c.dummy.pre
		delete(c.tp, overflow.key)
		overflow.pre.nxt = overflow.nxt
		overflow.nxt.pre = overflow.pre
		c.n--
	}

	node := &List{key: key, val: value, stage: st}
	c.tp[key] = node
	c.linkFront(node)
	c.n++
	return overflow
}

// linkFront links node directly behind the sentinel.
func (c *LRUCache) linkFront(node *List) {
	node.pre = c.dummy
	node.nxt = c.dummy.nxt
	c.dummy.nxt.pre = node
	c.dummy.nxt = node
}

SLRU

// NewSLRU builds a segmented LRU whose stage 1 holds up to c1 entries and whose
// stage 2 holds up to c2 entries. The same index map tp is handed to both
// stages and kept on the SLRU itself.
func NewSLRU(c1, c2 int, tp map[uint64]*List) *SLRU {
	s := &SLRU{tp: tp, cap1: c1, cap2: c2}
	s.stage1 = NewLRU(c1, tp)
	s.stage2 = NewLRU(c2, tp)
	return s
}
// PutWithCompare offers an entry to stage 1.
//
// A key that is already indexed gets its value replaced and is moved to the
// front of stage 1. A new key is inserted at the front; if stage 1 is full it
// first has to win against the oldest stage 1 entry: when the sketch estimates
// the new key as strictly less frequent than that victim, the new key is
// dropped, otherwise the victim is evicted to make room.
func (s *SLRU) PutWithCompare(key uint64, value interface{}, cms *cmSketch) {
	probation := s.stage1
	head := probation.dummy

	node := s.tp[key]
	if node != nil {
		// Known key: unlink so it can be re-linked at the front below.
		node.pre.nxt = node.nxt
		node.nxt.pre = node.pre
		node.val = value
	} else {
		if probation.n == probation.cap {
			victim := head.pre
			if cms.Estimate(key) < cms.Estimate(victim.key) {
				return
			}
			delete(s.tp, victim.key)
			victim.pre.nxt = victim.nxt
			victim.nxt.pre = victim.pre
			probation.n--
		}
		node = &List{key: key, val: value, stage: 1}
		s.tp[key] = node
		probation.n++
	}

	node.pre = head
	node.nxt = head.nxt
	head.nxt.pre = node
	head.nxt = node
}