数据库内核缓冲池替换策略笔记

编程小白进阶札记

已于 2023-09-18 20:02:32 修改

阅读量177

点赞数

文章标签：数据库笔记

于 2023-08-10 21:58:59 首次发布

本文链接：https://blog.csdn.net/weixin_47895938/article/details/132219283

版权

数据库内核缓冲池替换策略笔记

LRU

LRU（Least Recently Used）是一种缓存淘汰策略，用于在缓存空间不足时决定哪些项目被移除以腾出空间来存储新的数据。LRU策略的基本思想是：当需要淘汰一个缓存项时，选择最近最少使用的缓存项进行移除。

具体来说，LRU策略会维护一个访问顺序列表，当有数据被访问时，该数据项会被移动到列表的最前面，表示它是最近访问过的。当缓存达到容量上限且需要插入新数据时，LRU策略会移除列表末尾的数据项，因为那些在最长时间内没有被访问的数据项被认为是最近最少使用的。

例如，假设缓存容量为3，按以下访问顺序访问缓存项：A -> B -> C -> A -> D。那么在进行D的插入时，缓存中的数据项顺序可能变为：C -> A -> D，B被淘汰，因为它是最近最少使用的。

LRU策略在许多情况下表现良好，特别适合数据访问模式具有时间局部性（局部数据集会被反复访问）的场景。

实现

#include <iostream>
#include <list>
#include <unordered_map>

// Fixed-capacity LRU (least recently used) cache mapping int keys to int values.
//
// Entries are kept in a std::list ordered from most recently used (front) to
// least recently used (back). A hash map stores each key's list iterator, so
// lookups and reordering never traverse the list.
class LRUCache {
public:
    // capacity: maximum number of entries kept. A value <= 0 produces a cache
    // that stores nothing (put() becomes a no-op).
    LRUCache(int capacity) : capacity(capacity) {}

    // Returns the value stored under `key` and marks the entry as most
    // recently used. Returns -1 when the key is not cached.
    int get(int key) {
        const auto found = cacheMap.find(key);
        if (found == cacheMap.end()) {
            return -1; // Key not found
        }

        // splice() relinks the node in place, so the iterator stored in the
        // map remains valid after the move.
        cacheList.splice(cacheList.begin(), cacheList, found->second);
        return found->second->second;
    }

    // Inserts or updates `key`. The touched entry becomes the most recently
    // used one; when a new key arrives in a full cache, the least recently
    // used entry is evicted first.
    void put(int key, int value) {
        // Without this guard a zero-capacity cache would call back() on an
        // empty list, and a negative capacity would never evict at all.
        if (capacity <= 0) {
            return;
        }

        const auto found = cacheMap.find(key);
        if (found != cacheMap.end()) {
            // Key already exists: refresh its value and move it to the front.
            found->second->second = value;
            cacheList.splice(cacheList.begin(), cacheList, found->second);
            return;
        }

        if (static_cast<int>(cacheMap.size()) >= capacity) {
            // Drop the least recently used entry (back of the list).
            cacheMap.erase(cacheList.back().first);
            cacheList.pop_back();
        }

        // New entries start out as the most recently used.
        cacheList.emplace_front(key, value);
        cacheMap[key] = cacheList.begin();
    }

private:
    int capacity;
    std::list<std::pair<int, int>> cacheList; // (key, value) pairs, MRU first
    std::unordered_map<int, std::list<std::pair<int, int>>::iterator> cacheMap; // key -> list iterator
};

int main() {
    LRUCache lruCache(2); // Create an LRU cache with capacity 2

    lruCache.put(1, 1);
    lruCache.put(2, 2);
    std::cout << lruCache.get(1) << std::endl; // Output: 1
    lruCache.put(3, 3); // Evicts key 2
    std::cout << lruCache.get(2) << std::endl; // Output: -1 (not found)
    std::cout << lruCache.get(3) << std::endl; // Output: 3

    return 0;
}

使用链表实现的LRU

#include <iostream>
#include <unordered_map>

// LRU cache backed by a hand-written doubly linked list plus a hash map.
//
// `head` is the most recently used node and `tail` the least recently used
// one; the map gives direct access from a key to its node.
class LRUCache {
private:
    struct Node {
        int key;
        int value;
        Node* prev;
        Node* next;
        Node(int k, int v) : key(k), value(v), prev(nullptr), next(nullptr) {}
    };

    int capacity;
    Node* head;
    Node* tail;
    std::unordered_map<int, Node*> cacheMap;

public:
    // c: maximum number of entries; put() is a no-op when c <= 0.
    LRUCache(int c) : capacity(c), head(nullptr), tail(nullptr) {}

    // Nodes are owned through raw pointers, so a member-wise copy would make
    // two caches delete the same nodes. Copying is therefore disabled.
    LRUCache(const LRUCache&) = delete;
    LRUCache& operator=(const LRUCache&) = delete;

    // Returns the value for `key` and marks it most recently used, or -1 when
    // the key is not cached.
    int get(int key) {
        const auto found = cacheMap.find(key);
        if (found == cacheMap.end()) {
            return -1; // Key not found
        }

        updateAccess(found->second);
        return found->second->value;
    }

    // Inserts or updates `key`; a new key arriving in a full cache evicts the
    // least recently used entry first.
    void put(int key, int value) {
        if (capacity <= 0) {
            return;
        }

        const auto found = cacheMap.find(key);
        if (found != cacheMap.end()) {
            found->second->value = value;
            updateAccess(found->second);
            return;
        }

        if (static_cast<int>(cacheMap.size()) >= capacity) {
            evict();
        }

        Node* newNode = new Node(key, value);
        cacheMap[key] = newNode;
        addToFront(newNode);
    }

    ~LRUCache() {
        // Every live node is referenced by exactly one map entry.
        for (const auto& entry : cacheMap) {
            delete entry.second;
        }
    }

private:
    // Links a detached node in as the new head.
    void addToFront(Node* node) {
        node->prev = nullptr;
        node->next = head;
        if (head) {
            head->prev = node;
        } else {
            tail = node;
        }
        head = node;
    }

    // Moves `node` to the head of the list.
    void updateAccess(Node* node) {
        if (node == head) {
            return; // already the most recently used entry
        }

        // Unlink the node. It is not the head, so node->prev is non-null.
        node->prev->next = node->next;
        if (node->next) {
            node->next->prev = node->prev;
        } else {
            tail = node->prev;
        }

        // addToFront() resets node->prev. Leaving the old value behind would
        // give the head a stale predecessor and corrupt the list on the next
        // access to the same key.
        addToFront(node);
    }

    // Removes and frees the least recently used node (the tail), if any.
    void evict() {
        if (!tail) {
            return;
        }

        Node* victim = tail;
        cacheMap.erase(victim->key);
        tail = victim->prev;
        if (tail) {
            tail->next = nullptr;
        } else {
            head = nullptr;
        }
        delete victim;
    }
};

int main() {
    LRUCache lruCache(3); // Create an LRU cache with capacity 3

    lruCache.put(1, 1);
    lruCache.put(2, 2);
    std::cout << lruCache.get(1) << std::endl; // Output: 1
    lruCache.put(3, 3);
    std::cout << lruCache.get(2) << std::endl; // Output: 2
    lruCache.put(4, 4); // Evicts key 1
    std::cout << lruCache.get(1) << std::endl; // Output: -1 (not found)
    std::cout << lruCache.get(3) << std::endl; // Output: 3

    return 0;
}

双向链表实现的LRU

#include <iostream>
#include <vector>
#include <unordered_map>

using namespace std;

// LRU cache built on a doubly linked list with two sentinel nodes.
//
// L and R are dummy nodes that bracket the real entries: the node right after
// L is the most recently used entry, the node right before R is the least
// recently used one. The sentinels remove every empty-list special case from
// remove() and insert().
class LRUCache{
public:
	// c: maximum number of entries; put() is a no-op when c <= 0.
	LRUCache(int c) : capacity_(c) {
		L = new Node(-1, -1);
		R = new Node(-1, -1);
		L->right = R;
		R->left = L;
	}

	// Nodes are owned through raw pointers, so copying is disabled to avoid
	// two caches deleting the same nodes.
	LRUCache(const LRUCache&) = delete;
	LRUCache& operator=(const LRUCache&) = delete;

	// Frees both sentinels and every entry linked between them.
	~LRUCache(){
		for(Node* p = L; p != nullptr; ){
			Node* next = p->right;
			delete p;
			p = next;
		}
	}

	// Returns the value for `key` and marks it most recently used, or -1 when
	// the key is not cached.
	int get(int key){
		auto it = hash.find(key);
		if(it == hash.end()) return -1;
		Node* p = it->second;
		remove(p);
		insert(p);
		return p->value;
	}

	// Inserts or updates `key`; a new key arriving in a full cache evicts the
	// least recently used entry first.
	void put(int key, int value){
		// With no capacity the eviction below would pick the L sentinel as the
		// victim, so bail out early.
		if(capacity_ <= 0) return;

		auto it = hash.find(key);
		if(it != hash.end()){
			Node* p = it->second;
			p->value = value;
			remove(p);
			insert(p);
			return;
		}

		if(static_cast<int>(hash.size()) >= capacity_){
			Node* victim = R->left; // least recently used entry
			remove(victim);
			hash.erase(victim->key);
			delete victim;
		}
		Node* p = new Node(key, value);
		hash[key] = p;
		insert(p);
	}

private:
	struct Node{
		int key, value;
		Node* left, *right;
		Node(int k, int v): key(k), value(v), left(nullptr), right(nullptr) {}
	}*L, *R;

	int capacity_;
	std::unordered_map<int, Node*> hash; // key -> node

	// Unlinks p from the list; p's own pointers are left untouched.
	void remove(Node* p){
		p->right->left = p->left;
		p->left->right = p->right;
	}

	// Links p in directly after the L sentinel (most recently used position).
	void insert(Node* p){
		p->right = L->right;
		p->left = L;
		L->right->left = p;
		L->right = p;
	}
};

	

// Placeholder entry point: no demo is run here; the process simply reports success.
int main(){
	return 0;
}

CLOCK 优化 LRU

CLOCK算法是基于类似于物理时钟的概念的，它尝试通过设置一个类似于"时钟指针"的标记，来遍历缓存中的元素，并判断哪些元素是"最近使用过"的。CLOCK算法可以看作是LRU算法的一种近似实现，但相对更加高效。

以下是CLOCK算法的基本思想：

每个缓存项都有一个状态位（通常是一个额外的比特位），用于表示是否最近被访问过。
通过一个类似于时钟指针的方式遍历缓存，如果指针指向的缓存项状态位为0（表示最近没有访问过），则选择该项进行替换。
如果指针指向的缓存项状态位为1（表示最近访问过），则将该状态位设置为0，并将指针向前移动。

CLOCK算法的主要优势在于，它可以较为有效地识别近期访问过的元素，同时也能在一定程度上避免LRU算法可能存在的性能问题（如冷启动、长尾效应等）。然而，CLOCK算法也会引入一些代价，例如需要维护额外的状态位，以及遍历缓存时产生的开销。

需要注意的是，CLOCK算法并不是一种严格的LRU算法，而是一种近似实现。

实现

#include <iostream>
#include <unordered_map>
#include <list>

// CLOCK (second chance) cache mapping int keys to int values.
//
// Each entry carries a `secondChance` reference bit that is set on insertion
// and on every hit. `clockList` holds the cached keys in sweep order: the
// front is the key currently under the clock hand.
class CLOCKCache {
private:
    struct CacheNode {
        int key;
        int value;
        bool secondChance; // true = referenced since the hand last passed
        CacheNode(int k, int v) : key(k), value(v), secondChance(true) {}
    };

    int capacity;
    std::unordered_map<int, CacheNode> cacheMap;
    std::list<int> clockList;

public:
    // c: maximum number of entries; put() is a no-op when c <= 0.
    CLOCKCache(int c) : capacity(c) {}

    // Returns the value for `key` and sets its reference bit, or returns -1
    // when the key is not cached.
    int get(int key) {
        // CacheNode has no default constructor, so the map is accessed with
        // find()/emplace() rather than operator[].
        const auto found = cacheMap.find(key);
        if (found == cacheMap.end()) {
            return -1; // Key not found
        }

        found->second.secondChance = true;
        return found->second.value;
    }

    // Inserts or updates `key`. A new key arriving in a full cache first
    // triggers one eviction sweep.
    void put(int key, int value) {
        if (capacity <= 0) {
            return;
        }

        const auto found = cacheMap.find(key);
        if (found != cacheMap.end()) {
            found->second.value = value;
            found->second.secondChance = true;
            return;
        }

        if (static_cast<int>(cacheMap.size()) >= capacity) {
            evict();
        }

        cacheMap.emplace(key, CacheNode(key, value));
        clockList.push_back(key);
    }

private:
    // Advances the hand until it meets an entry whose bit is clear and removes
    // that entry. Entries with the bit set get it cleared and are re-queued at
    // the back, so the sweep ends after at most one full pass plus one step.
    void evict() {
        while (!clockList.empty()) {
            const int candidate = clockList.front();
            clockList.pop_front();

            const auto found = cacheMap.find(candidate);
            if (found == cacheMap.end()) {
                continue; // key without a map entry: just drop it from the list
            }

            if (found->second.secondChance) {
                found->second.secondChance = false;
                clockList.push_back(candidate);
            } else {
                cacheMap.erase(found);
                return;
            }
        }
    }
};

int main() {
    CLOCKCache clockCache(3); // Create a CLOCK cache with capacity 3

    clockCache.put(1, 1);
    clockCache.put(2, 2);
    std::cout << clockCache.get(1) << std::endl; // Output: 1
    clockCache.put(3, 3);
    clockCache.put(4, 4); // Evicts key 2
    std::cout << clockCache.get(2) << std::endl; // Output: -1 (not found)
    std::cout << clockCache.get(3) << std::endl; // Output: 3

    return 0;
}

链表实现

#include <iostream>
#include <unordered_map>

// CLOCK (second chance) cache backed by a hand-written doubly linked list.
//
// New entries are linked in at `head`; the eviction sweep inspects `tail`.
// Each node carries a `secondChance` reference bit that is set on insertion
// and on every hit.
class CLOCKCache {
private:
    struct Node {
        int key;
        int value;
        bool secondChance; // true = referenced since the sweep last saw it
        Node* prev;
        Node* next;
        Node(int k, int v) : key(k), value(v), secondChance(true), prev(nullptr), next(nullptr) {}
    };

    int capacity;
    Node* head;
    Node* tail;
    std::unordered_map<int, Node*> cacheMap;

public:
    // c: maximum number of entries; put() is a no-op when c <= 0.
    CLOCKCache(int c) : capacity(c), head(nullptr), tail(nullptr) {}

    // Nodes are owned through raw pointers, so copying is disabled to avoid
    // two caches deleting the same nodes.
    CLOCKCache(const CLOCKCache&) = delete;
    CLOCKCache& operator=(const CLOCKCache&) = delete;

    // Returns the value for `key` and sets its reference bit, or returns -1
    // when the key is not cached.
    int get(int key) {
        const auto found = cacheMap.find(key);
        if (found == cacheMap.end()) {
            return -1; // Key not found
        }

        found->second->secondChance = true;
        return found->second->value;
    }

    // Inserts or updates `key`. A new key arriving in a full cache first
    // triggers one eviction sweep.
    void put(int key, int value) {
        if (capacity <= 0) {
            return;
        }

        const auto found = cacheMap.find(key);
        if (found != cacheMap.end()) {
            found->second->value = value;
            found->second->secondChance = true;
            return;
        }

        if (static_cast<int>(cacheMap.size()) >= capacity) {
            evict();
        }

        Node* newNode = new Node(key, value);
        cacheMap[key] = newNode;
        addToFront(newNode);
    }

    ~CLOCKCache() {
        // Every live node is referenced by exactly one map entry.
        for (const auto& entry : cacheMap) {
            delete entry.second;
        }
    }

private:
    // Links a detached node in as the new head.
    void addToFront(Node* node) {
        node->prev = nullptr;
        node->next = head;
        if (head) {
            head->prev = node;
        } else {
            tail = node;
        }
        head = node;
    }

    // Sweeps from the tail: a node whose bit is set gets the bit cleared and is
    // rotated to the head; the first node found with a clear bit is removed and
    // freed. Each rotation clears one bit, so the sweep always terminates.
    void evict() {
        while (tail) {
            Node* candidate = tail;

            if (candidate->secondChance) {
                candidate->secondChance = false;
                if (candidate != head) {
                    // Capture the new tail before the candidate's links are
                    // rewritten; reading candidate->prev afterwards would
                    // lose the tail.
                    tail = candidate->prev;
                    tail->next = nullptr;
                    addToFront(candidate);
                }
                // A lone node stays where it is and is evicted on the next pass.
                continue;
            }

            cacheMap.erase(candidate->key);
            tail = candidate->prev;
            if (tail) {
                tail->next = nullptr;
            } else {
                head = nullptr;
            }
            delete candidate;
            return;
        }
    }
};

int main() {
    CLOCKCache clockCache(3); // Create a CLOCK cache with capacity 3

    clockCache.put(1, 1);
    clockCache.put(2, 2);
    std::cout << clockCache.get(1) << std::endl; // Output: 1
    clockCache.put(3, 3);
    clockCache.put(4, 4); // Evicts key 2
    std::cout << clockCache.get(2) << std::endl; // Output: -1 (not found)
    std::cout << clockCache.get(3) << std::endl; // Output: 3

    return 0;
}

CLOCK 缺点

CLOCK替换策略在某些情况下可能受到顺序泛洪（Sequential Flooding）的影响，从而导致性能下降或不佳的缓存命中率。顺序泛洪是指一系列连续访问的数据块，它们可能会引发缓存替换策略的不适应，因为这些策略主要是基于"时间"的考虑，而不是数据的访问模式。

在顺序泛洪情况下，比如遍历数组、顺序读取大数据集等，连续的访问会导致LRU和CLOCK算法无法有效地维护访问历史，从而可能造成缓存项的频繁替换。这是因为这两种策略都不适合处理连续访问模式，它们更适用于混合、随机访问模式，即不同数据块之间的访问有较大的时间间隔。

一些针对顺序泛洪的替代策略包括：

FIFO（First In, First Out）：按照数据进入缓存的顺序进行淘汰。适用于连续访问模式，但可能不如LRU在其他情况下性能好。
LFU（Least Frequently Used）：根据数据的访问频率进行淘汰。在连续访问模式下，LFU可以一定程度上识别并保留频繁访问的数据块。
ARC（Adaptive Replacement Cache）：综合了LRU和LFU的特点，自适应地根据访问模式来调整缓存淘汰策略。
Bélády’s OPT（Optimal）：理论上最优的缓存替换策略，但需要提前知道未来的访问模式，实际中不可行。
Random：随机选择淘汰的数据块。在某些情况下可能表现得更好，但无法提供一致的性能。

LRU-K

LRU-K是对经典的LRU（Least Recently Used）缓存淘汰策略的一种改进和扩展，它引入了历史信息来更准确地判断哪些缓存项应该被淘汰。LRU-K考虑了过去K次访问的历史，而不仅仅是最近的一次访问。

在经典的LRU策略中，当缓存满时，会选择最久未使用的缓存项进行淘汰。但在某些情况下，这可能不是最优的选择，特别是在存在顺序泛洪（Sequential Flooding）等情况下，经典LRU策略可能会失效。

LRU-K引入了K这个参数，表示考虑过去K次访问的历史。它的工作原理如下：

当访问一个缓存项时，将其移动到链表的最前面（或更新历史访问信息）。
当需要淘汰缓存项时，比较的不再是各缓存项最近一次访问的时间，而是它们倒数第K次访问的时间：倒数第K次访问距今最久（即后向K距离最大）的缓存项被淘汰；访问次数不足K次的缓存项视为距离无穷大，会被优先淘汰。
具体实现时，LRU-K可以使用一个双向链表和一个哈希表来维护缓存项和它们的访问历史。在插入和访问操作时，需要更新链表中的顺序，而在淘汰操作时，需要根据历史访问信息选择最适合淘汰的项。

LRU-K可以根据不同的场景和访问模式进行调整K值，从而在不同情况下获得更好的性能。它相对于经典LRU策略来说，在一些特殊情况下，例如频繁顺序访问或周期性访问模式，可能能够更准确地识别和保留需要缓存的数据项。

LRU-K实现

#include <iostream>
#include <list>
#include <unordered_map>

// Simplified "LRU-K"-style cache mapping int keys to int values.
//
// A logical clock advances by one on every successful get() and on every
// put() that passes the capacity check; each entry records the tick of its
// last access. Only that single timestamp is tracked, not a history of the
// last K accesses. When a new key arrives in a full cache, the entry with the
// oldest last-access tick is evicted, provided it has been idle for at least
// `k` ticks.
//
// NOTE(review): if even the oldest entry is younger than `k` ticks nothing is
// evicted, so the cache can hold more than `capacity` entries. This matches
// the original eviction rule; confirm it is intended.
class LRUKCache {
public:
    // capacity: target maximum number of entries; put() is a no-op when <= 0.
    // k: minimum idle time, in ticks, before an entry may be evicted.
    LRUKCache(int capacity, int k) : capacity(capacity), k(k), time(0) {}

    // Returns the value for `key` and stamps it with a fresh tick, or returns
    // -1 (without advancing the clock) when the key is not cached.
    int get(int key) {
        const auto found = cacheMap.find(key);
        if (found == cacheMap.end()) {
            return -1; // Key not found
        }

        found->second.second = ++time;
        return found->second.first;
    }

    // Inserts or updates `key`, stamping it with a fresh tick. A new key
    // arriving in a full cache first attempts one eviction.
    void put(int key, int value) {
        if (capacity <= 0) {
            return;
        }

        ++time;
        const auto found = cacheMap.find(key);
        if (found != cacheMap.end()) {
            found->second = {value, time};
            return;
        }

        if (static_cast<int>(cacheMap.size()) >= capacity) {
            evict();
        }
        cacheMap.emplace(key, std::pair<int, int>(value, time));
    }

private:
    int capacity;
    int k;
    int time; // logical clock
    std::unordered_map<int, std::pair<int, int>> cacheMap; // key -> (value, last access time)

    // Removes the entry with the smallest last-access tick among those idle
    // for at least k ticks. The victim is tracked by iterator rather than by a
    // sentinel key, so every int (including -1) is a valid, evictable key.
    void evict() {
        auto victim = cacheMap.end();

        for (auto it = cacheMap.begin(); it != cacheMap.end(); ++it) {
            const int lastAccess = it->second.second;
            if (time - lastAccess < k) {
                continue; // accessed too recently to be evicted
            }
            if (victim == cacheMap.end() || lastAccess < victim->second.second) {
                victim = it;
            }
        }

        if (victim != cacheMap.end()) {
            cacheMap.erase(victim);
        }
    }
};

int main() {
    LRUKCache lrukCache(3, 2); // Create an LRUK cache with capacity 3 and k = 2

    lrukCache.put(1, 1);
    lrukCache.put(2, 2);
    std::cout << lrukCache.get(1) << std::endl; // Output: 1
    lrukCache.put(3, 3);
    std::cout << lrukCache.get(2) << std::endl; // Output: 2
    lrukCache.put(4, 4); // Evicts key 1
    std::cout << lrukCache.get(1) << std::endl; // Output: -1 (not found)
    std::cout << lrukCache.get(3) << std::endl; // Output: 3

    return 0;
}

链表实现LRU-K

#include <iostream>
#include <list>
#include <unordered_map>

// Simplified "LRU-K"-style cache implemented with a linked list.
//
// Entries live in a std::list ordered from most recently accessed (front) to
// least recently accessed (back); a hash map stores each key's list iterator
// so reordering never traverses the list. A logical clock advances by one on
// every successful get() and on every put() that passes the capacity check,
// and each entry records the tick of its last access (only that single
// timestamp, not a history of the last K accesses).
//
// NOTE(review): the back entry is evicted only when it has been idle for at
// least `k` ticks. Otherwise nothing is evicted and the cache can hold more
// than `capacity` entries. This matches the original eviction rule; confirm
// it is intended.
class LRUKCache {
public:
    // capacity: target maximum number of entries; put() is a no-op when <= 0.
    // k: minimum idle time, in ticks, before an entry may be evicted.
    LRUKCache(int capacity, int k) : capacity(capacity), k(k), time(0) {}

    // Returns the value for `key` and marks it most recently accessed, or
    // returns -1 (without advancing the clock) when the key is not cached.
    int get(int key) {
        const auto found = cacheMap.find(key);
        if (found == cacheMap.end()) {
            return -1; // Key not found
        }

        touch(found->second);
        return found->second->value;
    }

    // Inserts or updates `key` as the most recently accessed entry. A new key
    // arriving in a full cache first attempts one eviction.
    void put(int key, int value) {
        if (capacity <= 0) {
            return;
        }

        const auto found = cacheMap.find(key);
        if (found != cacheMap.end()) {
            found->second->value = value;
            touch(found->second);
            return;
        }

        ++time;
        if (static_cast<int>(cacheMap.size()) >= capacity) {
            evict();
        }
        lruList.push_front(Entry{key, value, time});
        cacheMap[key] = lruList.begin();
    }

private:
    struct Entry {
        int key;
        int value;
        int lastAccess; // tick of the most recent get()/put() on this key
    };
    using EntryList = std::list<Entry>;

    int capacity;
    int k;
    int time; // logical clock
    EntryList lruList; // most recently accessed entry first
    std::unordered_map<int, EntryList::iterator> cacheMap; // key -> list position

    // Advances the clock, stamps the entry, and moves it to the front.
    // splice() relinks the node in place, so stored iterators stay valid.
    void touch(EntryList::iterator pos) {
        pos->lastAccess = ++time;
        lruList.splice(lruList.begin(), lruList, pos);
    }

    // The list is ordered by last access, so the back entry is the one with
    // the oldest tick. It is the only eviction candidate. No sentinel key is
    // involved, so every int (including -1) is a valid, evictable key.
    void evict() {
        if (lruList.empty()) {
            return;
        }

        const Entry& oldest = lruList.back();
        if (time - oldest.lastAccess < k) {
            return; // even the oldest entry was accessed too recently
        }

        cacheMap.erase(oldest.key);
        lruList.pop_back();
    }
};

int main() {
    LRUKCache lrukCache(3, 2); // Create an LRUK cache with capacity 3 and k = 2

    lrukCache.put(1, 1);
    lrukCache.put(2, 2);
    std::cout << lrukCache.get(1) << std::endl; // Output: 1
    lrukCache.put(3, 3);
    std::cout << lrukCache.get(2) << std::endl; // Output: 2
    lrukCache.put(4, 4); // Evicts key 1
    std::cout << lrukCache.get(1) << std::endl; // Output: -1 (not found)
    std::cout << lrukCache.get(3) << std::endl; // Output: 3

    return 0;
}