数据结构设计以及常见算法总结

最新推荐文章于 2022-08-15 10:58:34 发布

每天学点新东西

最新推荐文章于 2022-08-15 10:58:34 发布

阅读量975

点赞数

分类专栏： leetcode 刷题总结数据结构文章标签：数据结构哈希队列 leetcode

本文链接：https://blog.csdn.net/qq_41202486/article/details/108543143

版权

leetcode 刷题总结同时被 2 个专栏收录

48 篇文章 0 订阅

订阅专栏

数据结构

1 篇文章 0 订阅

订阅专栏

数据结构和算法

数据结构
常见算法
- 排序
操作系统
redis应用场景

数据结构

基本数据结构

设计栈

使用vector实现

class MyStack{
public:
	int top,size,cur_size;
	vector<int> buffer;
	
	MyStack(int k){
		buffer.resize(k);
		top=0;
		size=k;
		cur_size=0;
	}

	bool Push(int value){
		if(cur_size==size)
			return false;
		buffer[top]=value;
		top=(top+1)%size;
		cur_size++;
		return true;
	}

	bool Pop(){
		if(cur_size==0)
			return false;
		top=(top+size-1)%size;
		cur_size--;
		return true;
	}

	int Top(){
		if(cur_size==0)
			return -1;
		return buffer[top];
	}

	bool IsEmpty(){
		return cur_size==0;
	}

	bool IsFull(){
		return cur_size==size;
	}
};

使用list实现

class MyStack {
public:
	int size, cur_size;
	list<int> buffer;

	MyStack(int k) {
		size = k;
		cur_size = 0;
	}

	bool Push(int value) {
		if (cur_size == size)
			return false;
		buffer.insert(buffer.begin(),value);
		cur_size++;
		return true;
	}

	bool Pop() {
		if (cur_size == 0)
			return false;
		buffer.erase(buffer.begin());
		cur_size--;
		return true;
	}

	int Top() {
		if (cur_size == 0)
			return -1;
		return *buffer.begin();
	}

	bool IsEmpty() {
		return cur_size == 0;
	}

	bool IsFull() {
		return cur_size == size;
	}
};

使用队列实现

leetcode 225 用队列实现栈

class MyStack {
public:
    /** Initialize your data structure here. */
    MyStack() = default;

    /** Push element x onto stack. */
    void push(int x) {
        q.push(x);
        int n = q.size();
        for (int i = 0; i < n-1; i++) {
            q.push(q.front());
            q.pop();
        }
    }

    /** Removes the element on top of the stack and returns that element. */
    int pop() {
        int val = top();
        q.pop();
        return val;
    }

    /** Get the top element. */
    int top() {
        return q.front();
    }

    /** Returns whether the stack is empty. */
    bool empty() {
        return q.empty();
    }

private:
    queue<int> q;
};

设计最小栈

leetcode 155 最小栈：
维护两个栈，一个原来的栈，一个最小栈；
压入元素时，最小栈的压栈策略：栈顶和当前压入元素比较选小的压

class MinStack {
public:

    stack<int> st;
    stack<int> minst;
    /** initialize your data structure here. */
    MinStack() {
        
    }
    
    void push(int x) {
        st.push(x);
        if(minst.empty()||x<=minst.top())
            minst.push(x);
        else
            minst.push(minst.top());
    }
    
    void pop() {
        st.pop();
        minst.pop();
    }
    
    int top() {
        return st.top();
    }
    
    int getMin() {
        return minst.top();
    }
};

/**
 * Your MinStack object will be instantiated and called as such:
 * MinStack* obj = new MinStack();
 * obj->push(x);
 * obj->pop();
 * int param_3 = obj->top();
 * int param_4 = obj->getMin();
 */

设计最大栈

leetcode 716 最大栈
要求实现：可以O(1)时间弹出离栈顶最近的最大元素
使用list来存栈的数据，栈顶是list.begin()；
使用map来存(key:栈的数据,value:vector<数据对应的list迭代器>)
因为map会对key：栈的数据进行自动排序，因此当前map.begin()存的是栈最小值对应迭代器；map.rbegin()存的是栈最大值对应迭代器
而栈最大值对应的离栈顶最近的迭代器是vector<迭代器>的最后一个

class MaxStack {
public:
    /** initialize your data structure here. */
    MaxStack() {}
    
    void push(int x) {
    	list.insert(list.begin(), x);//插入到表头
    	map[x].push_back(list.begin());
    }
    
    int pop() {
    	auto x = *list.begin();
    	map[x].pop_back();	//删除map中的迭代器
    	if(map[x].empty()){
    		map.erase(x);
    	}
    	list.erase(list.begin());
    	return x;
    }
    
    int top() {
    	return *list.begin();
    }
    
    int peekMax() {
    	return map.rbegin()->first;//最大值
    }
    
    int popMax() {
    	auto x = map.rbegin->first;//最大值
    	auto it = map[x].back();//最大值中最靠近栈顶的迭代器
    	map[x].pop_back();//删除该迭代器
    	if(map[x].empty()){
    		map.erase(x);
    	}
    	list.erase(it);
    	
    	return x;
    }

private:
	list<int> list;//存放数据
	map<int, vector<list<int>::iterator>> map;//vector为相同值的迭代器集合,尾部的为最靠近栈顶的 
};

设计一个支持增量操作的栈

leetcode 1381设计一个支持增量操作的栈

设计队列

用栈实现队列

leetcode 232 用栈实现队列

class MyQueue {
public:
    stack<int> st1;
    stack<int> st2;
    /** Initialize your data structure here. */
    MyQueue() {

    }

    /** Push element x to the back of queue. */
    void push(int x) {
        while(!st1.empty()){
            st2.push(st1.top());
            st1.pop();
        }
        st1.push(x);
        while(!st2.empty()){
            st1.push(st2.top());
            st2.pop();
        }
    }

    /** Removes the element from in front of queue and returns that element. */
    int pop() {
        int res=st1.top();
        st1.pop();
        return res;
    }

    /** Get the front element. */
    int peek() {
        return st1.top();
    }

    /** Returns whether the queue is empty. */
    bool empty() {
        return st1.empty();
    }
};

/**
 * Your MyQueue object will be instantiated and called as such:
 * MyQueue* obj = new MyQueue();
 * obj->push(x);
 * int param_2 = obj->pop();
 * int param_3 = obj->peek();
 * bool param_4 = obj->empty();
 */

设计循环队列

leetcode 622 设计循环队列

class MyCircularQueue {
public:
    vector<int> v;
    int front;
    int rear;
    int cur_size;
    int _size;
    /** Initialize your data structure here. Set the size of the queue to be k. */
    MyCircularQueue(int k) {
        _size=k;
        front=rear=cur_size=0;
        v.resize(k);
    }
    
    /** Insert an element into the circular queue. Return true if the operation is successful. */
    bool enQueue(int value) {
        if(cur_size==_size) return false;
        v[rear]=value;
        rear=(rear+1)%_size;
        cur_size++;
        return true;
    }
    
    /** Delete an element from the circular queue. Return true if the operation is successful. */
    bool deQueue() {
        if(cur_size==0) return false;
        front=(front+1)%_size;
        cur_size--;
        return true;
    }
    
    /** Get the front item from the queue. */
    int Front() {
        if(isEmpty()) return -1;
        return v[front];
    }
    
    /** Get the last item from the queue. */
    int Rear() {
        if(isEmpty()) return -1;
        return v[(rear+_size-1)%_size];
    }
    
    /** Checks whether the circular queue is empty or not. */
    bool isEmpty() {
        return cur_size==0;
    }
    
    /** Checks whether the circular queue is full or not. */
    bool isFull() {
        return cur_size==_size;
    }
};

/**
 * Your MyCircularQueue object will be instantiated and called as such:
 * MyCircularQueue* obj = new MyCircularQueue(k);
 * bool param_1 = obj->enQueue(value);
 * bool param_2 = obj->deQueue();
 * int param_3 = obj->Front();
 * int param_4 = obj->Rear();
 * bool param_5 = obj->isEmpty();
 * bool param_6 = obj->isFull();
 */

设计循环双端队列

leetcode 641 设计循环双端队列
简化思考思路：
front队头，rear对尾
当前队列有效范围是[front,rear)
从队头插入：front–再对v[front]赋值
从队头拿东西：拿到v[front]再对front++
从队头插入：对v[rear]赋值再rear++
从队头拿东西：rear–再拿到v[rear]

class MyCircularDeque {
public:
    vector<int> v;
    int n;
    int cur_size;
    int front;
    int rear;
    /** Initialize your data structure here. Set the size of the deque to be k. */
    MyCircularDeque(int k) {
        n=k;
        cur_size=front=rear=0;
        v.resize(k);
    }
    
    /** Adds an item at the front of Deque. Return true if the operation is successful. */
    bool insertFront(int value) {
        if(cur_size==n) return false;
        front=(front+n-1)%n;
        v[front]=value;
        cur_size++;
        return true;
    }
    
    /** Adds an item at the rear of Deque. Return true if the operation is successful. */
    bool insertLast(int value) {
        if(cur_size==n) return false;
        v[rear]=value;
        rear=(rear+1)%n;
        cur_size++;
        return true;
    }
    
    /** Deletes an item from the front of Deque. Return true if the operation is successful. */
    bool deleteFront() {
        if(cur_size==0) return false;
        front=(front+1)%n;
        cur_size--;
        return true;
    }
    
    /** Deletes an item from the rear of Deque. Return true if the operation is successful. */
    bool deleteLast() {
        if(cur_size==0) return false;
        rear=(rear+n-1)%n;
        cur_size--;
        return true;
    }
    
    /** Get the front item from the deque. */
    int getFront() {
        if(cur_size==0) return -1;
        return v[front];
    }
    
    /** Get the last item from the deque. */
    int getRear() {
        if(cur_size==0) return -1;
        return v[(rear+n-1)%n];
    }
    
    /** Checks whether the circular deque is empty or not. */
    bool isEmpty() {
        return cur_size==0;
    }
    
    /** Checks whether the circular deque is full or not. */
    bool isFull() {
        return cur_size==n;
    }
};

/**
 * Your MyCircularDeque object will be instantiated and called as such:
 * MyCircularDeque* obj = new MyCircularDeque(k);
 * bool param_1 = obj->insertFront(value);
 * bool param_2 = obj->insertLast(value);
 * bool param_3 = obj->deleteFront();
 * bool param_4 = obj->deleteLast();
 * int param_5 = obj->getFront();
 * int param_6 = obj->getRear();
 * bool param_7 = obj->isEmpty();
 * bool param_8 = obj->isFull();
 */

设计哈希结构

设计哈希集合

leetcode 705 设计哈希集合
使用vector<Node*>实现链地址法

struct Node{
    int val;
    Node* next;
    Node(int val,Node *next): val(val),next(next){}
};

class MyHashSet {
public:
    vector<Node*> v;
    int n=1e4+1;
    /** Initialize your data structure here. */
    MyHashSet() {
        Node* dummy=new Node(-1,nullptr);
        v.resize(n,dummy);
    }
    
    void add(int key) {
        if(contains(key)) return;
        int index=key%n;
        Node* new_node=new Node(key,nullptr);
        new_node->next=v[index]->next;
        v[index]->next=new_node;
    }
    
    void remove(int key) {
        int index=key%n;
        Node* cur=v[index];
        Node* pre=nullptr;
        while(cur!=nullptr){
            if(cur->val==key){
                pre->next=cur->next;
                delete cur;
                return;
            }
            pre=cur;
            cur=cur->next;
        }
    }
    
    /** Returns true if this set contains the specified element */
    bool contains(int key) {
        int index=key%n;
        Node* cur=v[index];
        while(cur!=nullptr){
            if(cur->val==key){
                return true;
            }
            cur=cur->next;
        }
        return false;
    }
};

/**
 * Your MyHashSet object will be instantiated and called as such:
 * MyHashSet* obj = new MyHashSet();
 * obj->add(key);
 * obj->remove(key);
 * bool param_3 = obj->contains(key);
 */

设计哈希映射

leetcode 706 设计哈希映射

struct Node{
    int key;
    int val;
    Node* next;
    Node(int key,int val,Node* next):key(key),val(val),next(next){}
};

class MyHashMap {
public:
    vector<Node*> v;
    int n=1e4+1;
    /** Initialize your data structure here. */
    MyHashMap() {
        Node* dummy=new Node(-1,-1,nullptr);
        v.resize(n,dummy);
    }
    
    /** value will always be non-negative. */
    void put(int key, int value) {
        int index=key%n;
        if(get(key)!=-1)
            remove(key);
        Node* new_node=new Node(key,value,nullptr);
        new_node->next=v[index]->next;
        v[index]->next=new_node;
    }
    
    /** Returns the value to which the specified key is mapped, or -1 if this map contains no mapping for the key */
    int get(int key) {
        int index=key%n;
        Node* cur=v[index];
        while(cur!=nullptr){
            if(cur->key==key)
                return cur->val;
            cur=cur->next;
        }
        return -1;
    }
    
    /** Removes the mapping of the specified value key if this map contains a mapping for the key */
    void remove(int key) {
        int index=key%n;
        Node* cur=v[index];
        Node* pre=nullptr;
        while(cur!=nullptr){
            if(cur->key==key){
                pre->next=cur->next;
                delete cur;
                return;
            }
            pre=cur;
            cur=cur->next;
        }
    }
};

/**
 * Your MyHashMap object will be instantiated and called as such:
 * MyHashMap* obj = new MyHashMap();
 * obj->put(key,value);
 * int param_2 = obj->get(key);
 * obj->remove(key);
 */

链表

设计双端链表

leetcode 707 设计链表

class MyLinkedList {
public:
    /** Initialize your data structure here. */
    MyLinkedList() {
        size = 0;
        dummy_head = new DoublyListNode(0);
        dummy_tail = new DoublyListNode(0);
        dummy_head->next = dummy_tail;
        dummy_tail->prev = dummy_head;
    }
    
    /** Get the value of the index-th node in the linked list. If the index is invalid, return -1. */
    int get(int index) {
        if (index < 0 || index >= size) {
            return -1;
        }
        
        DoublyListNode *curr = nullptr;
        if (index + 1  < size - index) {
            curr = dummy_head;
            for (int i = 0; i < index + 1; ++i) {
                curr = curr->next;
            }
            
        } else {
           curr = dummy_tail;
            for (int i = 0; i < size - index; ++i) {
                curr = curr->prev;
            }
        }

        return curr->val;
    }
    
    /** Add a node of value val before the first element of the linked list. After the insertion, the new node will be the first node of the linked list. */
    void addAtHead(int val) {
        DoublyListNode *pred = dummy_head; // Predecessor
        DoublyListNode *succ = dummy_head->next; // Successor
        
        DoublyListNode *add_node = new DoublyListNode(val);
        
        add_node->next = succ;
        add_node->prev = pred;
        succ->prev = add_node;
        pred->next = add_node;
        
        size++;
    }
    
    /** Append a node of value val to the last element of the linked list. */
    void addAtTail(int val) {
        DoublyListNode *pred = dummy_tail->prev; // Predecessor
        DoublyListNode *succ = dummy_tail; // Successor
        
        DoublyListNode *add_node = new DoublyListNode(val);
        
        add_node->next = succ;
        add_node->prev = pred;
        succ->prev = add_node;
        pred->next = add_node;
        
        size++;
    }
    
    /** Add a node of value val before the index-th node in the linked list. If index equals to the length of linked list, the node will be appended to the end of linked list. If index is greater than the length, the node will not be inserted. */
    void addAtIndex(int index, int val) {
        if (index > size) return;

        if(index < 0) index = 0;

        DoublyListNode *curr = nullptr;
        if (index + 1  < size - index) {
            curr = dummy_head;
            for (int i = 0; i < index + 1; ++i) {
                curr = curr->next;
            }
            
        } else {
           curr = dummy_tail;
            for (int i = 0; i < size - index; ++i) {
                curr = curr->prev;
            }
        }

        DoublyListNode *pred = curr->prev; // Predecessor
        DoublyListNode *succ = curr; // Successor
        
        DoublyListNode *add_node = new DoublyListNode(val);
        
        add_node->next = succ;
        add_node->prev = pred;
        succ->prev = add_node;
        pred->next = add_node;

        size++;
    }
    
    /** Delete the index-th node in the linked list, if the index is valid. */
    void deleteAtIndex(int index) {
        if (index < 0 || index >= size) return;

        DoublyListNode *curr = nullptr;
        if (index + 1  < size - index) {
            curr = dummy_head;
            for (int i = 0; i < index + 1; ++i) {
                curr = curr->next;
            }
            
        } else {
           curr = dummy_tail;
            for (int i = 0; i < size - index; ++i) {
                curr = curr->prev;
            }
        }
        
        DoublyListNode *deletedNode = curr;
        DoublyListNode *pred = curr->prev; // Predecessor
        DoublyListNode *succ = curr->next; // Successor
        
        succ->prev = pred;
        pred->next = succ;
        
        delete deletedNode;
        deletedNode = nullptr;
        
        --size;
    }
    
private:
    // Definition for doubly-linked list.
    struct DoublyListNode {
        int val;
        DoublyListNode *next, *prev;
        DoublyListNode(int x) : val(x), next(NULL), prev(NULL) {}
    };
    DoublyListNode *dummy_head; // 哑头结点
    DoublyListNode *dummy_tail; // 哑尾结点
    
    int size; // 链表的长度
    
};

进阶数据结构设计

跳表

leetcode 1206设计跳表
具体而言
每个节点除了key外，只需要一个引用/指针数组 level 即可。
skiplist 真正的数据只有最下面一层的数据节点，每个节点的后继就是level[0], 每层之间的连接是通过level进行维系的。相当于把除了第一层的数据节点外的其他节点的数据项省略了, 只保留了next指针。
back指针，这里并没有加入back指针，实际上对于普通的查找数据而言，查找过程中永远不会反向。redis中的back貌似是用于反向rank查询.

static const int SKIPLIST_P_VAL = RAND_MAX / 2, MAX_LEVEL = 16;
class Skiplist {
public:
    struct Node {
        int val;
        vector<Node *> level;
        Node(int val, int size = MAX_LEVEL) : val(val), level(size) {}
    };

    Node head;
    int maxlevel = 1;
    Skiplist() : head(INT_MIN, MAX_LEVEL) {}
    // ~Skiplist() { delte all nodes. }

    bool search(int target) {
    	//第一层存的是完整的数据，我要在第一层中找到比我小的最大的节点
        Node * prev = _search(target)[0];//prevs记录的是每一层的比我小的最大的节点，我要的是第一层的
        return prev->level[0] && prev->level[0]->val == target;//这个节点的下一个是我吗？是的话就true
    }

    vector<Node *> _search(int key) {
        Node * cur = &head;//使用cur从第一列出发
        vector<Node *> prevs(MAX_LEVEL);
        // 遍历每一层，从最大层级到第一层
        for (int i = maxlevel - 1; i >= 0; i--) {//从最高一层出发
            // 在当前层中，如果下一个节点值小于我，我就往右移，移到不能移动为止
            while (cur->level[i] && cur->level[i]->val < key)//如果在当前列的当前层有节点并且节点值<目标查找值
                cur = cur->level[i];//当前节点向右移
            prevs[i] = cur;//使用prevs来记录每一层的比我小的最大的节点
        }

        return prevs;
    }

    void add(int num) {
        auto prevs = _search(num);//拿到每一层的比我小的最大的节点，我可能要插入在它们的后面
        int level = random_level();//拿到我要加入到的一个随机的层级
        // 如果随机的层级超出当前的最大层级
        if (level > maxlevel) {
            for (int i = maxlevel; i < level; i++)//因为我的prevs大小只有当前最大层级那么大，我要对prevs扩
                prevs[i] = &head;
            maxlevel = level;//更新最大层级
        }
        Node * cur = new Node(num, level);//在随机层级中新建一个节点
        // 从最大层级到第一层，将当前节点插入到prevs在后面来
        for (int i = level - 1; i >= 0; i--) {
            cur->level[i] = prevs[i]->level[i];
            prevs[i]->level[i] = cur;
        }
        //在redis的实现中还有后退指针，需要设置好当前节点和当前节点的下一个的后退指针
        //后退指针的第一个合法节点是空指针
    }
    
    bool erase(int num) {
        auto prevs = _search(num);//拿到每一层的比我小的最大的节点，我可能在它们的后面
        if (!prevs[0]->level[0] || prevs[0]->level[0]->val != num)//如果我在第一层都没有，那我在其他层也不可能存在
            return false;
        Node * del = prevs[0]->level[0];//拿到我在第一层就要删除的那个节点
        // 在要删掉我这个数的所有层之前，先建立好我的前一个和我的后一个的关系
        for (int i = 0; i < maxlevel; i++)
            if (prevs[i]->level[i] == del)
                prevs[i]->level[i] = del->level[i];
        delete del;//把我这一列都删掉吧
        // 更新最大层级
        while (maxlevel > 1 && !head.level[maxlevel - 1])//如果第一列最大层级指向的节点为空，把这一层去掉吧^_^
            maxlevel--;
        //在redis的实现中还有后退指针，需要设置好当前节点和当前节点的下一个的后退指针
        //后退指针的第一个合法节点是空指针
        return true;
    }

    static int random_level() {
        int level = 1;
        while (rand() < SKIPLIST_P_VAL && level < MAX_LEVEL) level++;//rand() < SKIPLIST_P_VAL有一半概率为1，一半为0，要就是说我的层级有一半的概率+1,一半的概率不再增加，则层级50%概率为1，50%*50%的概率为2，...这样可以控制在加入节点的时候整体结构像个金字塔
        return level;
    }
};

布隆过滤器

#include <iostream>
#include <bitset>
#include <cmath>
 
using namespace std;

typedef unsigned int uint; 
const int DEFAULT_SIZE = 1 << 20;
const int seed[] = { 5, 7, 11, 13, 31, 37, 61 };


class BloomFilter {
public:
 BloomFilter() : hash_func_count(3) {}
 BloomFilter(int bitsize, int str_count) { 
     hash_func_count = ceil((bitsize / str_count) * log(2));
 }
 ~BloomFilter() {}
 
 
 uint RSHash(const char *str, int seed);
 void Add(const char *str);
 bool LookUp(const char *str);
 
private:
 int hash_func_count;
 bitset<DEFAULT_SIZE> bits;
};

uint BloomFilter::RSHash(const char *str, int seed) {  
    uint base = 63689;
    uint hash = 0;    
     
    while (*str) {    
        hash = hash * base + (*str++);    
        base *= seed;    
    }    
     
    return (hash & 0x7FFFFFFF);    
}    
 
void BloomFilter::Add(const char* str) {
	 int index = 0;
	 for(int i = 0; i < hash_func_count; ++i) {
		  index = static_cast<int>(RSHash(str, seed[i])) % DEFAULT_SIZE;
		  bits[index] = 1;
	 }
	 
	 return ;
}
 
bool BloomFilter::LookUp(const char* str) {
	 int index = 0;
	 for(int i = 0; i < hash_func_count; ++i) {
		  index = static_cast<int>(RSHash(str, seed[i])) % DEFAULT_SIZE;
		  if (!bits[index]) return false; 
	 }
	 
	 return true;
}

一致性哈希

一致性哈希
一. 算法解决问题
一致性哈希算法在1997年由麻省理工学院提出的一种分布式哈希（DHT）实现算法，设计目标是为了解决因特网中的热点(Hot spot)问题，初衷和CARP十分类似。一致性哈希修正了CARP使用的简单哈希算法带来的问题，使得分布式哈希（DHT）可以在P2P环境中真正得到应用。
一致性hash算法提出了在动态变化的Cache环境中，判定哈希算法好坏的四个定义：
1、平衡性(Balance)：平衡性是指哈希的结果能够尽可能分布到所有的缓冲中去，这样可以使得所有的缓冲空间都得到利用。很多哈希算法都能够满足这一条件。
2、单调性(Monotonicity)：单调性是指如果已经有一些内容通过哈希分派到了相应的缓冲中，又有新的缓冲加入到系统中。哈希的结果应能够保证原有已分配的内容可以被映射到原有的或者新的缓冲中去，而不会被映射到旧的缓冲集合中的其他缓冲区。
3、分散性(Spread)：在分布式环境中，终端有可能看不到所有的缓冲，而是只能看到其中的一部分。当终端希望通过哈希过程将内容映射到缓冲上时，由于不同终端所见的缓冲范围有可能不同，从而导致哈希的结果不一致，最终的结果是相同的内容被不同的终端映射到不同的缓冲区中。这种情况显然是应该避免的，因为它导致相同内容被存储到不同缓冲中去，降低了系统存储的效率。分散性的定义就是上述情况发生的严重程度。好的哈希算法应能够尽量避免不一致的情况发生，也就是尽量降低分散性。
4、负载(Load)：负载问题实际上是从另一个角度看待分散性问题。既然不同的终端可能将相同的内容映射到不同的缓冲区中，那么对于一个特定的缓冲区而言，也可能被不同的用户映射为不同的内容。与分散性一样，这种情况也是应当避免的，因此好的哈希算法应能够尽量降低缓冲的负荷。

在分布式集群中，对机器的添加删除，或者机器故障后自动脱离集群这些操作是分布式集群管理最基本的功能。如果采用常用的hash(object)%N算法，那么在有机器添加或者删除后，很多原有的数据就无法找到了，这样严重的违反了单调性原则。接下来主要讲解一下一致性哈希算法是如何设计的。

二. 设计方法
把数据用hash函数（如MD5，MurMurHash等算法），映射到一个很大的空间里，如图所示。数据的存储时，先得到一个hash值，对应到这个环中的每个位置，如k1对应到了图中所示的位置，然后沿顺时针找到一个机器节点B，将k1存储到B这个节点中。
这里写图片描述

如果B节点宕机了，则B上的数据就会落到C节点上，如下图所示：

在这里插入图片描述

这样，只会影响C节点，对其他的节点A，D的数据不会造成影响。然而，这又会造成一个“雪崩”的情况，即C节点由于承担了B节点的数据，所以C节点的负载会变高，C节点很容易也宕机，这样依次下去，这样造成整个集群都挂了。

为此，引入了“虚拟节点”的概念：即把想象在这个环上有很多“虚拟节点”，数据的存储是沿着环的顺时针方向找一个虚拟节点，每个虚拟节点都会关联到一个真实节点，如下图所使用：

在这里插入图片描述

图中的A1、A2、B1、B2、C1、C2、D
1、D2都是虚拟节点，机器A负载存储A1、A2的数据，机器B负载存储B1、B2的数据，机器C负载存储C1、C2的数据。由于这些虚拟节点数量很多，均匀分布，因此不会造成“雪崩”现象。

三. 代码实现
MurMurHash算法，是非加密HASH算法，性能很高，比传统的CRC32, MD5，SHA-1（这两个算法都是加密HASH算法，复杂度本身就很高，带来的性能上的损害也不可避免）等HASH算法要快很多，而且这个算法的碰撞率很低。所以这里我们采用MurMurHash算法

头文件consistent_hash.h

#include <map>
using namespace std;

class ConsistentHash
{
public:
    ConsistentHash(int node_num, int virtual_node_num);
    ~ConsistentHash();

    void Initialize();
    size_t GetServerIndex(const char* key);

    void DeleteNode(const int index);
    void AddNewNode(const int index);

private:
    map<uint32_t,size_t> server_nodes_; //虚拟节点,key是哈希值，value是机器的index
    int node_num_;//真实机器节点个数
    int virtual_node_num_;//每个机器节点关联的虚拟节点个数
};

实现文件consistent_hash.cpp

#include <map>
#include <string.h>
#include <sstream>
#include "consistent_hash.h"
#include "murmurhash3.h"
using namespace std;

ConsistentHash::ConsistentHash(int node_num, int virtual_node_num){
    node_num_ = node_num;
    virtual_node_num_ = virtual_node_num;
}

ConsistentHash::~ConsistentHash(){
    server_nodes_.clear();
}


void ConsistentHash::Initialize(){
    for(int i=0; i<node_num_; ++i){
        for(int j=0; j<virtual_node_num_; ++j){
            stringstream node_key;
            node_key<<"SHARD-"<<i<<"-NODE-"<<j;
            uint32_t partition = murmur3_32(node_key.str().c_str(), strlen(node_key.str().c_str()));
            server_nodes_.insert(pair<uint32_t, size_t>(partition, i));
        }
    }
}


size_t ConsistentHash::GetServerIndex(const char* key){
    uint32_t partition = murmur3_32(key, strlen(key));
    map<uint32_t, size_t>::iterator it = server_nodes_.lower_bound(partition);//沿环的顺时针找到一个大于等于key的虚拟节点

    if(it == server_nodes_.end()){//未找到
        return server_nodes_.begin()->second;
    }
    return it->second;
}


void ConsistentHash::DeleteNode(const int index){
    for(int j=0; j<virtual_node_num_; ++j){
        stringstream node_key;
        node_key<<"SHARD-"<<index<<"-NODE-"<<j;
        uint32_t partition = murmur3_32(node_key.str().c_str(), strlen(node_key.str().c_str()));
        map<uint32_t,size_t>::iterator it = server_nodes_.find(partition);
        if(it != server_nodes_.end()){
            server_nodes_.erase(it);
        }
    }
}

void ConsistentHash::AddNewNode(const int index)
{
    for(int j=0; j<virtual_node_num_; ++j){
        stringstream node_key;
        node_key<<"SHARD-"<<index<<"-NODE-"<<j;
        uint32_t partition = murmur3_32(node_key.str().c_str(), strlen(node_key.str().c_str()));
        server_nodes_.insert(pair<uint32_t, size_t>(partition, index));
    }
}

常见算法

排序

冒泡排序：两个石头哪个重哪个往下掉，从后往前每次确定一个元素

void bubblesort(vector<int> a){
	if(a.empty()||a.size()<2) return;
	for(int end=a.size()-1;end>0;end--){
		for(int i=0;i<end;i++){
			if(a[i]>a[i+1]) swap(a[i],a[i+1]);
		}
	}
}

选择排序：从前往后每次确定一个元素，找到当前的最小元素交换

void selectionsort(vector<int> a){
	if(a.empty()||a.size()<2) return;
	for(int i=0;i<a.size();i++){
		int minindex=i;
		for(j=i;j<a.size();j++){
			minindex=(a[minindex]<a[j]?)minindex:j;
		}
		swap(a[i],a[minindex]);
	}
}

插入排序：和扑克一样，从左到右维护边界，遍历到新元素就把它插入前边的边界中；

void insert_sort(vector<int> a){
	if(a.empty()||a.size()<2) return;
	for(int i=1;i<a.size();i++){
		for(int j=i-1;j>=0&&a[j]>a[j+1];j--){
			swap(a[j],a[j+1]);
		}
	}
}

归并排序：分两半，排好左边的，再排右边的，最后再合并一起

O(n*log n)

void merge_sort(vector<int> a){
	if(a.empty()||a.size()<2) return;
	mergesort(a,0,a.size()-1);
}
void mergesort(vector<int> a,int left,int right){
	if(left==right){
		return;
	}
	int mid=left+(right-left)/2;
	sort(a,left,mid);
	sort(a,mid+1,right);
	merge(a,left,mid,right);
}
void merge(vector<int> a,int left,int mid,int right){
	vector<int> help(right-left+1);
	int i=0,p1=left,p2=mid+1；
	while(p1<=mid&&p2<=right){
		help[i++]=a[p1]<a[p2]?a[p1++]:a[p2++];
	}
	while(p1<=mid) help[i++]=a[p1++];
	while(p2<=right) help[i++]=a[p2++];
	for(int i=0;i<help.size();i++){
		a[left+i]=help[i];
	}
}

荷兰国旗问题：在数组arr中，小于p放左边，等于放中间，大于在右边

void partition(vector<int> arr,int l,int r,int p){
	int less=l-1;
	int more=r+1;
	while(l<more){
		if(arr[l]<p){
			less++;
			swap(arr[less],arr[l]);
			l++;
		}
		else if(arr[l]>p){
			more--;
			swap(arr[more],arr[l]);
		}
		else l++;
	}
	//return make_pair(less,more);
}

快排：经典快排：选择最后一个元素p，做类似荷兰国旗问题的方法：空间O(logN)：浪费在划分点上

void quicksort(vector<int> a){
	if(a.empty()||a.size()<2) return;
	quick(a,0,a.size()-1);
}
void quick(vector<int> a,int left,int right){
	if(left<right){
		pair<int,int> p=partiton(a,left,right,a[right]);
		quick(a,left,p.first);
		quick(a,p.second,right);
	}
}

堆排序

//有新的子节点插进来时，子节点要是比父节点大，就和父节点交换（大根堆）

void heap_insert(vector<int> a,int index){
	while(a[index]>a[(index-1)/2]){
		swap(a[index]>a[(index-1)/2]);
		index=(index-1)/2;
	}
}

删掉根节点后:heapsize-1，根节点和最后一个元素换位置
（swap(a[0],a[- -heapsize])）：
先用largest代表当前节点和左儿子，右儿子中最大的那个；再看largest具体是当前节点还是儿子来区分情况

void heapify(vector<int> a,int index,int heapsize){
	int left=index*2+1，right=left+1;
	while(left<heapsize){
		int largest=right<heapsize&&a[right]>a[left]?right:left;
		largest=a[largest]>a[index]?largest:index;
		if(largest==index) return;
		swap(a[index],a[largest]);
		index=largest;
		left=index*2+1;
		right=left+1;
	}
}

什么是排序的稳定性？

排序后相同值的序列和原来的序列一样
冒泡排序：可以实现，碰到相同的不交换就行
选择排序：不可以：如5，5，5，0
插入排序：可以实现
归并排序：可以实现：在合并的时候有相同的先要第一个序列的
快排：不可以，在partition时如4，4，4，都>p时，要交换第一个4到后面
堆排：不可以：如4，4，4，5

桶排序:给你一堆数，求出排序后两个数之间的最大gap是啥

找到最大值和最小值（用来分桶号），然后对每一个数分桶号同时更新相应桶的最大值和最小值,然后遍历每一个桶来算gap

int max_gap(vector<int> nums){
	if(nums.empty()||nums.size()) return;
	int minnum=INT_MAX;
	int maxnum=INT_MIN;
	for(int i=0;i<nums.size();i++){
		minnum=min(minnum,nums[i]);
		maxnum=max(maxnum,nums[i]);
	}
	if(minnum==maxnum) return 0;
	vector<bool> hasnum(nums.size()+1)；
	vector<bool> maxs(nums.size()+1)；
	vector<bool> mins(nums.size()+1)；
	int bid=0;
	for(int i=0;i<nums.size();i++){
		bid=(int)(nums[i]-minnum)*nums.size()/(maxnum-minnum);
		mins[bid]=hasnum[bid]?min(mins[bid],nums[i]):nums[i];
		maxs[bid]=hasnum[bid]?max(maxs[bid],nums[i]):nums[i];
		hasnum[bid]=true;
	}
	
	int res=0,lastmax=maxs[0];
	for(int i=1;i<nums.size()+1;i++){
		if(hasnum[i]){
		res=max(res,mins[i]-lastmax);
		lastmax=maxs[i];
		}
	}
	return res;
}

操作系统

进程调度算法

各种调度算法的比较

比较的标准：

1.CPU利用率
2.系统吞吐量
3.周转时间
4.等待时间
5.响应时间

FCFS

一个长作业在前面，短作业要等很久
有利于CPU繁忙型，不利于IO繁忙型

SJF

长作业的周转时间长
不考虑作业的紧迫程度

优先级调度算法

优先级设置参考：
1.系统进程>用户进程
2.前台进程>后台进程
3.I/O进程>计算型进程

高响应比优先调度算法

响应比=(等待时间+要求服务时间)/要求服务时间

分析：
等待时间相同时，要求服务时间越短，响应比越高->短作业优先
要求服务时间相同时，等待时间越长，响应比越高->先来先服务，照顾长作业

时间片轮转调度

响应时间好，周转时间差

时间片大小分析：
1.时间片大了->先来先服务
2.时间片小了->进程切换开销

多级反馈队列调度算法（MLFQ）

在其他调度算法的基础上要解决的问题：
1.优化周转时间：对于SJF，系统不知道你要多久，无法预见
2.降低响应时间：对于RR，响应时间低了但是周转时间高了

规则：

1.如果A的优先级>B的优先级，运行A
2.如果A的优先级=B的优先级，轮转运行A和B
3.工作进入系统时，放在最高优先级
4.A用完了它在某一层的时间配额，降低优先级
5.经过一段时间，将系统的所有工作重新加入最高优先级

解决问题了吗？
1.短作业优先了吗？ [规则3]
2.交互型作业及时响应了吗？[规则3]
3.交互型作业存在I/O操作让出CPU了怎么办？怎么保证I/O完事以后有高的优先级？[规则4]
4.对于长作业，如何避免饥饿问题？[规则5]

实现上的细节：
1.支持不同队列可变的时间片长度，高优先级短时间片，这一层的交互工作可以更快切换；低优先级的一般是CPU密集型工作
2.有的实现采用数学公式的方式改变优先级；如基于当前使用了多少CPU，通过公式计算某个工作的当前优先级
3.一般最高优先级只留给系统使用

多级反馈队列调度算法

代码实现

Process.h

#pragma once

class Process {
public :
	int PID; //进程pid
	int duration_time;//进程剩余时间
	int priority;//进程优先级
	Process(int pid, int time, int p) {
		PID = pid;
		duration_time = time;
		priority = p;
	}

	void setLastTime(int time) {
		lastTime = time;
	}

	int getLastTime() {
		return lastTime;
	}

	int runningTime(int time) {//实际运行时间取 进程拿到的时间和进程剩余要执行的时间中的最小值
		return duration_time > time ? time : duration_time;
	}

	bool runOut(int time) {//看 进程拿到的时间够不够进程跑完
		duration_time -= time;
		return duration_time <= 0;
	}
private:
	int lastTime;//进程上一次运行的最后时间
};

Processor.h

#pragma once
#include<iostream>
#include<vector>
#include<queue>
#include"Random.h"
#include"Process.h"
using namespace std;

struct greater {
	bool operator()(Process p1, Process p2) {//先看优先级谁高
		if (p1.priority == p2.priority && p1.priority == 1) return p1.getLastTime() > p2.getLastTime();//如果优先级一样是1（最低级），谁的最后执行时间小就指行谁
		return p1.priority < p2.priority;//大项堆
	}
};

class Processor {
private:
	priority_queue<Process, vector<Process>, greater> q;//存储多个进程pcb的结构
	int cnt;//进程数量

	void createProcesses(int cnt) {//生成进程
		Random gen;
		for (int i = 0; i < cnt; i++) {
			Process p(gen.createNum(100), gen.createNum(79, 1), gen.createNum(4,1));
			q.push(p);
		}
	}

public:
	Processor(int cnt) {
		this->cnt = cnt;
		createProcesses(cnt);
	}

	void run() {
		int currTime = 0;
		int totalTime = 0;
		while (!q.empty()) {
			Process curr = q.top();//从优先队列中拿一个进程出来
			cout << "now is " << currTime << " : processing pid = " << curr.PID << " curr priority = " << curr.priority << " still has " << curr.duration_time << endl;
			// check if visted before
			int getP = curr.priority;//拿到当前进程的优先级
			int slice;
			switch (getP){//根据优先级来拿到时间片，优先级越大越高，拿到的时间片越小
				case 2:
					slice = 32;
					break;
				case 3:
					slice = 16;
					break;
				case 4:
					slice = 8;
					break;
				default:
					slice = 80;
			}
			// update time
			q.pop();
			currTime += curr.runningTime(slice);//更新当前时间
			bool finish = curr.runOut(slice);//判断当前进程是否已经结束
			if (finish) {
				cout << "finish : pid = " << curr.PID << " ends at " << currTime << ". it costs " << currTime << endl;
				totalTime += currTime;//更新总的时间
			}
			else {
				curr.setLastTime(currTime);//设置当前进程上一次运行的最后时间
				curr.priority--;//优先级-1
				q.push(curr);
			}
		}
		cout << "all processes have fininsh! And the average time is " << totalTime / cnt << endl;
	}
};

Random.h

#pragma once
#include<cstdlib>
#include<ctime>
class Random {
public:
	Random() {
		srand((unsigned)time(NULL));
	}
	int createNum(int range, int lb = 0) {
		return lb + rand() % range;
	}
};

Test.cpp

#include"Processor.h"
using namespace std;

int main() {
	Processor p(100);
	p.run();
	system("pause");
	return 0;
}

比例份额调度算法

按比例分配CPU：
假设两个进程A和B分别有75和25张彩票，通过不断定时地抽取0-99之间的一个数，抽到0-74代表运行进程A，抽到75-99代表运行B。

利用随机性的好处：

1.避免奇怪的边角情况
2.几乎不用记录任何状态
3.随机方法快，对运行速度要求高的场景适用

彩票机制：
1.彩票货币：每个用户可以定义不同的货币类型，在和其他用户共同参与比例划分时再进行转换
2.彩票转让：一个进程可以临时将自己的彩票交给另一个进程；在客户端/服务器交互的场景下常见
3.彩票通胀：一个进程可以临时提升或降低自己拥有的彩票数量；可以用于进程之间相互信任的环境

彩票调度实现：

struct node {
	int tickets;
	node* next;
	node(int tickets) :tickets(tickets), next(nullptr) {}
	node(int tickets,node* next):tickets(tickets),next(next){}
};

class ticket {
public:
	ticket(int total):total_tickets(total){
		head = new node(-1,nullptr);
	}
	~ticket(){
		node* cur=head;
		while(cur){
			node* pre=cur;
			cur=cur->next;
			delete pre;
		}
	}

	void add_progress(node * new_node) {
		node* cur = head;
		while (cur->next&&cur->next->tickets<new_node->tickets) {
			cur = cur->next;
		}
		new_node->next = cur->next;
		cur->next = new_node;
	}

	int generate_rand() {
		return rand() % total_tickets;
	}

	node* find_target(int target) {
		node* cur = head->next;
		while (cur&&target>cur->tickets) {
			target -= cur->tickets;
			cur = cur->next;
		}
		return cur;
	}

private:
	int total_tickets;
	node* head;
};

特点：工作执行时间很短时，公平性很差；

步长调度：确定性的公平分配算法

struct pcb {
	int tickets;
	int stride;
	int count;
	pcb(int tickets) :tickets(tickets) {
		count = 0;
	}
	bool operator <(const pcb& p)const {
		return count < p.count;
	}
};

class ticket {
public:
	ticket(int total):total_tickets(total) {}
	~ticket(){
		while (!progress.empty()) {
			progress.pop();
		}
	}

	void add_progress(pcb p) {
		p.stride = total_tickets / p.tickets;
		progress.push(p);
	}

	pcb get_progress() {
		pcb res=progress.top();
		progress.pop();
		res.count += res.stride;
		progress.push(res);
		return res;
	}

private:
	int total_tickets;
	priority_queue<pcb, vector<pcb>, greater<pcb>> progress;
};

有了步长调度，还需要彩票调度吗？

彩票调度不需要记录全局状态；并且在加入一个新的进程时，在步长调度下，这个进程对应的count如果设置为0，它将独占CPU

比例份额调度的缺点：

1.不能很好地适合I/O；
2.很难确定票数分配问题；

应用场景：

在容易确定比例份额的情况中可采用，如：在虚拟数据中心中，希望分配1/4的CPU周期给windows虚拟机，3/4给linux虚拟机；

linux 0.11 的实现（基于时间片轮转调度+优先级调度+短作业优先）

CPU调度函数叫schedule()，其核心目标是找到一个合适的next进程然后切换之

void Schedule(void){
	while(1){
		c=-1;
		next=0;//next是要切换到的下一个进程
		i=NR_TASKS;//i指向PCB数组末尾元素
		p=&task[NR_TASK];//把最后一个PCB地址赋值给指针p;把运行态和就绪态的进程的PCB做成了一个数组，名字叫task。其中NR_TASKS是PCB数组中的最后一个元素，也就是最后一个PCB。
		//注意因为是进程切换的函数，故这里不考虑非就绪态的进程，比如阻塞态进程。
		while(--i){//循环条件是反向遍历进程的PCB数组,找到PCB数组里就绪态的PCB里，那个最大的counter，并且把拥有最大counter的进程设置为next
			if(*p->state==TASK_RUNNING&&(*p)->counter>c){//如果该进程的状态是就绪态（这个枚举变量TASK_RUNNING，在0.11版本的内核源码里代表的是就绪态），并且该进程的counter值大于变量c
				c=(*p)->counter;//counter变量除了有优先级的含义，还有时间片的体现
				next=i;
			}
			if(c) break;//如果找到了最大counter（也就是c不为0）的就绪进程，那么跳出外层循环，立即执行蓝色的switch_to函数，进程开始切换，这是典型的优先级调度算法
			for(p=&LAST_TASK;p>&FIRST_TASK;--p)//如果c最后为0，也就是就绪态进程的时间片都用完了，那么就进入到for循环里执行：
				(*p)->counter=((*p)->counter>>1)+(*p)->priority;//把所有的进程（不论状态）再遍历一次，期间重新赋值各个进程的counter，每次把进程counter除以2：
/*1、如果是就绪态的进程，此时它的counter是0，0除以2还是0，然后加上进程的优先级，等于是把就绪态进程的counter赋值为初值，这个初值就是进程的优先级——priority。

2、如果是其他进程，此时可能有阻塞，比如长时间阻塞在I/O上，那么把他们的counter折一半，肯定大于0，再加上之前的初值，这样一旦阻塞的I/O进程每次回到就绪态时，它的counter一定比原来的非I/O进程的counter大，也就是让那些阻塞的I/O进程恢复了较高优先级，且还让I/O进程的时间片变大，且阻塞的时间越长，效果越明显。*/
		}
		switch_to(next);
	}
}

//而时钟中断函数又是在操作系统初始化函数里就已经设置的。每次发生CPU时钟中断，都会让进程的counter--，变为0时切换。典型的时间片轮转算法。保证了用户的即时响应。
_time_interrupt:
	call _do_timer

void do_timer(){
	current->counter--;
	if(current->counter>0) return;
	current->counter=0;
	schedule();
}

算法优缺点分析：

阻塞的进程再就绪以后优先级高于非阻塞进程，这是Linux的一种自我学习机制，如果进程经历了阻塞，那么就认为它需要优先调度，让等的越久的任务先调度，且调度时间也变长，照顾了阻塞进程，一箭双雕。

前面说了，该算法照顾了经常阻塞的进程，比如I/O进程，有自我学习机制，但它同时也照顾了短进程，对于短进程，在它们之间一直按照counter轮转调度，近似短作业优先算法，其实是时间片轮转调度算法，但是每次轮转之后，肯定是短作业优先执行完，一箭三雕。且时间片用完，切换进程的时候找counter最大的去调度（优先级最高的，同时时间片也比上次多些），合情合理。

这里可能的疑问：counter不断的变化，真的能保证响应时间的上界么？

答案是能，设一个进程时间片的初值Priority，如果该进程一直阻塞，每次切回就绪态，都会给它时间片变长，那么该进程的时间片肯定是最长的。设C(0) = Priority，简写为p，作为最长的时间片的进程，它会一直按照下面的算法更新：

C(T)=C(T-1) / 2 + Priority
递归：c0=p，c1=c0/2+p=p+p/2=3p/2，c2 = p/2+ p/4 + p=7p/4……
c无穷大，是一个收敛级数，小于等于2p。
最终n个任务的周转时间的界就是2np，故算法中除2，不是随便设置的。
做成减法就不行，就不收敛了。

实现一个锁

如何评价一个锁
1.能否提供互斥
2.公平性，是否有竞争锁的线程会饿死
3.性能：使用锁之后增加的时间开销
1）只有一个线程抢锁，释放锁的开销
2）1个CPU多个线程竞争
3）多个CPU，多个线程竞争

最早期的方案

void lock(){
	DisableInterrupts();
}
void unlock(){
	EnableInterrupts();
}

原理：在单处理器中，进入临界区之前关中断，可以保证在临界区执行的代码不被中断；

缺点：
1.要求允许所有调用线程执行特权操作；
可能存在恶意程序调用lock后进入死循环
2.不支持多处理器；
3.关闭中断导致中断丢失；磁盘设备完成了读取请求但是CPU丢失这一请求
4.效率低下；与其他正常指令相比，关中断慢

应用：
操作系统采用屏蔽中断的方式，保证访问自己数据结构的原子性，或避免复杂的中断处理情况

使用硬件原语实现锁

要是没有硬件原语会怎样？

typedef struct lock_t{
	int flag;
}lock_t;

void init(lock_t *mutex){
	mutex->flag=0;
}

void lock(lock_t *mutex){
	while(mutex->flag==1);
	mutex->flag=1;
}

void unlock(lock_t *mutex){
	mutex->flag=0;
}

存在问题：
lock涉及到的两个步骤不是原子的；
即：判断可以进入临界区之后，没有立刻改变当前锁状态

硬件原语

test-and-set

int  TestAndSet(int *old_ptr,int new){
	int old=*old_ptr;
	*old_ptr=new;
	return old;
}

将上面的代码改为：

void lock(lock_t *mutex){
	while(TestAndSet(&mutex->flag,1)==1);
}

评价自旋锁：
1.互斥
2.公平性：不能保证一个等待线程一定会进入临界区
3.性能：
1）单CPU：差
假设一个线程持有锁进入临界区时被抢占，其他进不了也会自旋一个时间片
2）多CPU：

compare-and-swap

int CompareAndSwap(int *ptr,int expected,int new){
	int actual=*ptr;
	if(actual==expected){
		*ptr=new;
	}
	return actual;
}

将上面的代码改为：

void lock(lock_t *mutex){
	while(CompareAndSwap(&mutex->flag,0,1)==1);
}

链接的加载和条件式存储指令

int LoadLinked(int *ptr){
	return *ptr;
}

int StoreConditional(int *ptr,int value){
	if(如果上一次加载的地址在这个期间都没有更新的话){
		*ptr=value;
		return 1;
	}
	else
		return 0;
}

将上面的代码改为：

void lock(lock_t *mutex){
	while(1){
		while(LoadLinked(&lock->flag)==1) ;
		if(StoreCondition(&lock->flag,1)==1)
			return;
	}
}

为啥可以实现互斥？

fetch-and-add

原子地返回特定地址的旧值，并且让该值自增1

int FetchAndAdd(int *ptr){
	int old=*ptr;
	*ptr=old+1;
	return old;
}

typedef struct lock_t{
	int ticket;
	int turn;
}lock_t;

void lock_init(lock_t* lock){
	lock->ticket=0;
	lock->turn=0;
}

void lock(lock_t *lock){
	int myturn=FetchAndAdd(&lock->ticket);
	while(lock->turn!=myturn) ;
}

void unlock(lock_t *lock){
	FetchAndAdd(&lock->turn);
}

为啥可以实现互斥？
相比其他实现有什么优点？

对于自旋的解决

void lock(lock_t *mutex){
   while(TestAndSet(&mutex->flag,1)==1)
   		yield();
}

有什么问题？
1.许多线程反复竞争一把锁时，存在上下文切换的开销问题
2.饿死问题：一个线程一致处于让出的循环，其他线程反复进出临界区

使用队列：休眠替代自旋

前面的方法存在的问题：
存在偶然性，不能防止饿死，不能防止浪费

利用Solaris提供的支持：
park():让调用线程休眠
unpark(threadID)会唤醒线程

typedef struct lock_t{
	int flag;
	int guard;
	queue_t *q;
}

void lock_init(lock_t *m){
	m->flag=0;
	m->guard=0;
	queue_init(m->q);
}

void lock(lock_t *m){
	while(TestAndSet(&m->guard,1)==1) ;
	if(m->flag==0){  
		m->flag=1;
		m->guard=0;
	}else{
		queue_add(m->q,gettid());
		m->guard=0;
		park();
	}
}

void unlock(lock_t *m){
	while(TestAndSet(&m->guard,1)==1) ;
	if(queue->empty(m->q))
		m->flag=0;
	else
		unpark(queue_remove(m->q));
	m->guard=0;
}

linux的实现

页面置换算法

FIFO

LRU

使用list和unordered_map

class LRUCache {
    list<pair<int, int>> l;
    unordered_map<int, list<pair<int, int>>::iterator> mp;
    int cap;
public:
    LRUCache(int capacity) {
        cap = capacity;
    }
    
    int get(int key) {
        if(mp.find(key) != mp.end()){
            int res = (*mp[key]).second;
            l.erase(mp[key]);
            l.push_front(make_pair(key, res));
            mp[key] = l.begin();
            return res;
        }
        return -1;
    }
    
    void put(int key, int value) {
        if(mp.find(key) != mp.end()){
            l.erase(mp[key]);
            l.push_front(make_pair(key, value));
            mp[key] = l.begin();
        }
        else{
            if(l.size() == cap){
                mp.erase(l.back().first);
                l.pop_back();
            }
            l.push_front(make_pair(key, value));
            mp[key] = l.begin();
        }
    }
};

/**
 * Your LRUCache object will be instantiated and called as such:
 * LRUCache* obj = new LRUCache(capacity);
 * int param_1 = obj->get(key);
 * obj->put(key,value);
 */

如果这个list和unordered_map要自己实现

class LRUCache {
public:
    struct Node
    {
        int val, key;
        Node *left, *right;
        Node() : key(0), val(0), left(NULL), right(NULL) {}
    };
    Node *hu, *tu; // hu: head_used, tu: tail_used; head在左侧，tail在右侧
    Node *hr, *tr; // hr: head_remains, tr: tail_remains; head在左侧，tail在右侧
    int n;
    unordered_map<int, Node*> hash;

    void delete_node(Node *p)
    {
        p->left->right = p->right, p->right->left = p->left;
    }

    void insert_node(Node *h, Node *p)
    {
        p->right = h->right, h->right = p;
        p->left = h, p->right->left = p;
    }

    LRUCache(int capacity) {
        n = capacity;
        hu = new Node(), tu = new Node();
        hr = new Node(), tr = new Node();
        hu->right = tu, tu->left = hu;
        hr->right = tr, tr->left = hr;

        for (int i = 0; i < n; i ++ )
        {
            Node *p = new Node();
            insert_node(hr, p);
        }
    }

    int get(int key) {
        if (hash[key])
        {
            Node *p = hash[key];
            delete_node(p);
            insert_node(hu, p);
            return p->val;
        }
        return -1;
    }

    void put(int key, int value) {
        if (hash[key])
        {
            Node *p = hash[key];
            delete_node(p);
            insert_node(hu, p);
            p->val = value;
            return;
        }

        if (!n)
        {
            n ++ ;
            Node *p = tu->left;
            hash[p->key] = 0;
            delete_node(p);
            insert_node(hr, p);
        }

        n -- ;
        Node *p = hr->right;
        p->key = key, p->val = value, hash[key] = p;
        delete_node(p);
        insert_node(hu, p);
    }
};

LFU

使用两个unordered_map来做

节点map：(key,节点对应在频数map中的链表节点的迭代器)
频数map：(频数,同一频数的链表节点集合)

其中频数map可以用链表做
在这里插入图片描述

// 缓存的节点信息
struct Node {
    int key, val, freq;//每个节点的键值和频次
    Node(int _key,int _val,int _freq): key(_key), val(_val), freq(_freq){}
};
class LFUCache {
    int minfreq, capacity;//最小频次和容量
    unordered_map<int, list<Node>::iterator> key_table;//节点的键值和它对应的node节点的迭代器
    unordered_map<int, list<Node>> freq_table;//在每个频次下会有一个对应的链表存节点
public:
    LFUCache(int _capacity) {
        minfreq = 0;
        capacity = _capacity;
        key_table.clear();
        freq_table.clear();
    }
    
    int get(int key) {
        if (capacity == 0) return -1;
        auto it = key_table.find(key);//看这个节点在数据map中存不存在
        if (it == key_table.end()) return -1;//找不到就返回
        list<Node>::iterator node = it -> second;//拿到节点的迭代器
        int val = node -> val, freq = node -> freq;//拿到节点的值和原来的频次
        freq_table[freq].erase(node);//在频次map中把原来频次存的节点删掉
        // 如果当前链表为空，我们需要在哈希表中删除，且更新minFreq
        if (freq_table[freq].size() == 0) {
            freq_table.erase(freq);
            if (minfreq == freq) minfreq += 1;
        }
        // 插入到 freq + 1对应的链表中
        freq_table[freq + 1].push_front(Node(key, val, freq + 1));
        key_table[key] = freq_table[freq + 1].begin();//不要忘记同时更新数据map
        return val;
    }
    
    void put(int key, int value) {
        if (capacity == 0) return;
        auto it = key_table.find(key);//在节点map中找看有没有
        if (it == key_table.end()) {//如果以前没有出现过，要新加一个节点
            if (key_table.size() == capacity) {// 缓存已满，需要进行删除操作
                auto it2 = freq_table[minfreq].back();// 通过 minFreq 拿到 freq_table[minFreq] 链表的末尾节点
                key_table.erase(it2.key);//从节点map中删掉
                freq_table[minfreq].pop_back();//从频次map中删除
                if (freq_table[minfreq].size() == 0) {//最小频次对应的链表为空就删掉
                    freq_table.erase(minfreq);
                }
            } 
            freq_table[1].push_front(Node(key, value, 1));//此时的频次为1，在频次map中做更新
            key_table[key] = freq_table[1].begin();//别忘了更新节点map
            minfreq = 1;//更新最小频次
        } else {// 与 get 操作基本一致，除了需要更新缓存的值
            list<Node>::iterator node = it -> second;
            int freq = node -> freq;
            freq_table[freq].erase(node);
            if (freq_table[freq].size() == 0) {
                freq_table.erase(freq);
                if (minfreq == freq) minfreq += 1;
            }
            freq_table[freq + 1].push_front(Node(key, value, freq + 1));
            key_table[key] = freq_table[freq + 1].begin();
        }
    }
};

链接：leetcode 460

//为 list 自定义一个内存管理类模板
template<typename T>
class SingleAlloctor {
	enum {
		MAXN = 5000,
	};
	static T *data;    //内存池为全局变量，防止重复申请。
	static int current;//current 为头结点，标记了哪些data可用。
	static int pool[MAXN]; //pool本质上是一个链表；
	public:
		typedef T value_type;
		T * allocate(int num) {
			int cur = current;
			current = pool[current];
			return data + cur;
		}
		void deallocate(T *p, std::size_t) {
			ptrdiff_t offset = p-data;
			pool[offset] = current;
			current = offset;
		}
		SingleAlloctor() {
            if(data != nullptr) { //应该还有对应的delete，哈哈偷懒了。
                return;
            }
			data = static_cast<T*>(::operator new (MAXN * sizeof(T)));
			for(int i = 0; i < MAXN; i++) {
				pool[i] = i-1;
			}
			current = MAXN-1;
		}
};

template<typename T>
int SingleAlloctor<T>::current;

template<typename T>
int SingleAlloctor<T>::pool[SingleAlloctor<T>::MAXN];

template<typename T>
T *SingleAlloctor<T>::data = nullptr;

class LFUCache {
    struct FreqNode;
	typedef list<FreqNode, SingleAlloctor<FreqNode>> FreqList;

	struct DataNode {
		int key;     //索引
		int value;   //数据
		FreqList::iterator fit; //对应的频次链表的结点
		DataNode(int k = 0, int v = 0, FreqList::iterator f = FreqList::iterator())
			: key(k), value(v), fit(f) {}
	};
	typedef list<DataNode, SingleAlloctor<DataNode>> DataList;

    struct FreqNode {
        int cnt;      //频次
        DataList dataList;  //对应的数据结点列表
		FreqNode(int c = 0) : cnt(c) {}
    };
	FreqList cache;
	unordered_map<int, DataList::iterator> router;

	size_t capacity;

	void updateCache(int key) {
		auto rit = router.find(key);
		auto it = rit->second;
		if(it->fit == cache.begin()) {
			cache.push_front(FreqNode(it->fit->cnt+1)); //更高频次的结点不存在，创建它！
		} else {
			FreqList::iterator pre = it->fit; --pre;    
			if(pre->cnt != it->fit->cnt+1) {       // 更高频次的结点频次不匹配，插入一个新的。
				cache.insert(it->fit, FreqNode(it->fit->cnt+1));
			}
		}
		auto curIt = it->fit;          //从当前数据链表断开，插入到更高频次的数据链表。
		auto preIt = it->fit; --preIt;
		preIt->dataList.push_front(DataNode(key, it->value, preIt));
		curIt->dataList.erase(it);
		rit->second = preIt->dataList.begin(); //更新 router
	}

	void swapOut() {
		for(auto it = --cache.end(); ;) {
			if(it->dataList.size()) { //删除最后一个
				auto out = it->dataList.back();
				router.erase(out.key);
				it->dataList.pop_back();
				break;
			} else {       //可能存在若干个空的数据链表，清楚它。均摊 O(1)。
				cache.erase(it--);
			}
		}
	}
    
public:
    LFUCache(int capacity) : capacity(capacity) {}
    
    int get(int key) {
		auto it = router.find(key);
		if(it == router.end()) {
			return -1;
		}
		int val = it->second->value;
		updateCache(key);
		return val;
    }

    void put(int key, int value) {
		if(capacity == 0) {
			return;
		}
		auto rit = router.find(key);
		if(rit != router.end()) {
			rit->second->value = value; //key已存在，更新value。
			updateCache(key);
		} else {
			if(router.size() == capacity) {
				swapOut();
			}
			if(cache.empty() || cache.back().cnt != 1) {
				cache.push_back(FreqNode(1)); //不存在频次为 1 的数据链表，创建一个。
			}
			auto it = --cache.end();
			it->dataList.push_front(DataNode(key, value, it)); //插入到数据链表。
			router.insert(make_pair(key, it->dataList.begin())); //更新router
		}
    }
};

C-CLOCK

struct page {
	bool is_visited;
	bool is_modified;
	pair<int, int> page_data;
	page(bool is_v,bool is_m,pair<int,int> p):is_visited(is_v),is_modified(is_m),page_data(p){}
};

class CLOCK {
public:
	int capicity;
	list<page> pages;
	unordered_map<int, list<page>::iterator> mp;
	CLOCK() {}
	~CLOCK(){}
	int get(int key) {
		auto it = mp.find(key);
		if (it == mp.end()) return -1;
		int res =(*mp[key]).page_data.second;
		(*mp[key]).is_visited = true;
		return res;
	}
	void put(int key,int value) {
		auto it = mp.find(key);
		if (it != mp.end()) {
			(*mp[key]).page_data.second=value;
			(*mp[key]).is_visited = true;
			(*mp[key]).is_modified = true;
		}
		else {
			if (pages.size()==capicity) {
				auto cur = pages.begin();
				for (int round = 1;round<=4;round++) {
					cur = pages.begin();
					while (cur != pages.end()) {
						if (round %2== 1 && (*cur).is_visited == 0 && (*cur).is_modified == 0) break;
						if (round % 2 == 0 && (*cur).is_visited == 0 && (*cur).is_modified == 1) break;
						(*cur).is_visited == 0;
						cur++;
					}
				}
				(*cur).is_visited = true;
				(*cur).is_modified = true;
				(*cur).page_data.first = key;
				(*cur).page_data.second = value;
			}
			else {
				pages.push_front(page(true,true,make_pair(key,value)));
				mp[key] = pages.begin();
			}
		}	
	}
};

磁盘调度算法

redis应用场景

消息队列：Redis实现消息队列的4种方案

Redis作为内存中的数据结构存储，常用作数据库、缓存和消息代理。它支持数据结构，如字符串，散列，列表，集合，带有范围查询的排序集（sorted sets），位图（bitmaps），超级日志（hyperloglogs），具有半径查询和流的地理空间索引。Redis具有内置复制，Lua脚本，LRU驱逐，事务和不同级别的磁盘持久性，并通过Redis Sentinel和Redis Cluster自动分区。
为了实现其出色的性能，Redis使用内存数据集（in-memory dataset）。
MQ应用有很多，比如ActiveMQ,RabbitMQ,Kafka等，但是也可以基于redis来实现，可以降低系统的维护成本和实现复杂度，本篇介绍redis中实现消息队列的几种方案。
基于List的 LPUSH+BRPOP 的实现
PUB/SUB，订阅/发布模式
基于Sorted-Set的实现
基于Stream类型的实现

基于异步消息队列List lpush-brpop(rpush-blpop)

使用rpush和lpush操作入队列，lpop和rpop操作出队列。
List支持多个生产者和消费者并发进出消息，每个消费者拿到都是不同的列表元素。

但是当队列为空时，lpop和rpop会一直空轮训，消耗资源；
1.通常我们使用 sleep 来解决这个问题，让线程睡一会，睡个 1s 钟就可以了。不但客户端的 CPU 能降下来，Redis 的 QPS 也降下来了。
用上面睡眠的办法可以解决问题。但是有个小问题，那就是睡眠会导致消息的延迟增大。如果只有 1 个消费者，那么这个延迟就是 1s。如果有多个消费者，这个延迟会有所下降，因为每个消费者的睡觉时间是岔开来的。有没有什么办法能显著降低延迟呢？
2.所以引入阻塞读blpop和brpop（b代表blocking），阻塞读在队列没有数据的时候进入休眠状态，一旦数据到来则立刻醒过来，消息延迟几乎为零。
消费者可以设置数据不存在时的阻塞时间，来减少不必要的轮询。

注意
你以为上面的方案很完美？还有个问题需要解决：空闲连接的问题。
如果线程一直阻塞在那里，Redis客户端的连接就成了闲置连接，闲置过久，服务器一般会主动断开连接，减少闲置资源占用，这个时候blpop和brpop或抛出异常；
所以在编写客户端消费者的时候要小心，如果捕获到异常，还有重试。

该模式有很多优点：
1.实现简单
2.Redis支持持久化消息，意味着消息不会丢失，可以重复查看（注意不是消费，只看不用，LRANGE类的指令）。Redis支持消息持久化，在服务端数据是安全的
3.可以保证顺序，保证使用LPUSH命令，可以保证消息的顺序性
4.使用RPUSH，可以将消息放在队列的开头，达到优先消息的目的，可以实现简易的消息优先队列。

缺点：
1.做消费者确认ACK麻烦，不能保证消费者消费消息后是否成功处理的问题（宕机或处理异常等），通常需要维护一个Pending列表，保证消息处理确认。
2.不能做广播模式，如pub/sub，消息发布/订阅模型
3.不能重复消费，一旦消费就会被删除，导致客户端数据是不安全的，当客户端宕机、网络断开会出现数据丢失
4.不支持分组消费，需要自己在业务逻辑层解决
如何实现：Redis应用-异步消息队列与延时队列

基于List的 LPUSH+LRANGE+RPOP 的实现
针对客户端数据安全进行的优化方案，使用LRANGE首先对队列元素只做读取不做消费，在客户端消费完成后，再使用RPOP对服务端进行消费。
由于LRANGE不是阻塞的就又回到了方案二解决的资源浪费问题上了，无法减少不必要的轮询。
还存在重复执行的问题，由于先读再消费，在消费者宕机重启后会再次读到没有确认消费的但是已经在消费者处理过的元素，就有了重复消费的风险。真的是绕啊。

基于List的 LPUSH+BRPOP，LPUSH+LREM 的实现
对客户端数据安全进行的优化方案，是一种安全的队列，虽然也会存在重复消费的风险，但是元素队列的操作都是在服务端进行的，问题发生的概率会大大降低。
首先消息队列数量增加了，一个存储待消费消息的队列暂且称为A，一个存储正在消费消息的队列暂且称为B，关键在与BRPOP+LPUSH操作，RPOP+LPUSH是将A中队尾的消息删除（消费掉）并添加到B队列的队头。
BRPOP+LPUSH是原子性的操作，所以不用担心从A中消费后数据丢失的问题。
BRPOP+LPUSH是阻塞的，不过你也可以使用RPOP+LPUSH非阻塞模式。

这个方案当然也不是完美的，还是存在客户端宕机的情况，正在处理中的队列存在长期不消费的消息怎么办？
可以再添加一台客户端来监控长期不消费的消息，重新将消息打回待消费的队列，这个可以使用循环队列的模式来实现。

PUB/SUB,订阅/发布模式

SUBSCRIBE，用于订阅信道
PUBLISH，向信道发送消息
UNSUBSCRIBE，取消订阅

此模式允许生产者只生产一次消息，由中间件负责将消息复制到多个消息队列，每个消息队列由对应的消费组消费。

优点
1.典型的广播模式，一个消息可以发布到多个消费者
2.多信道订阅，消费者可以同时订阅多个信道，从而接收多类消息
3.消息即时发送，消息不用等待消费者读取，消费者会自动接收到信道发布的消息

缺点
1.消息一旦发布，不能接收。换句话就是发布时若客户端不在线，则消息丢失，不能寻回
2.不能保证每个消费者接收的时间是一致的
3.若消费者客户端出现消息积压，到一定程度，会被强制断开，导致消息意外丢失。通常发生在消息的生产远大于消费速度时
可见，Pub/Sub 模式不适合做消息存储，消息积压类的业务，而是擅长处理广播，即时通讯，即时反馈的业务。

基于Sorted-Set的实现

Sortes Set(有序列表)，类似于java的SortedSet和HashMap的结合体，一方面她是一个set，保证内部value的唯一性，另一方面它可以给每个value赋予一个score，代表这个value的排序权重。内部实现是“跳跃表”。
有序集合的方案是在自己确定消息顺ID时比较常用，使用集合成员的Score来作为消息ID，保证顺序，还可以保证消息ID的单调递增。通常可以使用时间戳+序号的方案。确保了消息ID的单调递增，利用SortedSet的依据Score排序的特征，就可以制作一个有序的消息队列了。

优点
就是可以自定义消息ID，在消息ID有意义时，比较重要。
缺点
缺点也明显，不允许重复消息（因为是集合），同时消息ID确定有错误会导致消息的顺序出错。

消息延时队列
延时队列可以通过 Redis 的 zset(有序列表) 来实现。我们将消息序列化成一个字符串作为 zset 的value，这个消息的到期处理时间作为score，然后用多个线程轮询 zset 获取到期的任务进行处理，多个线程是为了保障可用性，万一挂了一个线程还有其它线程可以继续处理。因为有多个线程，所以需要考虑并发争抢任务，确保任务不能被多次执行。 Redis 的 zrem 方法是多线程多进程争抢任务的关键，它的返回值决定了当前实例有没有抢到任务，因为 loop 方法可能会被多个线程、多个进程调用，同一个任务可能会被多个进程线程抢到，通过 zrem 来决定唯一的属主。同时，我们要注意一定要对 handle_msg 进行异常捕获，避免因为个别任务处理问题导致循环异常退出。

问题来了：
同一个任务可能会被多个进程取到之后再使用 zrem 进行争抢，那些没抢到的进程都是白取了一次任务，这是浪费。解决办法：Lua是Redis内置脚本，执行Lua脚本时，Redis线程会依次执行脚本中的语句，对于客户端来说操作是原子性的，将 zrangebyscore 和 zrem 一同挪到服务器端进行原子化操作，这样多个进程之间争抢任务时就不会出现这种浪费了。

基于Stream类型的实现

Stream为redis 5.0后新增的数据结构。支持多播的可持久化消息队列，实现借鉴了Kafka设计。
在这里插入图片描述

Redis Stream的结构如上图所示，它有一个消息链表，将所有加入的消息都串起来，每个消息都有一个唯一的ID和对应的内容。消息是持久化的，Redis重启后，内容还在。

每个Stream都有唯一的名称，它就是Redis的key，在我们首次使用xadd指令追加消息时自动创建。

每个Stream都可以挂多个消费组，每个消费组会有个游标last_delivered_id在Stream数组之上往前移动，表示当前消费组已经消费到哪条消息了。每个消费组都有一个Stream内唯一的名称，消费组不会自动创建，它需要单独的指令xgroup create进行创建，需要指定从Stream的某个消息ID开始消费，这个ID用来初始化last_delivered_id变量。

每个消费组(Consumer Group)的状态都是独立的，相互不受影响。也就是说同一份Stream内部的消息会被每个消费组都消费到。

同一个消费组(Consumer Group)可以挂接多个消费者(Consumer)，这些消费者之间是竞争关系，任意一个消费者读取了消息都会使游标last_delivered_id往前移动。每个消费者有一个组内唯一名称。

消费者(Consumer)内部会有个状态变量pending_ids，它记录了当前已经被客户端读取的消息，但是还没有ack。如果客户端没有ack，这个变量里面的消息ID会越来越多，一旦某个消息被ack，它就开始减少。这个pending_ids变量在Redis官方被称之为PEL，也就是Pending Entries List，这是一个很核心的数据结构，它用来确保客户端至少消费了消息一次，而不会在网络传输的中途丢失了没处理。

增删改查
xadd 追加消息
xdel 删除消息，这里的删除仅仅是设置了标志位，不影响消息总长度
xrange 获取消息列表，会自动过滤已经删除的消息
xlen 消息长度
del 删除Stream

独立消费
我们可以在不定义消费组的情况下进行Stream消息的独立消费，当Stream没有新消息时，甚至可以阻塞等待。Redis设计了一个单独的消费指令xread，可以将Stream当成普通的消息队列(list)来使用。使用xread时，我们可以完全忽略消费组(Consumer Group)的存在，就好比Stream就是一个普通的列表(list)。

创建消费组
Stream通过xgroup create指令创建消费组(Consumer Group)，需要传递起始消息ID参数用来初始化last_delivered_id变量。

消费
Stream提供了xreadgroup指令可以进行消费组的组内消费，需要提供消费组名称、消费者名称和起始消息ID。它同xread一样，也可以阻塞等待新消息。读到新消息后，对应的消息ID就会进入消费者的PEL(正在处理的消息)结构里，客户端处理完毕后使用xack指令通知服务器，本条消息已经处理完毕，该消息ID就会从PEL中移除。

Stream消息太多怎么办
读者很容易想到，要是消息积累太多，Stream的链表岂不是很长，内容会不会爆掉就是个问题了。xdel指令又不会删除消息，它只是给消息做了个标志位。
Redis自然考虑到了这一点，所以它提供了一个定长Stream功能。在xadd的指令提供一个定长长度maxlen，就可以将老的消息干掉，确保最多不超过指定长度。

127.0.0.1:6379> xlen codehole
(integer) 5
127.0.0.1:6379> xadd codehole maxlen 3 * name xiaorui age 1
1527855160273-0
127.0.0.1:6379> xlen codehole
(integer) 3

我们看到Stream的长度被砍掉了。

消息如果忘记ACK会怎样？
Stream在每个消费者结构中保存了正在处理中的消息ID列表PEL，如果消费者收到了消息处理完了但是没有回复ack，就会导致PEL列表不断增长，如果有很多消费组的话，那么这个PEL占用的内存就会放大。

PEL如何避免消息丢失？
在客户端消费者读取Stream消息时，Redis服务器将消息回复给客户端的过程中，客户端突然断开了连接，消息就丢失了。但是PEL里已经保存了发出去的消息ID。待客户端重新连上之后，可以再次收到PEL中的消息ID列表。不过此时xreadgroup的起始消息必须是任意有效的消息ID，一般将参数设为0-0，表示读取所有的PEL消息以及自last_delivered_id之后的新消息。

分区Partition
Redis没有原生支持分区的能力，想要使用分区，需要分配多个Stream，然后在客户端使用一定的策略来讲消息放入不同的stream。

结论
Stream的消费模型借鉴了kafka的消费分组的概念，它弥补了Redis Pub/Sub不能持久化消息的缺陷。但是它又不同于kafka，kafka的消息可以分partition，而Stream不行。如果非要分parition的话，得在客户端做，提供不同的Stream名称，对消息进行hash取模来选择往哪个Stream里塞。如果读者稍微研究过Redis作者的另一个开源项目Disque的话，这极可能是作者意识到Disque项目的活跃程度不够，所以将Disque的内容移植到了Redis里面。这只是本人的猜测，未必是作者的初衷。如果读者有什么不同的想法，可以在评论区一起参与讨论。

限流算法实现

string

每天学点新东西

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
数据结构设计以及常见算法总结

这里写目录标题一级目录二级目录三级目录一级目录二级目录三级目录一.基本数据结构leetcode 155 最小栈：维护两个栈，一个原来的栈，一个最小栈；压入元素时，最小栈的压栈策略：栈顶和当前压入元素比较选小的压class MinStack {public: stack<int> st; stack<int> minst; /** initialize your data structure here. */ MinStack() {
复制链接

扫一扫

专栏目录