leveldb源码学习之跳表skiplist

最新推荐文章于 2023-03-28 15:40:27 发布

空腹熊猫

最新推荐文章于 2023-03-28 15:40:27 发布

阅读量168

点赞数

分类专栏： leveldb

本文链接：https://blog.csdn.net/guangyacyb/article/details/88183219

版权

leveldb 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

Skip list（跳跃表）是一种可以代替平衡树的数据结构。Skip list 通过随机化（按概率决定每个节点的层数）来保持平衡，而平衡树则依靠严格的旋转操作（比如平衡二叉树的左旋、右旋）来保持平衡，因此 Skip list 更容易实现，而且在实践中相比平衡树有着较高的运行效率。

由于 skip list 结构比较简单，实现起来也比较容易。它的查找、插入的期望时间复杂度与平衡树相同（O(log n)），但常数项相对小很多。skip list 在空间上也比较节省：当层数晋升概率 p = 1/4 时（即下文代码中的 kBranching = 4），一个节点平均只需要 1/(1-p) ≈ 1.333 个指针（p 更小时甚至更少），并且不需要存储用于保持平衡的额外变量。

在Leveldb中，skip list是实现memtable的核心数据结构，memtable的KV数据都存储在skip list中。

跳表类定义

// A skip list that stores Key values in the order defined by Comparator.
// Node memory comes from an Arena supplied at construction time.
// Per the comments in Insert(), Insert() is externally synchronized while
// readers may run concurrently with it.
template <typename Key, class Comparator>
class SkipList {
 private:
  struct Node;

 public:
  // Creates a SkipList that orders keys with "cmp" and allocates node memory
  // from "*arena". The arena object must remain valid for the whole lifetime
  // of the skip list.
  explicit SkipList(Comparator cmp, Arena* arena);

  SkipList(const SkipList&) = delete; // Copy construction is disabled.
  SkipList& operator=(const SkipList&) = delete; // Copy assignment is disabled.

  // Inserts key into the list.
  // REQUIRES: nothing that compares equal to key is currently in the list.
  void Insert(const Key& key);

  // Returns true iff an entry that compares equal to key is in the list.
  bool Contains(const Key& key) const;

  // Iteration over the contents of a skip list.
  class Iterator {
   public:
    // Initializes an iterator over the given list. The resulting iterator is
    // not positioned on a valid node.
    explicit Iterator(const SkipList* list);

    // Returns true iff the iterator is positioned at a valid node.
    bool Valid() const;

    // Returns the key at the current position.
    // REQUIRES: Valid()
    const Key& key() const;

    // Advances to the next position.
    // REQUIRES: Valid()
    void Next();

    // Moves back to the previous position.
    // REQUIRES: Valid()
    void Prev();

    // Positions the iterator at the first node whose key is >= target.
    void Seek(const Key& target);

    // Positions the iterator at the first node in the list. The iterator is
    // Valid() afterwards iff the list is not empty.
    void SeekToFirst();

    // Positions the iterator at the last node in the list. The iterator is
    // Valid() afterwards iff the list is not empty.
    void SeekToLast();

   private:
    const SkipList* list_; // The skip list being iterated.
    // Current position; nullptr means "not positioned on a valid node".
    Node* node_;
    // Intentionally copyable
  };

 private:
  // Upper bound on the height of any node; the head node is allocated with
  // exactly this many levels.
  enum { kMaxHeight = 12 };

  // Returns the current height of the list using a relaxed load.
  inline int GetMaxHeight() const {
    return max_height_.load(std::memory_order_relaxed);
  }

  // Allocates a node holding "key" with room for "height" forward links.
  Node* NewNode(const Key& key, int height);
  // Picks the height of a new node at random, in the range [1, kMaxHeight].
  int RandomHeight();
  // Returns true iff the comparator reports a and b as equal.
  bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }

  // Returns true if key is greater than the key stored in node n.
  // A null n is treated as infinitely large, so the result is false.
  bool KeyIsAfterNode(const Key& key, Node* n) const;

  // Returns the earliest node whose key is >= key, or nullptr if there is no
  // such node.
  // If prev is non-null, prev[level] is filled with the predecessor node at
  // that level for every level in [0..max_height_-1].
  Node* FindGreaterOrEqual(const Key& key, Node** prev) const;

  // Returns the last node whose key is < key, or head_ if there is no such
  // node.
  Node* FindLessThan(const Key& key) const;

  // Returns the last node in the list, or head_ if the list is empty.
  Node* FindLast() const;

  // Immutable after construction.
  Comparator const compare_;
  Arena* const arena_;  // Arena used to allocate node memory.

  Node* const head_;  // Head node; it carries no user key.

  // Modified only by Insert(). Loaded with relaxed ordering by readers;
  // observing a stale value is acceptable.
  std::atomic<int> max_height_;  // Current height of the whole list.

  // Read and written only by Insert().
  Random rnd_;
};

内部节点定义

// A single skip-list node: an immutable key plus one forward link per level.
template <typename Key, class Comparator>
struct SkipList<Key, Comparator>::Node {
  explicit Node(const Key& k) : key(k) {}

  Key const key;

  // Accessors/mutators for the forward links. They are wrapped in methods so
  // that the required synchronization (memory ordering) lives in one place.
  // Returns the successor of this node at level n.
  Node* Next(int n) {
    assert(n >= 0);
    // Acquire load: later reads/writes in this thread cannot be reordered
    // before it, and writes made by another thread before its matching
    // release store become visible here, so the returned node is observed
    // fully initialized.
    return next_[n].load(std::memory_order_acquire);
  }
  // Sets the successor of this node at level n to x.
  void SetNext(int n, Node* x) {
    assert(n >= 0);
    // Release store: earlier reads/writes in this thread cannot be reordered
    // after it, so any thread that acquires this pointer also sees the writes
    // that initialized the node it points to.
    next_[n].store(x, std::memory_order_release);
  }

  // Barrier-free variants that can be used safely in a few places;
  // memory_order_relaxed imposes no synchronization or ordering constraints.
  Node* NoBarrier_Next(int n) {
    assert(n >= 0);
    return next_[n].load(std::memory_order_relaxed);
  }
  void NoBarrier_SetNext(int n, Node* x) {
    assert(n >= 0);
    next_[n].store(x, std::memory_order_relaxed);
  }

 private:
  // Forward links, one per level; next_[0] is the lowest-level link.
  // Declared with length 1, but NewNode() allocates extra space directly
  // behind the node so that indices up to the node's height - 1 are usable.
  std::atomic<Node*> next_[1];
};

创建节点

// Allocates arena memory for a node with "height" forward links and
// constructs the node in place with the given key.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::NewNode(
    const Key& key, int height) {
  // Node itself already contains one link slot (next_[0]); the remaining
  // height - 1 slots live in the bytes allocated right after it.
  const auto extra_link_bytes = sizeof(std::atomic<Node*>) * (height - 1);
  char* const storage =
      arena_->AllocateAligned(sizeof(Node) + extra_link_bytes);
  // Placement new: run the constructor on the memory obtained above.
  return new (storage) Node(key);
}

迭代器函数实现

// Binds the iterator to "list". It starts out without a current node, so
// Valid() is false until one of the Seek*() methods positions it.
template <typename Key, class Comparator>
inline SkipList<Key, Comparator>::Iterator::Iterator(const SkipList* list)
    : list_(list), node_(nullptr) {}

// The iterator is valid exactly when it currently points at a node.
template <typename Key, class Comparator>
inline bool SkipList<Key, Comparator>::Iterator::Valid() const {
  const bool positioned = (nullptr != node_);
  return positioned;
}

// Returns the key stored in the current node.
// REQUIRES: Valid()
template <typename Key, class Comparator>
inline const Key& SkipList<Key, Comparator>::Iterator::key() const {
  assert(Valid());
  const Key& current = node_->key;
  return current;
}

// Steps to the successor on the lowest level (level 0); the iterator becomes
// invalid when there is no successor.
// REQUIRES: Valid()
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Next() {
  assert(Valid());
  Node* const successor = node_->Next(0);
  node_ = successor;
}

// Steps to the predecessor of the current node.
// REQUIRES: Valid()
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Prev() {
  // Nodes carry no explicit "prev" links; instead we search for the last
  // node that sorts before the current key. This keeps nodes small and
  // leaves no back links to maintain on insertion.
  assert(Valid());
  Node* const before = list_->FindLessThan(node_->key);
  // Landing on the head node means there is no predecessor.
  node_ = (before == list_->head_) ? nullptr : before;
}

// Positions the iterator at the first node whose key is >= target; the
// iterator becomes invalid if no such node exists.
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Seek(const Key& target) {
  // Predecessors are not needed here, hence the null "prev" argument.
  Node* const found = list_->FindGreaterOrEqual(target, nullptr);
  node_ = found;
}

// Positions the iterator at the first node, i.e. the level-0 successor of
// the head node; the iterator is invalid afterwards if the list is empty.
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::SeekToFirst() {
  Node* const sentinel = list_->head_;
  node_ = sentinel->Next(0);
}

// Positions the iterator at the last node; the iterator is invalid
// afterwards if the list is empty.
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::SeekToLast() {
  Node* const tail = list_->FindLast();
  // FindLast() reports an empty list by returning the head node.
  node_ = (tail == list_->head_) ? nullptr : tail;
}

这里迭代器访问前驱时没有使用 prev 指针，而是直接调用 FindLessThan 重新查找。该查找的期望复杂度是 O(log n)（逐层跳跃、逐步缩小范围，效果类似二分查找），并不慢。如果为每个节点增加 prev 指针，不仅要占用额外的存储空间，还要在元素增删时维护这些指针，并不简单。

跳表函数实现

// Picks a random node height: starting from 1, the height grows by one with
// probability 1/kBranching per step, and never exceeds kMaxHeight.
template <typename Key, class Comparator>
int SkipList<Key, Comparator>::RandomHeight() {
  static const unsigned int kBranching = 4;
  int levels = 1;
  for (; levels < kMaxHeight; ++levels) {
    // Stop growing as soon as a draw is not a multiple of kBranching.
    if ((rnd_.Next() % kBranching) != 0) {
      break;
    }
  }
  assert(levels > 0);
  assert(levels <= kMaxHeight);
  return levels;
}

// Returns true iff key sorts strictly after the key stored in node n.
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
  // A null node is considered infinitely large, so no key comes after it.
  if (n == nullptr) {
    return false;
  }
  return compare_(n->key, key) < 0;
}

// Returns the first node whose key is >= key, or nullptr if there is none.
// When prev is non-null, prev[level] receives the predecessor of key on
// every level from the current list height - 1 down to 0.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key,
                                              Node** prev) const {
  Node* cursor = head_;            // start at the head node
  int level = GetMaxHeight() - 1;  // start at the topmost level in use
  for (;;) {
    Node* const candidate = cursor->Next(level);
    if (KeyIsAfterNode(key, candidate)) {
      // key lies beyond candidate: keep walking along this level.
      cursor = candidate;
      continue;
    }
    // cursor is the last node before key on this level.
    if (prev != nullptr) {
      prev[level] = cursor;
    }
    if (level == 0) {
      // Level 0 links every node, so candidate is the answer.
      return candidate;
    }
    // Drop to the next lower (denser) level and continue from cursor.
    --level;
  }
}

// Returns the last node whose key is < key, or head_ if there is none.
// Like FindGreaterOrEqual(), the search narrows from the top level down.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindLessThan(const Key& key) const {
  Node* cursor = head_;
  int level = GetMaxHeight() - 1;
  for (;;) {
    // Invariant: cursor is the head node or holds a key smaller than key.
    assert(cursor == head_ || compare_(cursor->key, key) < 0);
    Node* const candidate = cursor->Next(level);
    if (candidate != nullptr && compare_(candidate->key, key) < 0) {
      // candidate is still smaller than key: advance along this level.
      cursor = candidate;
      continue;
    }
    if (level == 0) {
      return cursor;
    }
    // Switch to the next lower level.
    --level;
  }
}

// Returns the last node in the list, or head_ if the list is empty.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::FindLast()
    const {
  Node* cursor = head_;
  int level = GetMaxHeight() - 1;
  for (;;) {
    Node* const successor = cursor->Next(level);
    if (successor != nullptr) {
      // More nodes on this level: keep moving forward.
      cursor = successor;
      continue;
    }
    if (level == 0) {
      return cursor;
    }
    // End of this level reached: switch to the next lower level.
    --level;
  }
}

// Builds an empty skip list: a head node with kMaxHeight levels whose links
// are all null, and an initial list height of 1.
template <typename Key, class Comparator>
SkipList<Key, Comparator>::SkipList(Comparator cmp, Arena* arena)
    : compare_(cmp),
      arena_(arena),
      head_(NewNode(0 /* any key will do */, kMaxHeight)),
      max_height_(1),
      rnd_(0xdeadbeef) {
  // Clear every forward link of the head node, lowest level first.
  int level = 0;
  while (level < kMaxHeight) {
    head_->SetNext(level, nullptr);
    ++level;
  }
}

// Inserts key into the skip list.
// REQUIRES: nothing that compares equal to key is currently in the list
// (checked by the assert below).
template<typename Key, class Comparator>
void SkipList<Key,Comparator>::Insert(const Key& key) {
  // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
  // here since Insert() is externally synchronized.
  Node* prev[kMaxHeight];
  Node* x = FindGreaterOrEqual(key, prev); // Collect the predecessor of key on every level in use.

  // Duplicate keys are not allowed.
  assert(x == nullptr || !Equal(key, x->key));

  // Pick a random height for the new node; it may raise the list height.
  int height = RandomHeight();
  if (height > GetMaxHeight()) {
    // On the levels that were unused so far the predecessor is the head node.
    for (int i = GetMaxHeight(); i < height; i++) {
      prev[i] = head_;
    }
    // It is ok to mutate max_height_ without any synchronization
    // with concurrent readers.  A concurrent reader that observes
    // the new value of max_height_ will see either the old value of
    // new level pointers from head_ (nullptr), or a new value set in
    // the loop below.  In the former case the reader will
    // immediately drop to the next level since nullptr sorts after all
    // keys.  In the latter case the reader will use the new node.
    max_height_.store(height, std::memory_order_relaxed);
  }

  // Walk every level of the new node and splice it in between its
  // predecessor and that predecessor's old successor.
  x = NewNode(key, height);
  for (int i = 0; i < height; i++) {
    // NoBarrier_SetNext() suffices since we will add a barrier when
    // we publish a pointer to "x" in prev[i].
    x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
    prev[i]->SetNext(i, x);
  }
}

// Returns true iff the list holds an entry that compares equal to key.
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::Contains(const Key& key) const {
  // The first node that is >= key is the only possible match.
  Node* const hit = FindGreaterOrEqual(key, nullptr);
  return hit != nullptr && Equal(key, hit->key);
}

空腹熊猫

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
leveldb源码学习之跳表skiplist

Skip list(跳跃表）是一种可以代替平衡树的数据结构。Skip lists应用概率保证平衡，平衡树采用严格的旋转（比如平衡二叉树有左旋右旋）来保证平衡，因此Skip list比较容易实现，而且相比平衡树有着较高的运行效率。由于skip list比较简单，实现起来会比较容易，虽然和平衡树有着相同的时间复杂度(O(logn))，但是skip list的常数项相对小很多。skip list在空...
复制链接

扫一扫