Skip list

最新推荐文章于 2024-08-10 22:25:36 发布

疯疯癫癫

最新推荐文章于 2024-08-10 22:25:36 发布

阅读量2.6k

点赞数 2

分类专栏： Algorithm 文章标签： skiplist algorithm

本文链接：https://blog.csdn.net/fengfengdiandia/article/details/52097146

版权

Algorithm 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

一、什么是跳跃表
- 概念
- 组成
二、跳跃表数据结构
三、操作
四、测试
参考文献
skiplist 代码

疑问：leveldb 和 redis 都使用了 skip list，它们为什么要使用 skip list？

跳跃表（skiplist）是一种链表，它在链表的基础上增加了跳跃功能，使得在查找元素时，跳表能够提供 O(log N) 的时间复杂度。

像红黑树这样的数据结构查找的时间复杂度也是 O(log N)，但是相比实现一棵红黑树，跳跃表的实现要简单得多。

我们通过下面单链表和跳跃表的查找对比先来感受下跳跃表的优势。

单链表

图片标题

下面这个就是跳跃表

图片标题

假设我们要查找17这个点，链表自然是顺序查找了。
下图是用跳跃表查找17，过程跳过了3,7,12 这些点，这就是跳跃表的特点。

图片标题

一、什么是跳跃表

概念

跳跃表 由 William Pugh 发明并于 1990 年 6 月发表在 Communications of the ACM，文章题目为 Skip lists: a probabilistic alternative to balanced trees。

引用 William Pugh 的话：

Skip lists are a data structure that can be used in place of balanced trees.
Skip lists use probabilistic balancing rather than strictly enforced balancing and as a result the algorithms for insertion and deletion in skip lists are much simpler and significantly faster than equivalent algorithms for balanced trees.

跳跃表是一种可用于替换平衡树的数据结构。
跳跃表使用概率均衡而非强制均衡，因此跳跃表的插入和删除比平衡树更简单快捷。

组成

表头（head）：负责维护跳跃表的节点指针。
跳跃表节点：保存着元素值，以及多个层。
层：保存着指向其他元素的指针。高层的指针越过的元素数量大于等于低层的指针，为了提高查找的效率，程序总是从高层先开始访问，然后随着元素值范围的缩小，慢慢降低层次。
表尾：全部由 NULL 组成，表示跳跃表的末尾。

那么，这个跳跃的功能是怎么实现的？
为什么能够提供跟查找树一样的 O(log N) 时间复杂度？

二、跳跃表数据结构

节点

// One skip-list element: a key/value pair plus its per-level forward links.
struct Node {
  int key;
  int value;
  // Forward pointers to other nodes, indexed by level. Only one slot is
  // declared here; SkipList::NewNode allocates extra space behind the struct
  // so that slots past forward_[0] are usable. For that reason this array
  // must remain the last member.
  Node* forward_[1];
};

skip list

// List is a pointer to the header record declared below; SkipList::NewList
// allocates that record with malloc.
typedef struct List_* List;
struct List_ {
  int level;   // number of levels currently in use (NewList starts it at 0)
  Node* head;  // head node: holds the forward pointer that starts each level
};

SkipList 的实现

// Skip list keyed by int with an int payload. Insert and Delete walk each
// level while next->key < key, so nodes are kept in ascending key order.
class SkipList {
 public:
  // Seeds the random generator with a fixed value and builds the empty list.
  SkipList() : rnd_(0xdeadbeef) { NewList(); }
  ~SkipList() { FreeList(); }

  // Adds a key/value pair. Returns true when a node was linked in and false
  // when the insert was refused (the implementation intends to refuse
  // duplicate keys).
  bool Insert(int key, int value);
  // Removes the node with the given key. Returns true if a node was removed.
  bool Delete(int key);
  // Defined outside this snippet; presumably reports whether key is present.
  bool Search(int key);
  // Defined outside this snippet; presumably dumps the list contents.
  void Print();

  enum { kMaxLevel = 16 };  // maximum number of levels

 private:
  // Allocates the header record and the head node.
  void NewList();
  // Defined outside this snippet; called from the destructor.
  void FreeList();

  // Accessors for the number of levels currently in use.
  inline int GetMaxHeight() const { return list_->level; }
  inline void SetMaxHeight(int level) { list_->level = level; }
  // Allocates a node with room for extra forward pointers and stores it in n.
  void NewNode(int level, Node*& n);
  // Defined outside this snippet; Insert uses the result as the number of
  // levels the new node is linked into.
  int RandomHeight();

  List list_;
  Random rnd_;  // random number generator

  // Declared private and not defined in this snippet, so the class cannot be
  // copied or assigned by outside code.
  SkipList(const SkipList&);
  void operator=(const SkipList&);
};

随机数生成器

这里使用了 leveldb 中的随机数生成器，也可以用标准库中的 rand() 函数

// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef RANDOM_H
#define RANDOM_H

#include <stdint.h>

// A very simple random number generator.  Not especially good at
// generating truly random bits, but good enough for our needs in this
// package.
// Minimal pseudo-random generator: a multiplicative congruential sequence
//     seed = (seed * 16807) % (2^31 - 1)
// It is not a high-quality source of randomness, but it is cheap and
// deterministic for a given seed.
class Random {
 public:
  // Only the low 31 bits of s are used. The values 0 and 2^31-1 would make
  // the sequence constant, so they are replaced by 1.
  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
    const bool degenerate = (seed_ == 0) || (seed_ == 0x7fffffffu);
    if (degenerate) {
      seed_ = 1;
    }
  }

  // Advances the sequence and returns the new state, a value in [1, 2^31-2].
  uint32_t Next() {
    const uint32_t modulus = 2147483647u;  // 2^31 - 1
    const uint64_t multiplier = 16807;     // bits 14, 8, 7, 5, 2, 1, 0

    // 64-bit product so that the multiplication cannot wrap.
    const uint64_t wide = seed_ * multiplier;

    // Reduce modulo 2^31-1 without dividing: since 2^31 == 1 (mod M), the
    // high part of the product can simply be added to the low 31 bits.
    uint32_t folded = static_cast<uint32_t>((wide >> 31) + (wide & modulus));

    // The fold can exceed M by at most one wrap; it can never equal M, so a
    // strict comparison is sufficient.
    if (folded > modulus) {
      folded -= modulus;
    }

    seed_ = folded;
    return seed_;
  }

  // Returns a value in [0, n-1].
  // REQUIRES: n > 0
  uint32_t Uniform(int n) {
    const uint32_t draw = Next();
    return draw % n;
  }

  // Returns true roughly once in every n calls.
  // REQUIRES: n > 0
  bool OneIn(int n) {
    const uint32_t draw = Next();
    return (draw % n) == 0;
  }

  // Picks a bit count uniformly from [0, max_log], then returns a value with
  // that many random bits. The result lies in [0, 2^max_log - 1] and is
  // exponentially biased towards small numbers.
  uint32_t Skewed(int max_log) {
    const uint32_t bits = Uniform(max_log + 1);
    return Uniform(1 << bits);
  }

 private:
  uint32_t seed_;
};

#endif  // RANDOM_H

三、操作

初始化

void SkipList::NewList() {
  list_ = (List) malloc(sizeof(List_));
  assert(list_ != NULL);

  list_->level = 0;
  NewNode(kMaxLevel - 1, list_->head);

  for (int i = 0; i < kMaxLevel; i++) {  // 将每层 head 节点置空
    list_->head->forward_[i] = NULL;
  }
}

void SkipList::NewNode(int level, Node*& n) {
  n = (Node *) malloc(sizeof(Node) + level * sizeof(Node *));
  assert(n != NULL);
}

插入

bool SkipList::Insert(int key, int value) {
  Node* update[kMaxLevel];
  Node* x = list_->head;

  for (int i = list_->level - 1; i >= 0; i--) {
    Node* next;
    while ((next = x->forward_[i]) && (next->key < key)) {
      x = next;
    }

    update[i] = x;
  }

  if (x->key == key) {  // 不允许重复 key
    return false;
  }

  int level = RandomHeight();  // 随机生成 level
  if (level > GetMaxHeight()) {
    for (int i = GetMaxHeight(); i < level; i++) {
      update[i] = list_->head;
    }

    SetMaxHeight(level);
  }

  // make node
  Node* newNode;
  NewNode(level, newNode);
  newNode->key = key;
  newNode->value = value;

  for (int i = 0; i < level; i++) {
    newNode->forward_[i] = update[i]->forward_[i];
    update[i]->forward_[i] = newNode;
  }

  return true;
}

插入数据过程请看下图：

这里就回答了前面提出的疑问：为什么能够提供跟查找树一样的 O(log N) 时间复杂度？
我们每次在进行查询操作的时候，都会从最高一层开始找。由于每一层链表都是有序的，而且层数越高节点越稀疏（节点出现在更高一层的概率是固定的，例如 1/2），每下降一层，需要继续查找的范围都会成比例缩小，所以期望的时间复杂度是 O(log N)；当然最坏情况下的时间复杂度是 O(N)，不过这种情况通常不会遇到。

图片标题

再附一张来自 wikipedia 插入元素的 gif 图

图片标题

删除

bool SkipList::Delete(int key) {
  Node* update[kMaxLevel];
  Node* x = list_->head;
  Node* next = NULL;

  for (int i = GetMaxHeight() - 1; i >= 0; i--) {
    while ((next = x->forward_[i]) && (next->key < key)) {
      x = next;
    }

    update[i] = x;
  }

  if (next->key != key)
    return false;

  // 删除每层的 key 节点
  for (int i = 0; i < GetMaxHeight(); i++) {
    if (update[i]->forward_[i] == next)
      update[i]->forward_[i] = next->forward_[i];
  }

  free(next);

  // 如果删除的是最大层的节点，需要重新维护跳跃表的层
  for (int i = GetMaxHeight() - 1; i >= 0; i--) {
    if (list_->head->forward_[i] == NULL) 
      list_->level--;
  }

  return true;
}

四、测试

#include <sys/time.h>

#include <cstdio>
#include <iostream>

#include "skiplist.h"

// Demo driver: fills a skip list with keys 1..100, prints it, deletes key 20,
// prints it again on success, and reports the elapsed wall-clock time.
int main()
{
  struct timeval start, end;
  gettimeofday(&start, NULL);

  SkipList list;

  // Insert nodes with keys 1 through 100 (value equals key).
  for (int i = 1; i <= 100; i++) {
    list.Insert(i, i);
  }
  list.Print();

  // Delete the node with key 20.
  bool d_ok = list.Delete(20);
  if (d_ok) {
    std::cout << std::endl << "Delete OK\n\n";
    list.Print();
  }

  gettimeofday(&end, NULL);
  // Subtract the seconds before scaling to microseconds. Multiplying the
  // absolute epoch seconds by 1000000 overflows when long/time_t is 32 bits.
  const long elapsed_sec = static_cast<long>(end.tv_sec - start.tv_sec);
  const long elapsed_usec = static_cast<long>(end.tv_usec - start.tv_usec);
  long time_used = elapsed_sec * 1000000L + elapsed_usec;
  printf("In %ld usec\n", time_used);

  return 0;
}