小肥柴慢慢手写数据结构（C篇）（2-5 SkipList 跳表）

卷毛迷你猪

已于 2023-12-06 09:10:35 修改

阅读量380

点赞数 1

分类专栏：数据结构文章标签：数据结构

于 2020-12-02 16:22:38 首次发布

本文链接：https://blog.csdn.net/weixin_39635634/article/details/109810036

版权

数据结构专栏收录该内容

24 篇文章 2 订阅

订阅专栏

小肥柴慢慢手写数据结构（C篇）（2-5 跳表）

对于学过一些数据结构知识的朋友：跳表就是一种概率性替代平衡树（AVL）的数据结构，并且不需要平衡和储存优先级信息。
对没有学过数据结构的朋友：跳表类似于你在查字典时看到的目录，方便你能快速的找到你希望看到的章节内容所在的具体页码；映射到数据结构知识上，我们尝试把原始的链表人为加上索引（加一层），让寻找某一节点的操作更快，是典型的“空间换时间”策略的应用。如下图（直接贴原作者论文图，侵删）：

看了很多帖子，最后根据参考资料[1]找到原作者论文，发现上图才是跳表的本质，即每个节点中有一个存放指向不同层其他节点的指针数组！结合图中e行说明：
（1）内容为6的节点包含4个指针，分别指向不同层的链表，分别是（我们用level表示层序号）
level 3：6->NULL
level 2：6->25->NULL
level 1：6->9->17->25->NULL
level 0：3->6->7->9->12->17->19->21->25->26->NULL （最低层，原始链表）
（2）内容为3的底层（原始链表）节点不一定会跟着长高，成为整个跳表的头节点（header）；
（3）若想找到一个节点，例如17，从header=6处开始寻找
a. level3层仅一个节点6，下降一层寻找
b. level2层6<17<25，继续下降一层
c. level1层找到节点17，done
可以看到，借用跳表我们查找17节点用了3次操作；对比遍历level0层的方法（需要6次操作），运行时间上是有优势的；如果我们把问题规模扩大：设想现在有1000个节点的链表，要求你去找到目标节点，那么跳表的效率就很可观了！相当于用二分法查找一个数据，且每一层都帮提前划分好了。

在参考资料[1]/[2]/[3]/[4]中，均对跳表做了详细的描述，只是[4]的实现有些另类，我们放在后面讲解。

2-15 参考论文的实现

2-15-1 初始化

节点结构

/* Key and value types stored in every skip-list node. */
typedef int keyType;
typedef int valueType;

/*
 * One skip-list node. `forward` is declared with a single slot; nodes are
 * allocated through NEW_NODE, which reserves extra room behind the struct so
 * that forward[i] can hold one outgoing link per level.
 */
typedef struct Node {
	keyType key;	 // search key
	valueType value; // payload stored under the key
	struct Node * forward[1];  // outgoing links of this node, one per level
} Node;						   // the original paper names this "forward", not "next"

跳表结构

/* Skip-list handle: the number of levels in use plus the header node. */
typedef struct SkipList {
	int level;      // number of levels currently in use; createList starts it at 0
	Node * header;  // header node; its forward[] entries are the entry points of each level
} SkipList;

节点malloc宏，注意分配包含了指针数组的空间

/*
 * Allocate a Node followed by room for n additional Node pointers.
 * The argument is parenthesized so that expressions such as NEW_NODE(a + b)
 * multiply the whole count by the pointer size.
 */
#define NEW_NODE(n) ((Node*)malloc(sizeof(Node) + (n) * sizeof(Node*)))

生成节点 Node * makeNode(int level, keyType key, valueType value)

/*
 * Allocate a node with room for `level` extra forward links and store the
 * key/value pair in it. The forward links themselves are left unset; the
 * caller is expected to fill them in.
 * Returns NULL when the allocation fails.
 */
Node * makeNode(int level, keyType key, valueType value){
	Node *node = NEW_NODE(level);
	if(node != NULL){
		node->key = key;
		node->value = value;
	}
	return node;
}

初始化跳表

/*
 * Create an empty skip list.
 * The header node is allocated with MAX_LEVEL forward slots (one in the
 * struct plus MAX_LEVEL - 1 extra) and every one of them is cleared, so that
 * insert() can safely read header->forward[i] for any i < MAX_LEVEL.
 * Returns NULL when either allocation fails.
 */
SkipList * createList(){
	SkipList *list = malloc(sizeof *list);
	if (list == NULL)
		return NULL;

	list->level = 0; // no level is in use yet
	list->header = makeNode(MAX_LEVEL - 1, 0, 0);
	if (list->header == NULL) {
		free(list);
		return NULL;
	}

	// Clear ALL MAX_LEVEL slots; stopping at MAX_LEVEL - 1 would leave the
	// top slot uninitialized for nodes promoted to the maximum height.
	for (int level = 0; level < MAX_LEVEL; level++)
		list->header->forward[level] = NULL;

	return list;
}

2-15-2 插入节点

先看原理图，注意缓存 *update[ ] 这个指针数组的使用
在这里插入图片描述

再对照论文中算法伪代码，很快就能完成
在这里插入图片描述
常翻《算法导论》或者没事肝论文的盆友对上面的伪代码应该是比较亲切的，可能存在理解障碍的地方，我都做了注释，简单介绍流程：
step1：通过key找到目标节点
step2：key已经存在，替换节点内容
step3-1：随机晋升节点
step3-2：尝试插入新节点

其中需要本地（local）缓存当前查询路径，且指针的操作与缓存要注意细节。

/*
 * Insert (searchKey, newValue) into the list, or overwrite the value when the
 * key is already present.
 * Returns OK when a new node was linked in, and DONE when an existing key was
 * updated or when the new node could not be allocated (the list is left
 * unchanged in that case).
 */
int insert(SkipList *list, keyType searchKey, valueType newValue){
	Node *update[MAX_LEVEL]; // per level: the last node before the insert position
	Node *x = list->header;
	Node *target = NULL;     // stays NULL when the list has no level yet
	int i;

	// step 1: walk down from the top level, recording the search path
	for(i = list->level - 1; i >= 0; i--){
		while((target = x->forward[i]) && target->key < searchKey)
			x = target;
		update[i] = x;
	}

	// step 2: the key already exists, just replace its value
	if(target && target->key == searchKey){
		target->value = newValue;
		return DONE;
	}

	// step 3-1: pick a random height and allocate before touching the list,
	// so a failed allocation cannot leave list->level raised
	int newLevel = randomLevel();
	x = makeNode(newLevel, searchKey, newValue);
	if(x == NULL)
		return DONE;

	// levels that did not exist so far start directly at the header
	if(newLevel > list->level){
		for(i = list->level; i < newLevel; i++)
			update[i] = list->header;
		list->level = newLevel;
	}

	// step 3-2: splice the node into every level it occupies
	for(i = newLevel - 1; i >= 0; i--){
		x->forward[i] = update[i]->forward[i];
		update[i]->forward[i] = x;
	}
	return OK;
}

此处对随机取层，原论文中算法设计如下
在这里插入图片描述
我们遵照这个设计，定义了一个晋升几率PROMOTE_RATE，即上图中的标记p；默认概率为0.5，即期望有1/2的节点形成上一层（upper），但这仅仅是一种简单的限制条件；对比原论文，我们还做了层数的限制。

/* Probability of promoting a node by one more level (the paper's p). */
#define PROMOTE_RATE (0.5)

获取[0,1)随机数宏

/*
 * Pseudo-random double in [0, 1). Dividing by RAND_MAX + 1.0 (computed in
 * double, so no integer overflow) keeps the result strictly below 1.0 even
 * when rand() returns RAND_MAX.
 */
#define ROLL() (rand() / (RAND_MAX + 1.0))

其实网上有的实现采用以下形式，也是可取的：

/*
 * Coin-flip variant of the level generator: keep growing while rand() is odd,
 * then clamp the result to MAX_LEVEL. The promotion probability is fixed at
 * 1/2, which makes it harder to tune than a PROMOTE_RATE-based version.
 */
int random_level() {
    int height = 1;
    for (;;) {
        if (rand() % 2 == 0)
            break;
        height++;
    }
    if (height > MAX_LEVEL)
        height = MAX_LEVEL;
    return height;
}

2-15-3 查询（这个才是我们想要的核心功能）

在这里插入图片描述

/*
 * Look up searchKey, descending from the highest level in use.
 * Returns a pointer to the stored value, or NULL when the key is absent.
 * The search stops as soon as the key is seen on any level.
 */
valueType *search(SkipList *list, keyType searchKey){
	Node *cur = list->header;
	int lvl = list->level;
	while(--lvl >= 0){
		Node *next = cur->forward[lvl];
		// advance along this level while the next key is still too small
		while(next != NULL && next->key < searchKey){
			cur = next;
			next = cur->forward[lvl];
		}
		if(next != NULL && next->key == searchKey)
			return &next->value;
	}
	return NULL;
}

2-15-4 打印

全层打印

/*
 * Print every level from the highest one in use down to the lowest as
 * "(key,value)" pairs. Levels are labelled starting from 1.
 */
void printAll(SkipList *list){
	printf("\nshow skip list : \n");
	int lvl = list->level;
	while(lvl > 0){
		printf("level %d:\t", lvl);
		for(Node *cur = list->header->forward[lvl - 1]; cur != NULL; cur = cur->forward[lvl - 1])
			printf("(%d,%d)\t", cur->key, cur->value);
		printf("\n");
		lvl--;
	}
	printf("\n");
}

2-15-5 删除节点

删除节点要注意排除指定删除目标节点不在跳表中的情况，避免一顿操作猛如虎，然后报空指针异常。
在这里插入图片描述

/*
 * Remove the node holding searchKey, if there is one.
 * Does nothing when the key is absent or the list is empty.
 */
void del(SkipList *list, keyType searchKey){
	Node *update[MAX_LEVEL]; // per level: the last node before the candidate
	Node *x = list->header;
	Node *target = NULL;     // stays NULL when the list has no level yet
	int i;

	for(i = list->level - 1; i >= 0; i--){
		while((target = x->forward[i]) && target->key < searchKey)
			x = target;
		update[i] = x;
	}

	// only unlink when the key was actually found
	if(target == NULL || target->key != searchKey)
		return;

	// unlink from the top level downwards
	for(i = list->level - 1; i >= 0; i--){
		if(update[i]->forward[i] != target)
			continue; // the node does not reach this level
		update[i]->forward[i] = target->forward[i];
		if(list->header->forward[i] == NULL) // the level became empty: drop it
			list->level--;
	}
	free(target);
}

2-15-6 清空

/*
 * Release every node of the list, the header, and the list handle itself.
 * Every node is linked on level 0, so walking forward[0] from the header
 * visits each node exactly once. Passing NULL is a no-op.
 */
void freeList(SkipList *list){
    if(list == NULL)
        return;

    Node *x = list->header;
    while(x != NULL){
        Node *next = x->forward[0]; // read the link before the node is freed
        free(x);
        x = next;
    }
    free(list);
}

2-15-7 完整代码

头文件skipList.h

#ifndef SKIP_LIST_H
#define SKIP_LIST_H

/*
 * Skip list based on the layout of the original paper: every node carries an
 * array of forward pointers, one per level it takes part in.
 * The macros below use malloc() and rand(); include <stdlib.h> before using them.
 */

#define OK (0)             // the operation did what was asked
#define DONE (1)           // the call completed, but not with the expected outcome
#define MAX_LEVEL (10)     // maximum number of levels
#define PROMOTE_RATE (0.5) // probability of promoting a node one more level
// A new node needs the Node struct itself plus room for n more Node pointers.
#define NEW_NODE(n) ((Node*)malloc(sizeof(Node) + (n) * sizeof(Node*)))
// GNU statement expression; each argument is evaluated exactly once.
#define MIN(A,B) ({ __typeof__(A) min_a_ = (A); __typeof__(B) min_b_ = (B); min_a_ < min_b_ ? min_a_ : min_b_; })
#define ROLL() (rand() / (RAND_MAX + 1.0)) // pseudo-random double in [0, 1)
// Element count of a true array (not of a pointer).
#define ARRAY_SIZE( ARRAY ) (sizeof (ARRAY) / sizeof ((ARRAY)[0]))

typedef int keyType;
typedef int valueType;

typedef struct Node {
	keyType key;	 // search key
	valueType value; // payload stored under the key
	struct Node * forward[1];  // outgoing links of this node, one per level
} Node;						   // the original paper names this "forward", not "next"

typedef struct SkipList {
	int level;      // number of levels currently in use
	Node * header;  // header node; entry point of every level
} SkipList;

/* Create an empty list; returns NULL on allocation failure. */
SkipList * createList(void);
/* Allocate a node with `level` extra forward slots; returns NULL on failure. */
Node * makeNode(int level, keyType key, valueType value);
/* Insert a key or overwrite its value; returns OK or DONE. */
int insert(SkipList *list, keyType searchKey, valueType newValue);
/* Remove the node holding searchKey, if any. */
void del(SkipList *list, keyType searchKey);
/* Return a pointer to the value stored under searchKey, or NULL. */
valueType *search(SkipList *list, keyType searchKey);
/* Print all levels, highest first. */
void printAll(SkipList *list);
/* Release the list. */
void freeList(SkipList *list);
#endif

具体实现skipList.c

#include <stdio.h>
#include <stdlib.h>
#include "skipList.h"

/*
 * Create an empty skip list.
 * The header node is allocated with MAX_LEVEL forward slots (one in the
 * struct plus MAX_LEVEL - 1 extra) and every one of them is cleared, so that
 * insert() can safely read header->forward[i] for any i < MAX_LEVEL.
 * Returns NULL when either allocation fails.
 */
SkipList * createList(){
	SkipList *list = malloc(sizeof *list);
	if (list == NULL)
		return NULL;

	list->level = 0; // no level is in use yet
	list->header = makeNode(MAX_LEVEL - 1, 0, 0);
	if (list->header == NULL) {
		free(list);
		return NULL;
	}

	// Clear ALL MAX_LEVEL slots; stopping at MAX_LEVEL - 1 would leave the
	// top slot uninitialized for nodes promoted to the maximum height.
	for (int level = 0; level < MAX_LEVEL; level++)
		list->header->forward[level] = NULL;

	return list;
}

/*
 * Allocate a node with room for `level` extra forward links and store the
 * key/value pair in it. The forward links themselves are left unset; the
 * caller is expected to fill them in.
 * Returns NULL when the allocation fails.
 */
Node * makeNode(int level, keyType key, valueType value){
	Node *node = NEW_NODE(level);
	if(node != NULL){
		node->key = key;
		node->value = value;
	}
	return node;
}

/*
 * Draw the height of a new node: start at 1 and grow by one for every roll
 * below PROMOTE_RATE, never exceeding MAX_LEVEL (a cap the paper's version
 * does not have). The roll is taken before the cap is checked.
 */
int randomLevel(){
	int height = 1;
	for(;;){
		if(!(ROLL() < PROMOTE_RATE))
			break;
		if(height >= MAX_LEVEL)
			break;
		height++;
	}
	return height;
}

/*
 * Insert (searchKey, newValue) into the list, or overwrite the value when the
 * key is already present.
 * Returns OK when a new node was linked in, and DONE when an existing key was
 * updated or when the new node could not be allocated (the list is left
 * unchanged in that case).
 */
int insert(SkipList *list, keyType searchKey, valueType newValue){
	Node *update[MAX_LEVEL]; // per level: the last node before the insert position
	Node *x = list->header;
	Node *target = NULL;     // stays NULL when the list has no level yet
	int i;

	// step 1: walk down from the top level, recording the search path
	for(i = list->level - 1; i >= 0; i--){
		while((target = x->forward[i]) && target->key < searchKey)
			x = target;
		update[i] = x;
	}

	// step 2: the key already exists, just replace its value
	if(target && target->key == searchKey){
		target->value = newValue;
		return DONE;
	}

	// step 3-1: pick a random height and allocate before touching the list,
	// so a failed allocation cannot leave list->level raised
	int newLevel = randomLevel();
	x = makeNode(newLevel, searchKey, newValue);
	if(x == NULL)
		return DONE;

	// levels that did not exist so far start directly at the header
	if(newLevel > list->level){
		for(i = list->level; i < newLevel; i++)
			update[i] = list->header;
		list->level = newLevel;
	}

	// step 3-2: splice the node into every level it occupies
	for(i = newLevel - 1; i >= 0; i--){
		x->forward[i] = update[i]->forward[i];
		update[i]->forward[i] = x;
	}
	return OK;
}

/*
 * Remove the node holding searchKey, if there is one.
 * Does nothing when the key is absent or the list is empty.
 */
void del(SkipList *list, keyType searchKey){
	Node *update[MAX_LEVEL]; // per level: the last node before the candidate
	Node *x = list->header;
	Node *target = NULL;     // stays NULL when the list has no level yet
	int i;

	for(i = list->level - 1; i >= 0; i--){
		while((target = x->forward[i]) && target->key < searchKey)
			x = target;
		update[i] = x;
	}

	// only unlink when the key was actually found
	if(target == NULL || target->key != searchKey)
		return;

	// unlink from the top level downwards
	for(i = list->level - 1; i >= 0; i--){
		if(update[i]->forward[i] != target)
			continue; // the node does not reach this level
		update[i]->forward[i] = target->forward[i];
		if(list->header->forward[i] == NULL) // the level became empty: drop it
			list->level--;
	}
	free(target);
}

/*
 * Look up searchKey, descending from the highest level in use.
 * Returns a pointer to the stored value, or NULL when the key is absent.
 * The search stops as soon as the key is seen on any level.
 */
valueType *search(SkipList *list, keyType searchKey){
	Node *cur = list->header;
	int lvl = list->level;
	while(--lvl >= 0){
		Node *next = cur->forward[lvl];
		// advance along this level while the next key is still too small
		while(next != NULL && next->key < searchKey){
			cur = next;
			next = cur->forward[lvl];
		}
		if(next != NULL && next->key == searchKey)
			return &next->value;
	}
	return NULL;
}


/*
 * Print every level from the highest one in use down to the lowest as
 * "(key,value)" pairs. Levels are labelled starting from 1.
 */
void printAll(SkipList *list){
	printf("\nshow skip list : \n");
	int lvl = list->level;
	while(lvl > 0){
		printf("level %d:\t", lvl);
		for(Node *cur = list->header->forward[lvl - 1]; cur != NULL; cur = cur->forward[lvl - 1])
			printf("(%d,%d)\t", cur->key, cur->value);
		printf("\n");
		lvl--;
	}
	printf("\n");
}

/*
 * Release every node of the list, the header, and the list handle itself.
 * Every node is linked on level 0, so walking forward[0] from the header
 * visits each node exactly once. Passing NULL is a no-op.
 */
void freeList(SkipList *list){
    if(list == NULL)
        return;

    Node *x = list->header;
    while(x != NULL){
        Node *next = x->forward[0]; // read the link before the node is freed
        free(x);
        x = next;
    }
    free(list);
}

测试调用main.c

#include <stdio.h>
#include <stdlib.h>
#include "skipList.h"

/*
 * Test driver: fills the list, overwrites a few keys, inserts a missing key,
 * then exercises search and del, printing the list after each phase.
 * Returns 1 when the list cannot be created.
 */
int main(int argc, char *argv[]) {
	(void)argc;
	(void)argv;

	printf("\n==== test init =====\n");
	SkipList *list = createList();
	if(list == NULL){
		// every call below dereferences the list, so stop here
		fprintf(stderr, "createList failed\n");
		return 1;
	}

	int i;
	for(i = 0; i < 10; i++)
		insert(list, i, -i);
	for(i = 11; i < 20; i++) // key 10 is left out on purpose, see "test insert"
		insert(list, i, -i);
	printAll(list);

	printf("\n==== test update =====\n");
	insert(list, 0, 99);
	insert(list, 19, 50);
	insert(list, 11, 33);
	printAll(list);

	printf("\n==== test insert =====\n");
	insert(list, 10, -10);
	printAll(list);

	int searchKeys[] = {10, 19, 0, 100}; // 100 is never inserted
	int length = ARRAY_SIZE(searchKeys);
	valueType *ret;
	printf("\n==== test search =====\n");
	for(i = 0; i < length; i++){
		ret = search(list, searchKeys[i]);
		if(ret)
			printf("\n searchKey=%d, value=%d\n", searchKeys[i], *(ret));
		else
			printf("\n searchKey=%d, not found!\n", searchKeys[i]);
	}

	printf("\n==== test del =====\n");
	for(i = 0; i < length; i++)
		del(list, searchKeys[i]);
	printAll(list);

	freeList(list);

	return 0;
}

2-16 复杂度分析

复杂度分析（时间&&空间）建议还是从原始论文阅读研究（先看参考资料[11]/[12]/[13]），再看看网上其他简单的分析方法。

首先一个节点是否晋升，取决于随机概率生成的结果，设升级概率为p，则反面事件（不升级）概率为 1-p；一个节点产生k层的条件是：前k-1层升级成功，但第k层升级失败。
（1）生成1层的概率： ${p_1}=1-p$
（2）生成2层的概率： ${p_2}={p}\left ( {1-p} \right )$
（3）生成3层的概率： ${p_3}={p}^{2}\left ( {1-p} \right )$
……
（k）生成k层的概率： ${p_k}={p}^{k-1}\left ( {1-p} \right )$
……
可以计算出一个节点平均层数（指针数），即数学期望：
$\begin{aligned} E\left (node \right )&=1\times {p_1} + 2\times {p_2}+3\times {p_3} +...+k\times {p_k}+... \\ &=1\times \left ( 1-p \right ) + 2\times \ p\left (1-p \right )+3\times \ p^{2}\left ( 1-p \right ) + ...+k\times \ p^{k-1}\left ( 1-p \right )+... \\ &=\left ( 1-p \right )\sum_{k=1}^{\infty }kp^{k-1} \end{aligned}$
传统艺能（类等比数列求和），令 ${S_n}=\sum_{k=1}^{n }kp^{k-1}$
（1） ${S_n}=1 + 2p+3p^{2}+ ...+np^{n-1}$
（2） $p{S_n}=p + 2p^2+3p^{3}+ ...+(n-1)p^{n-1} + np^{n}$
（3）以上两式相减： ${S_n}={\frac{1-p^n}{(1-p)^2}}-{\frac{np^n}{1-p}},(0<p<1)$ ，易有 $\lim_{n \to \infty}{S_n}=\frac{1}{(1-p)^2}$
（4）代入极限有
$E\left ( node \right )=(1-p)\times{\frac{1}{(1-p)^2}}={\frac{1}{1-p}}$
以上推导过程和结论接下来会用到。

2-16-1 空间复杂度分析

假设原始链表中有n个节点，那转化为跳表后额外开销（overhead）为：
$n\times E\left (node \right )=\frac{n}{1-p}$
（1）若p=1/2，则 $\frac{n}{1-p}=2n$
（2）若p=1/4，则 $\frac{n}{1-p}=\frac{4}{3}n$
……
随着概率p减小，空间消耗接近 $O (n)$ ，直观地看就是节点层数减少或者每一层的节点数量减少；实际上我们也不会让p值大于0.5（无用的点太多），若那样做显然会降低查找数效率。

实际上我们这样分析也是有缺陷的：这样讨论忽略了p的概率分布（论文中作者借助二项分布和负二项分布进行了讨论），且对max level的限制条件也未考虑，详情需要再啃论文；但目前我还未理解透彻。

2-16-2 时间复杂度分析

时间复杂度的讨论的核心是计算search操作的数学期望。以下转载参考资料[12]的解释：
在这里插入图片描述

借用论文中的示意图，这两种情况分别对应情况c和情况b，套用论文思想，用 $C (k)$ 表示爬山的过程时间消耗（其实就是查询长度），则 $C (0) = 0$ ，对 $C (k)$ 易有：
$\begin{aligned} C\left (k \right )&=(1-p)(情况b)+p(情况c)\\ &=(1-p)(1+C(k)) +p(1+C(k-1)) \\ \end{aligned}$
整理一下
$C\left (k \right )=\frac{1}{p}+C(k-1)$
传统艺能（等差数列求和），
$C(k)=\frac{k}{p}$
于是问题转化为求出在n个level 1层节点的情况下，整个skiplist层数的平均值（也就是数学期望），仿照2-16-1和2-16-2的分析，逐项推导：
（1）level 1 节点数 $N (1) = n$ ，这是固定值
（2）level 2 节点数 $N (2) = n p$ ，从这里开始以下都是期望值
……
（k）level k 节点数 $N(k)=np^{k-1}$
……
设想最极端的情况（实际算法执行结果也是如此，可以自己run一下看看），最高层（max_level）仅有1个节点，则
$N(max\_level)=np^{max\_level-1}=1$
两边同时取以1/p为底的对数，解得
$max\_level=log_{1/p}n+1$
这不就是论文中前半部分讨论的 $L (n)$ 咩？自然带入 $C (x)$ ，得到时间消耗预期
$C(max\_level)=\frac{1}{p}(log_{1/p}n+1)$
取渐进
${\color{red}O(C(max\_level))=O(\log n)}$
终于求出来了……更加精细的分析请回看论文和参考资料。

2-16-3 简单分析

可以假设每两个节点取一个晋升，那么

空间复杂度
（1）level 1 节点数量 $x_{1}=n$
（2）level 2 节点数量 $x_{2}=\frac{n}{2}$
（3）level 3 节点数量 $x_{3}=\frac{n}{2^2}$
……
（k）level k 节点数量 $x_{k}=\frac{n}{2^{k-1}}$
……
节点总数就是等比数列求和
$S=\sum_{i=1}^{max\_level}x_{i}=\sum_{i=1}^{max\_level}\frac{n}{2^{i-1}}<2n$ ，易有空间复杂度 $O(n)$ ，同之前论文视角的精准分析一致（渐进的思想，抓大放小！）
时间复杂度
同样考虑极端情况，最上面一层（max_level）仅有一个节点，同2-16-2：
$\frac{n}{2^{max\_level-1}}=1$ ，易有 $max\_level=\log_{2}n+1$ ，（渐进意义下忽略底数2和常数项），自然有时间复杂度 $O(\log n)$

事实上假设3个节点取一个晋升，4个节点取一个晋升（通过调节p），都可以套用上面的分析过程，得到相同的结果 $O(\log n)$ ，这不正好是为什么log不带底数的原因了吗？如果不太能理解的朋友，可以翻看下对数log的数学意义：（1）是将乘除法变为加减法，不仅在逻辑上转化了问题，在物理电路设计上也能很快实现；（2）是把线性坐标难以表达的大数值变化趋势，缩小并方便观察。至此，复杂度的讨论就基本上完备了。

2-16-4 对比平衡树、哈希表

这部分讨论参看资料，就不展开了，参考知乎作者（方小白）的图（侵删）
在这里插入图片描述

2-17 跳表的另一种实现

本小节参考资料[4]的描述并实现（将java转化为c），如果之前实现的跳表是一个“假装”有很多个节点的楼层的话（各种指向，每一层相当于虚拟的节点），那么接下来介绍的跳表就是“真”节点结构了，因为它确实每一层都有节点……参考下图
在这里插入图片描述

注意：
（1）我们使用了双链表去实现，每个节点有上下左右四个指向，很像“稀疏”的一张网，哈哈是不是有稀疏矩阵的味道。
（2）漫画算法的作者自己也在知乎的回复中提到：用单链表也能实现，遍历方向仅需要向右和向下；此处使用双链表就是为后续帖子做铺垫，在2-19小结中有说明。
（3）此处没有使用keyType用于标记顺序，所以采用了1个虚拟头结点（ $-\infty$ ）和1个虚拟尾结点（ $+\infty$ ），方便插入操作。

skipList.h

#ifndef SKIP_LIST_H
#define SKIP_LIST_H

/*
 * Skip list built from four-way linked nodes: every level is a doubly linked
 * list bracketed by a head and a tail sentinel, and the copies of one value
 * on adjacent levels are connected through up/down.
 * ROLL() uses rand(); include <stdlib.h> before using it.
 */

#define MAX_LEVEL (10)      // maximum number of levels above the bottom one
#define PROMOTE_RATE (0.5)  // probability of promoting a node one more level
#define ROLL() (rand() / (RAND_MAX + 1.0)) // pseudo-random double in [0, 1)

typedef int ElementType;

typedef struct node{
	ElementType data;    // stored value (sentinels hold INT_MIN / INT_MAX)
	struct node *up;     // same value one level higher, or NULL
	struct node *down;   // same value one level lower, or NULL
	struct node *left;   // predecessor on this level
	struct node *right;  // successor on this level
} node;

typedef struct skipList{
	int maxLevel;  // index of the top level; 0 means only the bottom level exists
	node *head;    // head sentinel of the top level
	node *tail;    // tail sentinel of the top level
} skipList;

/* Create an empty list; returns NULL on allocation failure. */
skipList *createSkipList(void);
/* Allocate an unlinked node holding data; returns NULL on failure. */
node *createNode(ElementType data);
/* Return the bottom-level node holding data, or NULL. */
node *search(const skipList *list, ElementType data);
/* Insert data; returns 1 when inserted, 0 otherwise. */
int insert(skipList *list, ElementType data);
/* Remove data; returns 1 when removed, 0 when it was not found. */
int removeData(skipList *list, ElementType data);
/* Print all non-empty levels, highest first. */
void printAll(const skipList *list);
#endif

skipList.c

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "skipList.h"

/*
 * Create an empty skip list consisting of a single level that holds only the
 * head (INT_MIN) and tail (INT_MAX) sentinels.
 * Returns NULL when any allocation fails; nothing is leaked in that case.
 */
skipList *createSkipList(){
	// sizeof *list, not sizeof(list): the latter is only the size of a pointer
	skipList *list = malloc(sizeof *list);
	if(!list)
		return NULL;

	list->maxLevel = 0;
	list->head = createNode(INT_MIN);
	list->tail = createNode(INT_MAX);
	if(!list->head || !list->tail){
		free(list->head); // free(NULL) is a no-op
		free(list->tail);
		free(list);
		return NULL;
	}

	list->head->right = list->tail;
	list->tail->left = list->head;

	return list;
}

/*
 * Allocate a node holding data with all four links set to NULL.
 * The four link pointers are members of the struct, so exactly one struct is
 * allocated. Prints a message and returns NULL when the allocation fails.
 */
node *createNode(ElementType data){
	node *newNode = malloc(sizeof *newNode);
	if (!newNode){
		printf("\n create Node[%d] fail!", data);
		return NULL;
	}

	newNode->data = data;
	newNode->left = NULL;
	newNode->right = NULL;
	newNode->up = NULL;
	newNode->down = NULL;
	return newNode;
}

/*
 * Descend from the top-level head and return the bottom-level node with the
 * largest value that is <= data; this is the head sentinel when no such
 * value exists. Comparing with <= means an already stored value is returned
 * itself, which lets callers detect duplicates.
 */
node *findPreNode(const skipList *list, ElementType data){
	node *pos = list->head;
	for(;;){
		// move right while the successor is a real value that is not too large
		for(node *next = pos->right; next->data != INT_MAX && next->data <= data; next = pos->right)
			pos = next;
		if(pos->down == NULL)
			return pos;
		pos = pos->down;
	}
}

/* Link newNode into a level directly to the right of preNode. */
void appendNode(node *preNode, node *newNode){
	node *successor = preNode->right;
	newNode->right = successor;
	newNode->left = preNode;
	successor->left = newNode;
	preNode->right = newNode;
}

/*
 * Add an empty level (a head and a tail sentinel) on top of the list and
 * make it the new top level.
 * When a sentinel cannot be allocated the list is left untouched, which the
 * caller can detect because list->maxLevel has not changed.
 */
void addLevel(skipList *list){
	node *front = createNode(INT_MIN);
	node *back = createNode(INT_MAX);
	if(!front || !back){
		free(front); // free(NULL) is a no-op
		free(back);
		return;
	}

	front->right = back;
	back->left = front;

	// stack the new sentinels on top of the current ones
	front->down = list->head;
	list->head->up = front;
	back->down = list->tail;
	list->tail->up = back;

	list->head = front;
	list->tail = back;
	list->maxLevel++;
}

/*
 * Remove an empty level, identified by its head sentinel leftNode, and free
 * its two sentinels. The bottom level is never removed.
 *
 * When the top level is removed, the level below becomes the new top:
 * list->head/list->tail are moved down and maxLevel is decremented. Merely
 * cutting the up-links while keeping the empty level as list->head would
 * leave that level unreachable from below, and a later promotion in insert()
 * would walk off the left end of the list.
 */
void removeLevel(skipList *list, node *leftNode){
	node *rightNode = leftNode->right;
	if(!leftNode->down)
		return; // bottom level: always kept

	if(!leftNode->up){
		// top level: the level below takes over
		list->head = leftNode->down;
		list->tail = rightNode->down;
		list->head->up = NULL;
		list->tail->up = NULL;
	}else{
		// inner level: connect the levels above and below to each other
		leftNode->up->down = leftNode->down;
		leftNode->down->up = leftNode->up;
		rightNode->up->down = rightNode->down;
		rightNode->down->up = rightNode->up;
	}
	list->maxLevel--;

	free(leftNode);
	free(rightNode);
}

/*
 * Insert data into the list and randomly promote it to higher levels.
 * Returns 1 when the value was inserted, and 0 when it is already present,
 * equals one of the sentinel values (INT_MIN / INT_MAX), or the bottom-level
 * node cannot be allocated. A failed allocation during promotion only stops
 * the promotion; the value stays inserted.
 */
int insert(skipList *list, ElementType data){
	// the sentinel values mark the ends of every level and cannot be stored
	if(data == INT_MIN || data == INT_MAX)
		return 0;

	node *preNode = findPreNode(list, data);
	if(preNode->data == data)
		return 0;

	node *newNode = createNode(data);
	if(!newNode)
		return 0;
	appendNode(preNode, newNode);
	int curLevel = 0;

	// Cap the height of THIS node. Testing list->maxLevel instead would stop
	// all promotion for every later insert once the list reached full height.
	while(curLevel < MAX_LEVEL && ROLL() < PROMOTE_RATE){
		if(curLevel == list->maxLevel){
			addLevel(list);
			if(curLevel == list->maxLevel)
				break; // the list did not grow, nothing to promote into
		}

		// find the nearest node to the left that reaches the next level
		while(preNode && !preNode->up)
			preNode = preNode->left;
		if(!preNode)
			break; // no path upwards; keep the node at its current height
		preNode = preNode->up;

		node *upperNode = createNode(data);
		if(!upperNode)
			break;
		appendNode(preNode, upperNode);
		upperNode->down = newNode;
		newNode->up = upperNode;
		newNode = upperNode;
		curLevel++;
	}
	return 1;
}

/*
 * Remove data from every level it appears on and free all of its nodes.
 * Levels above the bottom one that become empty are handed to removeLevel().
 * Returns 1 when the value was removed, 0 when it is not stored (the sentinel
 * values INT_MIN / INT_MAX are never treated as stored data).
 */
int removeData(skipList *list, ElementType data){
	if(data == INT_MIN || data == INT_MAX)
		return 0;

	node *rmNode = search(list, data);
	if(!rmNode)
		return 0;

	int curLevel = 0;
	while(rmNode){
		// remember the neighbours before the node is released
		node *upper = rmNode->up;
		node *left = rmNode->left;
		node *right = rmNode->right;

		right->left = left;
		left->right = right;
		if(curLevel != 0 && left->data == INT_MIN && right->data == INT_MAX){
			removeLevel(list, left); // only the sentinels remain on this level
		}else{
			curLevel++;
		}

		free(rmNode); // free the copy on this level, not just the bottom one
		rmNode = upper;
	}
	return 1;
}

/*
 * Return the bottom-level node holding data, or NULL when it is not stored.
 * INT_MIN / INT_MAX are sentinel values, so they are reported as not found
 * instead of returning a sentinel node.
 */
node *search(const skipList *list, ElementType data){
	if(data == INT_MIN || data == INT_MAX)
		return NULL;

	node *preNode = findPreNode(list, data);
	return preNode->data == data ? preNode : NULL;
}

/*
 * Print the values of every level from list->maxLevel down to level 0.
 * Levels that contain nothing but the two sentinels are skipped.
 */
void printAll(const skipList *list){
	printf("\n==========\n");
	node *rowHead = list->head;
	int level = list->maxLevel;
	while(level >= 0){
		node *item = rowHead->right;
		if(item->data != INT_MAX){ // at least one real value on this level
			printf("level[%d]: \t", level);
			for(; item && item->data != INT_MAX; item = item->right)
				printf("%d ", item->data);
			printf("\n");
		}
		rowHead = rowHead->down;
		level--;
	}
}

main.c（测试）

#include <stdio.h>
#include <stdlib.h>
#include "skipList.h"

/*
 * Test driver: inserts a set of values (65 twice, to exercise duplicate
 * rejection), searches for present and absent values, then removes a stored
 * value, a missing value, and another stored value, printing the list after
 * each removal. Returns 1 when the list cannot be created.
 */
int main(int argc, char *argv[]) {
	(void)argc;
	(void)argv;

	printf("初始化跳表:");
	skipList *list = createSkipList();
	if(list == NULL){
		// every call below dereferences the list, so stop here
		fprintf(stderr, "createSkipList failed\n");
		return 1;
	}

	insert(list, 50);
	insert(list, 15);
	insert(list, 13);
	insert(list, 20);
	insert(list, 100);
	insert(list, 75);
	insert(list, 99);
	insert(list, 76);
	insert(list, 83);
	insert(list, 65);
	insert(list, 65); // duplicate
	printAll(list);

	printf("\n查询测试:");
	int searchArr[] = {50, 10, 100, -10, 20, 65}; // 10 and -10 are not stored
	int i, len = sizeof(searchArr)/sizeof(searchArr[0]);
	for(i=0; i < len; i++)
		printf("\nsearch %d %s", searchArr[i], search(list, searchArr[i]) != NULL ? "ok" : "fail");


	printf("\n\n删除测试:\n");
	int target = 65;
	printf("\n 删除%d:", target);
	removeData(list, target);
	printAll(list);

	target = 10; // not stored: the list must stay unchanged
	printf("\n 删除%d:", target);
	removeData(list, target);
	printAll(list);

	target = 20;
	printf("\n 删除%d:", target);
	removeData(list, target);
	printAll(list);

	return 0;
}

2-18 重要的应用

全文搜索引擎Lucene的索引倒排（Lucene倒排索引简述之索引表和Lucene倒排索引简述之倒排表等系列文章，博主不错哦）
远程字典服务Redis官网
也可直接阅读参考资料（优先[8]）

具体内容就不做展开了，帖子内容膨胀……

2-19 小结

跳表是89年就设计出来的数据结构，但还是需要踏踏实实的学习的，特别是复杂度的推导和结论的理解与记忆。
代码的实现从最单纯的论文描述出发，比简单的复制他人的博客要有意义得多，也更能修炼算法与数据结构内功；且网上很多帖子在细节描述不详细。或者是博主觉得简单不屑于讲，或者是博主自己没有理解到位。咱们这个系列的初衷不就是帮助小白同学扫雷吗？
黑皮书在12.3 确定性跳跃表一节也给出了一种实现，请大家自学讨论。
在黑皮书中，还描述了一种以单链表为基础设计的复杂数据结构：多重表（P42）

其实本质同咱们实现的第二种形式的跳表一致，在严版教材中还专门列了一章单独讨论，大家可以自行学习。

下一贴，我们简单讨论双链表。

参考资料
[1] skiplist原作论文–清晰版，期刊版本
[2] 随机化（二）：跳表 (Skip lists)（解析原文）
[3] 数据结构系列之跳表（复杂度分析 + 对比平衡树及哈希表）
[4] 漫画：什么是 “跳表” ？（另类的实现）
[5] 深入理解跳跃链表[一] （这位博主不错哦）
[6] Redis-跳表
[7] Redis底层详解（六）跳跃表
[8] 跳跃表详解（对比学习应用：Lucene倒排列表中的应用与 Redis）
[9] 跳表Java实现（对JAVA中skipList的理解和JDK实现有提到，对复杂度有一定的讨论）
[10] skiplist 跳跃表详解及其编程实现（讨论复杂度）
[11] Redis跳表的时间复杂度和空间复杂度的分析（对原始论文复杂度分析的翻译，简单易懂）
[12] Redis为什么用跳表而不用平衡树？（注意这篇的参考文献中微信公众号文章，是一个redis讲解的系列课程）

卷毛迷你猪

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
小肥柴慢慢手写数据结构（C篇）（2-5 SkipList 跳表）

小肥柴慢慢手写数据结构（C篇）（2-5 跳表）目录2-14 跳表（skip list）的概念2-15 参考论文的实现2-15-1 初始化2-15-2 插入节点2-15-3 查询（这个才是我们想要的核心功能）2-15-4 打印2-15-5 删除节点2-15-6 清空2-15-7 完整代码2-16 复杂度分析2-16-1 时间复杂度2-16-2 空间复杂度2-17 跳表的另一种实现2-18 重要的应用----Redis2-18 小结目录我在网上传播的答案中，找了一个相对简单的实现，方便手写。2-14 跳表
复制链接

扫一扫