redis原理-数据结构

最新推荐文章于 2022-04-25 19:25:41 发布

美了美了

最新推荐文章于 2022-04-25 19:25:41 发布

阅读量499

点赞数

分类专栏： redis 文章标签： redis

ccc

本文链接：https://blog.csdn.net/xyblog/article/details/50678275

版权

redis 专栏收录该内容

15 篇文章 1 订阅

订阅专栏

redis原理-数据结构

一、内存分配

redis内存分配函数是在文件zmalloc.h和zmalloc.c里面进行声明和定义的，主要的函数如下：

/* Allocation wrappers declared in zmalloc.h. */
void *zmalloc(size_t size);             /* allocate `size` bytes */
void *zrealloc(void *ptr, size_t size); /* resize the block at `ptr` to `size` bytes */
void zfree(void *ptr);                  /* release the block at `ptr` */

redis使用了zmalloc zrealloc zfree来封装了内存管理的函数，这里针对不同的平台来封装，从而屏蔽了底层的差异性实现跨平台，定义如下：

/* Select the allocator backend at compile time.
 * Each branch names the library in ZMALLOC_LIB and, when the allocator can
 * report the size of a block, defines HAVE_MALLOC_SIZE and zmalloc_size().
 * If no branch sets ZMALLOC_LIB it falls back to "libc". */
#if defined(USE_TCMALLOC) /* build requested tcmalloc */
#define ZMALLOC_LIB ("tcmalloc-" __xstr(TC_VERSION_MAJOR) "." __xstr(TC_VERSION_MINOR))
#include <google/tcmalloc.h>
#if (TC_VERSION_MAJOR == 1 && TC_VERSION_MINOR >= 6) || (TC_VERSION_MAJOR > 1)
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) tc_malloc_size(p)
#else
#error "Newer version of tcmalloc required"
#endif

#elif defined(USE_JEMALLOC) /* build requested jemalloc */
#define ZMALLOC_LIB ("jemalloc-" __xstr(JEMALLOC_VERSION_MAJOR) "." __xstr(JEMALLOC_VERSION_MINOR) "." __xstr(JEMALLOC_VERSION_BUGFIX))
#include <jemalloc/jemalloc.h>
#if (JEMALLOC_VERSION_MAJOR == 2 && JEMALLOC_VERSION_MINOR >= 1) || (JEMALLOC_VERSION_MAJOR > 2)
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) je_malloc_usable_size(p)
#else
#error "Newer version of jemalloc required"
#endif

#elif defined(__APPLE__) /* Apple platforms: malloc_size() is available */
#include <malloc/malloc.h>
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) malloc_size(p)
#endif

#ifndef ZMALLOC_LIB
#define ZMALLOC_LIB "libc"
#endif

定义平台之间的差异，主要是tcmalloc（google）、jemalloc（facebook）、苹果平台。

上边说过，封装就是为了屏蔽底层平台的差异，同时方便自己实现相关的统计函数。具体来说就是：

若系统中存在Google的TC_MALLOC库，则使用tc_malloc一族函数代替原本的malloc一族函数。

若当前系统是Mac系统，则使用<malloc/malloc.h>中的内存分配函数。

其他情况，在每一段分配好的空间前头，同时多分配一个定长的字段，用来记录分配的空间大小。

/* Size of the bookkeeping prefix placed in front of every allocation.
 * No prefix is needed when the allocator can report block sizes itself. */
#ifdef HAVE_MALLOC_SIZE
#define PREFIX_SIZE (0)
#else
#if defined(__sun) || defined(__sparc) || defined(__sparc__)
#define PREFIX_SIZE (sizeof(long long))
#else
#define PREFIX_SIZE (sizeof(size_t))
#endif
#endif

注意：只有在没有定义HAVE_MALLOC_SIZE（即分配器无法直接返回内存块大小）时才需要这个前缀：在sun/sparc平台上前缀长度是sizeof(long long)，其他平台（例如linux）则是sizeof(size_t)。

二、简单字符串

适用场景：

redis默认字符串都是使用sds。

优点:

获取字符串长度：复杂度O(1)，直接读取sdshdr->len就能得到长度；而常规的C字符串需要用strlen遍历，复杂度是O(N)。

二进制安全存储：使用长度来规定了数据的存放长度，而c语言字符数组遇到\0就认为是结尾了，使得redis不仅仅可以存放字符串还可以存放任意二进制数据。

杜绝缓冲区溢出：sds存放的数据长度都是指定的，不存在溢出。

修改字符串减少内存重分配次数（空间预分配）：sdscat调用的sdsMakeRoomFor在剩余空间不足时会多分配一些空间——如果追加后的新长度小于1M（SDS_MAX_PREALLOC），就按新长度的2倍分配；如果大于等于1M，就在新长度的基础上再多分配1M的空间。

惰性释放：在sdstrim（sdsclear）中，被删除字符占用的空间不会立即归还，而是把这部分长度累加到sdshdr->free里，留给以后的追加操作复用。

原理解释：

简单动态字符串（simple dynamic string，SDS），定义在sds.h头文件里面，包含了sds的定义以及sds相关的基础操作函数。

sds定义：

/* Header stored directly in front of every sds buffer. */
struct sdshdr {
    int len;    /* bytes of buf currently holding string data */
    int free;   /* bytes of buf allocated but not yet used */
    char buf[]; /* flexible array member: the string bytes follow the header */
};

这里最后一个成员buf[]没有指定数组长度，这其实是一种叫做柔性数组（flexible array member）的技巧：buf本身不占用结构体的空间，字符串数据紧跟在头部之后，所以一次内存分配就能同时得到头部和数据区。

redis使用sds和常规的字符串好处有：

sds定义的API有：

sdsnew ：创建一个给定c字符串的sds

/* Build an sds from a NUL-terminated C string.
 * A NULL `init` is passed through to sdsnewlen with length 0. */
sds sdsnew(const char *init) {
    size_t initlen = 0;

    if (init != NULL) {
        initlen = strlen(init);
    }
    return sdsnewlen(init, initlen);
}

sds sdsnewlen(const void *init, size_tinitlen) {

struct sdshdr *sh;

if (init) {

sh = zmalloc(sizeof(struct sdshdr)+initlen+1); //内容头部+数据

}else {

sh = zcalloc(sizeof(struct sdshdr)+initlen+1);

}

if (sh == NULL) return NULL;

sh->len = initlen;

sh->free = 0;

if (initlen && init)

memcpy(sh->buf, init, initlen);

sh->buf[initlen] = '\0';

return (char*)sh->buf;

}

sdslen ：返回sds已经使用的空间字节数

sds指针指向的是buf，所以用它减去sizeof(struct sdshdr)就可以直接得到sds的头部：

/* Return the number of bytes in use, read from the header that sits
 * sizeof(struct sdshdr) bytes before the sds pointer. */
static inline size_t sdslen(const sds s) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));

    return hdr->len;
}

sdscat:追加一个字符串到sds尾部

/* Make sure at least `addlen` more bytes can be appended to `s`.
 * Returns `s` unchanged when the spare room already suffices; otherwise the
 * block is reallocated with extra headroom and the (possibly moved) buffer
 * pointer is returned. Returns NULL if zrealloc returns NULL. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    size_t avail = sdsavail(s);

    if (avail >= addlen) return s;

    size_t curlen = sdslen(s);
    struct sdshdr *oldhdr = (void *)(s - sizeof(struct sdshdr));
    size_t target = curlen + addlen;

    /* Below SDS_MAX_PREALLOC the required length is doubled; at or above
     * it, SDS_MAX_PREALLOC extra bytes are added instead. */
    if (target < SDS_MAX_PREALLOC) {
        target *= 2;
    } else {
        target += SDS_MAX_PREALLOC;
    }

    struct sdshdr *newhdr = zrealloc(oldhdr, sizeof(struct sdshdr) + target + 1);
    if (newhdr == NULL) return NULL;

    newhdr->free = target - curlen;
    return newhdr->buf;
}

/* Append `len` bytes from `t` to the end of `s`.
 * Returns the (possibly relocated) sds, or NULL if making room failed. */
sds sdscatlen(sds s, const void *t, size_t len) {
    struct sdshdr *sh;
    size_t curlen = sdslen(s);

    /* May over-allocate so later appends need fewer reallocations. */
    s = sdsMakeRoomFor(s,len);
    if (s == NULL) return NULL;
    sh = (void*) (s-(sizeof(struct sdshdr)));
    memcpy(s+curlen, t, len);
    sh->len = curlen+len;
    sh->free = sh->free-len; /* the appended bytes consume spare room */
    s[curlen+len] = '\0';
    return s;
}

/* Append the NUL-terminated C string `t` to `s`. */
sds sdscat(sds s, const char *t) {
    size_t tlen = strlen(t);

    return sdscatlen(s, t, tlen);
}

sdstrim：清除sds首尾的字符（可指定多个）

/* Strip from both ends of `s` every character that appears in `cset`.
 * The buffer is not shrunk: the removed length is added to the header's
 * `free` counter. Returns `s`. */
sds sdstrim(sds s, const char *cset) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));
    char *first = s;
    char *last = s + sdslen(s) - 1;
    char *left = first;
    char *right = last;
    size_t kept;

    /* Advance past leading characters found in cset. */
    while (left <= last && strchr(cset, *left)) {
        left++;
    }
    /* Walk back over trailing characters found in cset. */
    while (right > first && strchr(cset, *right)) {
        right--;
    }

    if (left > right) {
        kept = 0;
    } else {
        kept = (right - left) + 1;
    }

    if (hdr->buf != left) {
        memmove(hdr->buf, left, kept);
    }
    hdr->buf[kept] = '\0';
    hdr->free = hdr->free + (hdr->len - kept);
    hdr->len = kept;
    return s;
}

三、双端链表

适用场景：

链表在redis应用比较广泛，列表键底层实现之一就是链表；客户端信息；

采用双端链表可以定位到头部或者尾部，每个节点的数据类型为任意数据类型，通过定义复制、释放、比较的函数指针实现了可以操作任意用户自定义数据。

原理解释：

链表节点：

/* One node of the doubly linked list. */
typedef struct listNode {
    struct listNode *prev; /* previous node */
    struct listNode *next; /* next node */
    void *value;           /* pointer to the stored value */
} listNode;

链表定义：

/* Doubly linked list with direct access to both ends. */
typedef struct list {
    listNode *head;                     /* first node */
    listNode *tail;                     /* last node */
    void *(*dup)(void *ptr);            /* copies a node value */
    void (*free)(void *ptr);            /* releases a node value */
    int (*match)(void *ptr, void *key); /* compares a node value with a key */
    unsigned long len;                  /* number of nodes in the list */
} list;

/* Prototypes */
list *listCreate(void);
void listRelease(list *list);
list *listAddNodeHead(list *list, void *value);

/* Insert a new node holding `value` at the head of `list`.
 * Returns `list`, or NULL if the node could not be allocated. */
list *listAddNodeHead(list *list, void *value)
{
    listNode *fresh = zmalloc(sizeof(*fresh));

    if (fresh == NULL) {
        return NULL;
    }
    fresh->value = value;
    fresh->prev = NULL;

    if (list->len == 0) {
        /* Empty list: the new node is both head and tail. */
        fresh->next = NULL;
        list->head = list->tail = fresh;
    } else {
        fresh->next = list->head;
        list->head->prev = fresh;
        list->head = fresh;
    }
    list->len++;
    return list;
}

四、字典

适用场景：

字典在redis中的应用很广泛：因为redis是一个键值（k-v）内存数据库，所以数据都是以字典为基础存储的；字典也是hash键的底层实现之一：当保存的元素是比较长的字符串，或者hash键值对比较多的时候，就会使用字典来存储。

原理解释：

由于字典是由hash表存储的，hash表的定义如下：

typedefstruct dictht {

dictEntry **table; // 哈希表数组

unsigned long size; // 哈希表大小

unsigned long sizemask; // 哈希表大小掩码，用于计算索引值，等于 size -1

unsigned long used; // 已经使用的

}dictht;

hash表节点

/* One key/value entry of a hash table. */
typedef struct dictEntry {
    void *key;
    union { /* the value can be a pointer or a 64-bit integer */
        void *val;
        uint64_t u64;
        int64_t s64;
    } v;
    struct dictEntry *next; /* next entry in the same bucket (collision chain) */
} dictEntry;

字典类型：

/* Per-dictionary callbacks that define how keys and values are handled. */
typedef struct dictType {
    /* computes the hash of a key */
    unsigned int (*hashFunction)(const void *key);
    /* duplicates a key */
    void *(*keyDup)(void *privdata, const void *key);
    /* duplicates a value */
    void *(*valDup)(void *privdata, const void *obj);
    /* compares two keys */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* destroys a key */
    void (*keyDestructor)(void *privdata, void *key);
    /* destroys a value */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

字典的定义：

typedefstruct dict {

dictType *type; //可以实现任意用户类型，只需要设定了特定的类型操作函数

void *privdata;

dictht ht[2];

int rehashidx; /* rehashing not in progressif rehashidx == -1 */

int iterators; /* number of iteratorscurrently running */

}dict;

//其中rehash是对hash表进行扩展或者收缩：当hash表的负载因子(负载因子=dictht.used/dictht.size)达到阈值时，就会分配一个新的hash表，并把旧表中的节点逐步迁移过去（见下面的_dictExpandIfNeeded和dictRehash）。

创建一个字典

dict *dictCreate(dictType *type, void *privDataPtr);

/* Allocate a dict and initialise it with the given type and private data. */
dict *dictCreate(dictType *type, void *privDataPtr)
{
    dict *created = zmalloc(sizeof(*created));

    _dictInit(created, type, privDataPtr);
    return created;
}

/* Initialise an already allocated dict: both tables reset, no rehash in
 * progress, no iterators. Returns DICT_OK. */
int _dictInit(dict *d, dictType *type, void *privDataPtr)
{
    _dictReset(&d->ht[0]);
    _dictReset(&d->ht[1]);
    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1; /* -1 means no rehash in progress */
    d->iterators = 0;
    return DICT_OK;
}

/* Put a hash table into the empty state; no bucket array is allocated. */
static void _dictReset(dictht *ht)
{
    ht->table = NULL;
    ht->size = 0;
    ht->sizemask = 0;
    ht->used = 0;
}

/* Create the first hash table, or prepare a second one for rehashing.
 * `size` is rounded by _dictNextPower. Returns DICT_ERR when a rehash is
 * already running or `size` is smaller than the number of stored entries;
 * DICT_OK otherwise. */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    /* rehashidx 0 marks the rehash as started, so later steps can move buckets */
    d->rehashidx = 0;
    return DICT_OK;
}

/* Return the first value >= `size` reached by doubling from
 * DICT_HT_INITIAL_SIZE. Sizes at or above LONG_MAX yield LONG_MAX. */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;

    if (size >= LONG_MAX) return LONG_MAX;
    while(1) {
        if (i >= size) /* a request below the initial size gets the initial size */
            return i;
        i *= 2;
    }
}

/* Add a key/value pair to the dict.
 * Returns DICT_ERR when dictAddRaw yields no entry (e.g. the key already
 * exists), DICT_OK once the value has been stored. */
int dictAdd(dict *d, void *key, void *val)
{
    /* Try to add the key; on success this is the new entry holding it. */
    dictEntry *entry = dictAddRaw(d,key);

    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
}

/* Insert `key` and return the new entry, leaving the value unset.
 * Returns NULL when _dictKeyIndex reports -1 (key already present, or the
 * table could not be expanded). */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int slot;
    dictEntry *node;
    dictht *target;

    /* Perform one step of incremental rehashing if one is in progress. */
    if (dictIsRehashing(d)) {
        _dictRehashStep(d);
    }

    slot = _dictKeyIndex(d, key);
    if (slot == -1) {
        return NULL;
    }

    /* While rehashing, new keys go into table 1. */
    if (dictIsRehashing(d)) {
        target = &d->ht[1];
    } else {
        target = &d->ht[0];
    }

    /* Link the new entry at the head of its bucket chain. */
    node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    dictSetKey(d, node, key);
    return node;
}

/* Run a single rehash step, but only while no iterators are active. */
static void _dictRehashStep(dict *d) {
    if (d->iterators == 0) dictRehash(d,1);
}

/* Move up to `n` non-empty buckets from ht[0] to ht[1].
 * Returns 0 when no rehash is running or the whole table has been moved
 * (ht[1] then becomes ht[0]); returns 1 when work remains. */
int dictRehash(dict *d, int n) {
    /* Only valid while a rehash is in progress. */
    if (!dictIsRehashing(d)) return 0;

    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1]; /* the new table becomes the primary one */
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned)d->rehashidx);
        /* Skip forward to the first non-empty bucket. */
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            unsigned int h;

            nextde = de->next; /* saved because de->next is overwritten below */
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    return 1;
}

/* Return the bucket index where `key` can be inserted, or -1 when the key
 * is already present or the table could not be expanded. While a rehash is
 * running the returned index refers to ht[1]. */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int h, idx, table;
    dictEntry *he;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    /* Compute the key hash value */
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return -1;
            he = he->next;
        }
        /* The key is not in this table. Only when a rehash is in progress
         * does the second table need to be searched as well. */
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}

/* Hash a key with the dict's type-specific hash function. */
#define dictHashKey(d, key) (d)->type->hashFunction(key)

/* Store the key in an entry, duplicating it first when the type has keyDup. */
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
    else \
        (entry)->key = (_key_); \
} while(0)

/* Store the value in an entry, duplicating it first when the type has valDup. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
        (entry)->v.val = (_val_); \
} while(0)

rehash：

/* Grow the hash table when required. Returns DICT_OK when nothing needs to
 * be done, otherwise the result of dictExpand. */
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if (d->ht[0].used >= d->ht[0].size &&
        (dict_can_resize ||
         d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
    {
        return dictExpand(d, d->ht[0].used*2);
    }
    return DICT_OK;
}

//hash函数使用的是MurmurHash2原版，高运算性能，低碰撞率。

/**

* MurMurHash算法，是非加密HASH算法，性能很高，

* 比传统的CRC32,MD5，SHA-1（这两个算法都是加密HASH算法，复杂度本身就很高，带来的性能上的损害也不可避免）

* 等HASH算法要快很多，而且据说这个算法的碰撞率很低.

* http://murmurhash.googlepages.com/

unsignedint dictGenHashFunction(const void *key, int len) {

/* 'm' and 'r' are mixing constantsgenerated offline.

They're not really 'magic', they justhappen to work well. */

uint32_t seed = dict_hash_function_seed;

const uint32_t m = 0x5bd1e995;

const int r = 24;

/* Initialize the hash to a 'random' value*/

uint32_t h = seed ^ len;

/* Mix 4 bytes at a time into the hash */

const unsigned char *data = (const unsignedchar *)key;

while(len >= 4) {

uint32_t k = *(uint32_t*)data;

k *= m;

k ^= k >> r;

k *= m;

h *= m;

h ^= k;

data += 4;

len -= 4;

}

/* Handle the last few bytes of the inputarray */

switch(len) {

case 3: h ^= data[2] << 16;

case 2: h ^= data[1] << 8;

case 1: h ^= data[0]; h *= m;

};

/* Do a few final mixes of the hash toensure the last few

* bytes are well-incorporated. */

h ^= h >> 13;

h *= m;

h ^= h >> 15;

return (unsigned int)h;

}

五、跳跃表

适用场景：

redis在内部只有2个地方用到了跳跃表，第一个是有序集合键（键较多；或者值元素比较长）；第二个是集群节点中。

原理解释：

跳跃表节点定义：

typedefstruct zskiplistNode {

robj *obj; //redis对象

double score; //分值，跳跃表中分值是按照从小到大排列的

struct zskiplistNode *backward; //后退指针

struct zskiplistLevel {

struct zskiplistNode *forward;//前进指针

unsigned int span; //跨度，记录2个节点之间的距离，越大距离就越远

} level[];//层数组

}zskiplistNode;

跳跃表定义：

/* The skip list itself. */
typedef struct zskiplist {
    struct zskiplistNode *header, *tail;
    unsigned long length; /* number of nodes, not counting the header node */
    int level;            /* level count of the tallest node in the list */
} zskiplist;

//创建一个跳跃表

zskiplist*zslCreate(void) {

int j;

zskiplist *zsl;

zsl = zmalloc(sizeof(*zsl)); //分配空间

zsl->level = 1; //默认是1层

zsl->length = 0;//节点数为0

//分配了一个最大层数32层的大小的空间

//#defineZSKIPLIST_MAXLEVEL 32

zsl->header =zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);

for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++){

zsl->header->level[j].forward =NULL;

zsl->header->level[j].span = 0;//跨度为0标示没连接到任何节点

}

zsl->header->backward = NULL;

zsl->tail = NULL;

return zsl;

}

/* Allocate a skip list node with room for `level` levels and store the
 * given score and object in it. The level array is left uninitialised. */
zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
    zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel));

    zn->score = score;
    zn->obj = obj;
    return zn;
}

六、整数集合

适用场景：

整数集合是集合键的底层实现之一：当集合里面只包含整数，并且元素数量不多的时候，redis就用整数集合作为集合键的底层实现。

原理解释：

整数集合定义：

/* Integer set header followed by the packed elements. */
typedef struct intset {
    uint32_t encoding; /* element encoding */
    uint32_t length;   /* number of elements */
    int8_t contents[]; /* elements, sorted ascending with no duplicates; their
                        * real width depends on `encoding` */
} intset;

/* Create an empty intset using the narrowest encoding.
 *
 * The encoding constants equal the element width in bytes:
 *   #define INTSET_ENC_INT16 (sizeof(int16_t))   -32768 ~ 32767
 *   #define INTSET_ENC_INT32 (sizeof(int32_t))   -2^31 ~ 2^31-1
 *   #define INTSET_ENC_INT64 (sizeof(int64_t))   -2^63 ~ 2^63-1 */
intset *intsetNew(void) {
    intset *is = zmalloc(sizeof(intset));

    is->encoding = intrev32ifbe(INTSET_ENC_INT16);
    is->length = 0;
    return is;
}

/* Insert `value` into the set and return the (possibly reallocated) set.
 * When `success` is non-NULL it is set to 1 if the value was added and to 0
 * if it was already present. */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    /* Encoding required to hold the new value. */
    uint8_t valenc = _intsetValueEncoding(value);
    uint32_t pos;

    if (success) *success = 1;

    /* Upgrade encoding if necessary. If we need to upgrade, we know that
     * this value should be either appended (if > 0) or prepended (if < 0),
     * because it lies outside the range of existing values. */
    if (valenc > intrev32ifbe(is->encoding)) {
        /* This always succeeds, so we don't need to curry *success. */
        return intsetUpgradeAndAdd(is,value);
    } else {
        /* Abort if the value is already present in the set.
         * This call will populate "pos" with the right position to insert
         * the value when it cannot be found. */
        if (intsetSearch(is,value,&pos)) {
            if (success) *success = 0;
            return is;
        }

        /* Make room for one more element. */
        is = intsetResize(is,intrev32ifbe(is->length)+1);
        /* Inserting before the end: shift the tail one slot to the right. */
        if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
    }

    _intsetSet(is,pos,value);
    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}

//获取数据的编码格式

/* Return the required encoding for theprovided value.

static uint8_t _intsetValueEncoding(int64_tv) {

if (v < INT32_MIN || v > INT32_MAX)

return INTSET_ENC_INT64;

else if (v < INT16_MIN || v > INT16_MAX)

return INTSET_ENC_INT32;

else

return INTSET_ENC_INT16;

}

/* Switch the set to the wider encoding that `value` requires, then add
 * `value`. Returns the reallocated set. */
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
    uint8_t oldenc = intrev32ifbe(is->encoding);
    uint8_t wideenc = _intsetValueEncoding(value);
    int remaining = intrev32ifbe(is->length);
    /* A negative value goes to the front, anything else to the back. */
    int shift = (value < 0) ? 1 : 0;

    /* Record the new encoding first, then grow by one element. */
    is->encoding = intrev32ifbe(wideenc);
    is = intsetResize(is, intrev32ifbe(is->length) + 1);

    /* Re-encode from the last element to the first so that widened values
     * never overwrite elements that are still in the old encoding. `shift`
     * leaves an empty slot at the beginning when prepending. */
    while (remaining--) {
        _intsetSet(is, remaining + shift, _intsetGetEncoded(is, remaining, oldenc));
    }

    /* Store the new value in the slot left free at the front or the back. */
    if (shift) {
        _intsetSet(is, 0, value);
    } else {
        _intsetSet(is, intrev32ifbe(is->length), value);
    }

    is->length = intrev32ifbe(intrev32ifbe(is->length) + 1);
    return is;
}

/* Reallocate the set so it can hold `len` elements of the current encoding.
 * Only the allocation size changes; no element is written here. */
static intset *intsetResize(intset *is, uint32_t len) {
    uint32_t payload = len * intrev32ifbe(is->encoding);

    is = zrealloc(is, sizeof(intset) + payload);
    return is;
}

/* Return the value at pos, given anencoding. 根据当前编码，返回索引上的值

static int64_t _intsetGetEncoded(intset*is, int pos, uint8_t enc) {

int64_t v64;

int32_t v32;

int16_t v16;

if (enc == INTSET_ENC_INT64) {

memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64));

memrev64ifbe(&v64);

return v64;

}else if (enc == INTSET_ENC_INT32) {

memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32));

memrev32ifbe(&v32);

return v32;

}else {

memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16));

memrev16ifbe(&v16);

return v16;

}

/* Set the value at pos, using theconfigured encoding. 根据当前编码，设置索引上的值

* T= O(1)

static void _intsetSet(intset *is, int pos,int64_t value) {

uint32_t encoding = intrev32ifbe(is->encoding);

if (encoding == INTSET_ENC_INT64) {

((int64_t*)is->contents)[pos] = value;

memrev64ifbe(((int64_t*)is->contents)+pos);

}else if (encoding == INTSET_ENC_INT32) {

((int32_t*)is->contents)[pos] = value;

memrev32ifbe(((int32_t*)is->contents)+pos);

}else {

((int16_t*)is->contents)[pos] = value;

memrev16ifbe(((int16_t*)is->contents)+pos);

}

/* Search for the position of "value". Return 1 when the value was found and
 * sets "pos" to the position of the value within the intset. Return 0 when
 * the value is not present in the intset and sets "pos" to the position
 * where "value" can be inserted. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
    int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
    int64_t cur = -1;

    /* The value can never be found when the set is empty */
    if (intrev32ifbe(is->length) == 0) {
        if (pos) *pos = 0;
        return 0;
    } else {
        /* Check for the case where we know we cannot find the value,
         * but do know the insert position. */
        if (value > _intsetGet(is,intrev32ifbe(is->length)-1)) {
            /* Larger than the last element: insert at the end. */
            if (pos) *pos = intrev32ifbe(is->length);
            return 0;
        } else if (value < _intsetGet(is,0)) {
            /* Smaller than the first element: insert at the front. */
            if (pos) *pos = 0;
            return 0;
        }
    }

    /* Binary search over the sorted contents. */
    while(max >= min) {
        mid = ((unsigned int)min + (unsigned int)max) >> 1;
        cur = _intsetGet(is,mid);
        if (value > cur) {
            min = mid+1;
        } else if (value < cur) {
            max = mid-1;
        } else {
            break;
        }
    }

    if (value == cur) {
        if (pos) *pos = mid;
        return 1;
    } else {
        if (pos) *pos = min;
        return 0;
    }
}

/* Move the elements from index `from` up to the end of the set so that they
 * start at index `to`. memmove is used, so the two ranges may overlap. */
static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
    uint32_t bytes = intrev32ifbe(is->length) - from;
    uint32_t enc = intrev32ifbe(is->encoding);
    size_t width;
    void *src, *dst;

    /* Element width in bytes for the current encoding. */
    if (enc == INTSET_ENC_INT64) {
        width = sizeof(int64_t);
    } else if (enc == INTSET_ENC_INT32) {
        width = sizeof(int32_t);
    } else {
        width = sizeof(int16_t);
    }

    src = is->contents + from * width;
    dst = is->contents + to * width;
    bytes *= width;

    memmove(dst, src, bytes);
}

七、压缩列表

适用场景：

压缩列表是列表键和hash键的底层实现之一：当一个列表键只包含较小的整数或者长度比较短的字符串时，那么就使用压缩列表。

原理解释：

//ziplist是一个特殊编码的双端链表，保存了字符串和整形，整数保存的是实际整数而不是字符串。

/* The ziplist is a specially encoded dually linked list that is designed
 * to be very memory efficient. It stores both strings and integer values,
 * where integers are encoded as actual integers instead of a series of
 * characters. It allows push and pop operations on either side of the list
 * in O(1) time. However, because every operation requires a reallocation of
 * the memory used by the ziplist, the actual complexity is related to the
 * amount of memory used by the ziplist.

*----------------------------------------------------------------------------

*ZIPLIST OVERALL LAYOUT:

*The general layout of the ziplist is as follows://存储模型如下

*<zlbytes><zltail><zllen><entry><entry><zlend>

* 4字节 4字节 2字节 1字节

*<zlbytes> is an unsigned integer to hold the number of bytes that the//标示存储的字节数，通过这个值可以对ziplist进行调整而不用遍历所有获取大小

*ziplist occupies. This value needs to be stored to be able to resize the

*entire structure without the need to traverse it first.

*<zltail> is the offset to the last entry in the list. This allows a pop//保存了链表的尾部偏移

*operation on the far side of the list without the need for full traversal.

 * <zllen> is the number of entries. When this value is larger than 2**16-2, //保存节点的数目；该字段只有2个字节，当节点数超过2^16-2时，
 * we need to traverse the entire list to know how many items it holds. //需要遍历整个列表才能知道真实的节点数

*<zlend> is a single byte special value, equal to 255, which indicates the//标示ziplist的尾部，值是255

*end of the list.

//每个entry保存了2部分内容：1、前置节点的长度；2、当前节点的编码类型和长度

* Every entry in the ziplist is prefixed bya header that contains two pieces

* ofinformation. First, the length of the previous entry is stored to be

*able to traverse the list from back to front. Second, the encoding with an

*optional string length of the entry itself is stored.

entry:[ previous_length][encoding][content]

//如果前置长度小于254个字节，只用1个字节保存值；如果大于或者等于254个字节，它将占5个字节，第一个字节设置为254，其他4个字节存储长度。

* The length of the previous entry isencoded in the following way:

* Ifthis length is smaller than 254 bytes, it will only consume a single

*byte that takes the length as value. When the length is greater than or

*equal to 254, it will consume 5 bytes. The first byte is set to 254 to

*indicate a larger value is following. The remaining 4 bytes take the

*length of the previous entry as value.

//其他的字段是需要根据内容来存放的，如果存放的是一个字符串，那么头2个位存放编码字符串所使用的类型，接下来的位是存放字符串的长度，如果头2个位都是1那么接下来2个标示存储整形的类型

* The other header field of the entryitself depends on the contents of the

*entry. When the entry is a string, the first 2 bits of this header will hold

*the type of encoding used to store the length of the string, followed by the

*actual length of the string. When the entry is an integer the first 2 bits

*are both set to 1. The following 2 bits are used to specify what kind of

*integer will be stored after this header. An overview of the different

*types and encodings is as follows:

 * |00pppppp| - 1 byte //00 字符串长度小于等于63字节
 *      String value with length less than or equal to 63 bytes (6 bits).
 * |01pppppp|qqqqqqqq| - 2 bytes //01 字符串长度小于等于16383字节
 *      String value with length less than or equal to 16383 bytes (14 bits).
 * |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes //10 字符串长度大于等于16384字节
 *      String value with length greater than or equal to 16384 bytes.
 * |11000000| - 1 byte //11之后的00标示 2个字节(int16_t)
 *      Integer encoded as int16_t (2 bytes).

*|11010000| - 1 byte

* Integer encoded as int32_t (4 bytes). //01标示 4个字节(int32_t)

*|11100000| - 1 byte

* Integer encoded as int64_t (8 bytes). //10标示 8个字节(int64_t)

*|11110000| - 1 byte

* Integer encoded as 24 bit signed (3 bytes). //11 标示存放的是3个字节的有符号整形

*|11111110| - 1 byte

* Integer encoded as 8 bit signed (1 byte). //11111 表中存放的是1个字节有符号整形

 * |1111xxxx| - (with xxxx between 0000 and 1101) immediate 4 bit integer. //低4位直接存放0~12的无符号整数（编码值减1才是实际值），这种编码没有单独的content部分
 *      Unsigned integer from 0 to 12. The encoded value is actually from
 *      1 to 13 because 0000 and 1111 can not be used, so 1 should be
 *      subtracted from the encoded 4 bit value to obtain the right value.
 * |11111111| - End of ziplist. //标示ziplist结尾
 *
 * All the integers are represented in little endian byte order.
 */

美了美了

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录