redis原理-数据结构
一、 内存分配
redis内存分配函数是在文件zmalloc.h和zmalloc.c里面进行声明和定义的,主要的函数如下:
/* Allocate `size` bytes of memory. */
void *zmalloc(size_t size);
/* Resize the allocation at `ptr` to `size` bytes. */
void *zrealloc(void *ptr, size_t size);
/* Release the allocation at `ptr`. */
void zfree(void *ptr);
redis使用了zmalloc zrealloc zfree来封装了内存管理的函数,这里针对不同的平台来封装,从而屏蔽了底层的差异性实现跨平台,定义如下:
/*
 * Pick the allocator backend at compile time. When the backend can report
 * the size of an allocation, HAVE_MALLOC_SIZE is defined and zmalloc_size()
 * maps to the backend's query function. ZMALLOC_LIB names the allocator and
 * defaults to "libc" when no branch defined it.
 */
#if defined(USE_TCMALLOC) /* tcmalloc selected */
#define ZMALLOC_LIB ("tcmalloc-" __xstr(TC_VERSION_MAJOR) "." __xstr(TC_VERSION_MINOR))
#include <google/tcmalloc.h>
#if (TC_VERSION_MAJOR == 1 && TC_VERSION_MINOR >= 6) || (TC_VERSION_MAJOR > 1)
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) tc_malloc_size(p)
#else
#error "Newer version of tcmalloc required"
#endif

#elif defined(USE_JEMALLOC) /* jemalloc selected */
#define ZMALLOC_LIB ("jemalloc-" __xstr(JEMALLOC_VERSION_MAJOR) "." __xstr(JEMALLOC_VERSION_MINOR) "." __xstr(JEMALLOC_VERSION_BUGFIX))
#include <jemalloc/jemalloc.h>
#if (JEMALLOC_VERSION_MAJOR == 2 && JEMALLOC_VERSION_MINOR >= 1) || (JEMALLOC_VERSION_MAJOR > 2)
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) je_malloc_usable_size(p)
#else
#error "Newer version of jemalloc required"
#endif

#elif defined(__APPLE__) /* Apple platforms: libc malloc can report sizes */
#include <malloc/malloc.h>
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) malloc_size(p)
#endif

#ifndef ZMALLOC_LIB
#define ZMALLOC_LIB "libc"
#endif
定义平台之间的差异,主要是tcmalloc(google)、jemalloc(facebook)、苹果平台。
上边说过,封装就是为了屏蔽底层平台的差异,同时方便自己实现相关的统计函数。具体来说就是:
若系统中存在Google的TC_MALLOC库,则使用tc_malloc一族函数代替原本的malloc一族函数。
若编译时选用了jemalloc,则使用jemalloc,并通过je_malloc_usable_size获取已分配内存块的大小。
若当前系统是Mac系统,则使用<malloc/malloc.h>中的内存分配函数。
其他情况,在每一段分配好的空间前头,同时多分配一个定长的字段,用来记录分配的空间大小。
/*
 * PREFIX_SIZE is the size of the length field stored in front of each
 * allocation. It is 0 when HAVE_MALLOC_SIZE is defined, because the
 * allocator can then report allocation sizes itself.
 */
#ifdef HAVE_MALLOC_SIZE
#define PREFIX_SIZE (0)
#else
#if defined(__sun) || defined(__sparc) || defined(__sparc__)
#define PREFIX_SIZE (sizeof(long long)) /* Sun / SPARC builds */
#else
#define PREFIX_SIZE (sizeof(size_t))
#endif
#endif
如果是sun下就使用 sizeof(long long),如果是linux就使用sizeof(size_t)了。
二、 简单字符串
适用场景:
redis默认字符串都是使用sds。
优点:
获取字符串长度:复杂度O(1),直接使用sdshdr->len就能获取到长度。而常规的C字符串需要遍历整个字符串才能得到长度,复杂度是O(N)。
二进制安全存储:使用长度来规定了数据的存放长度,而c语言字符数组遇到\0就认为是结尾了,使得redis不仅仅可以存放字符串还可以存放任意二进制数据。
杜绝缓冲区溢出:sds存放的数据长度都是指定的,不存在溢出。
修改字符串减少内存重分配次数:在sdscat调用的sdsMakeRoomFor实现中,当剩余空间不足以容纳要追加的内容时,先计算追加后的新长度(len+addlen):如果新长度小于1M,就按新长度的2倍分配空间;如果新长度大于等于1M,就在新长度的基础上再多分配1M的空间。
惰性释放:在sdstrim(sdsclear)中,会把删除的字符空间长度累加sdshdr->free里。
原理解释:
简单动态字符串(simple dynamic string,SDS),定义在sds.h头文件里面,包含了sds的定义以及sds相关的基础操作函数。
sds定义:
/* Header stored immediately before the character data of an sds string. */
struct sdshdr {
int len; /* bytes of buf currently in use, not counting the trailing '\0' */
int free; /* unused bytes still available in buf after the used part */
char buf[]; /* flexible array member: the data followed by a '\0' terminator */
};
这里最后一个成员buf[]没有指定长度,这是一种叫做柔性数组(flexible array member)的技巧:头部和字符数据在一次内存分配中连续存放(见下面sdsnewlen中的sizeof(struct sdshdr)+initlen+1)。
redis使用sds和常规的字符串好处有:
sds定义的API有:
sdsnew :创建一个给定c字符串的sds
/* Create an sds string from a NUL-terminated C string.
 * A NULL `init` is passed on with length 0. */
sds sdsnew(const char *init) {
    size_t initlen = 0;

    if (init != NULL) {
        initlen = strlen(init);
    }
    return sdsnewlen(init, initlen);
}
sds sdsnewlen(const void *init, size_tinitlen) {
struct sdshdr *sh;
if (init) {
sh = zmalloc(sizeof(struct sdshdr)+initlen+1); //内容 头部+数据
}else {
sh = zcalloc(sizeof(struct sdshdr)+initlen+1);
}
if (sh == NULL) return NULL;
sh->len = initlen;
sh->free = 0;
if (initlen && init)
memcpy(sh->buf, init, initlen);
sh->buf[initlen] = '\0';
return (char*)sh->buf;
}
sdslen :返回sds已经使用的空间字节数
这种通过sds可以直接获取到sds的头部
/* Return the number of bytes in use by the sds string `s`, read from the
 * header located sizeof(struct sdshdr) bytes before the buffer. */
static inline size_t sdslen(const sds s) {
    const size_t hdrlen = sizeof(struct sdshdr);
    struct sdshdr *hdr = (void *)(s - hdrlen);

    return hdr->len;
}
sdscat:追加一个字符串到sds尾部
/*
 * Make sure `s` has at least `addlen` free bytes after its current content.
 *
 * When the free space already suffices, `s` is returned untouched.
 * Otherwise the target length (len + addlen) is doubled if it is below
 * SDS_MAX_PREALLOC, or increased by SDS_MAX_PREALLOC if not, and the string
 * is reallocated to that capacity. Only the `free` field is updated.
 *
 * Returns the (possibly moved) string, or NULL when reallocation fails.
 */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    size_t avail = sdsavail(s);

    if (avail >= addlen) {
        return s;
    }

    size_t used = sdslen(s);
    size_t capacity = used + addlen;

    /* Over-allocate so that repeated appends need fewer reallocations. */
    capacity = (capacity < SDS_MAX_PREALLOC) ? capacity * 2
                                             : capacity + SDS_MAX_PREALLOC;

    struct sdshdr *oldhdr = (void *)(s - sizeof(struct sdshdr));
    struct sdshdr *grown = zrealloc(oldhdr, sizeof(struct sdshdr) + capacity + 1);

    if (grown == NULL) {
        return NULL;
    }
    grown->free = capacity - used;
    return grown->buf;
}
/*
 * Append `len` bytes starting at `t` to the sds string `s`.
 *
 * Room is obtained through sdsMakeRoomFor, so the string may be moved.
 * Callers must use the returned pointer. Returns NULL when the room could
 * not be obtained.
 */
sds sdscatlen(sds s, const void *t, size_t len) {
    struct sdshdr *sh;
    size_t curlen = sdslen(s);

    /* May over-allocate, which reduces later reallocations. */
    s = sdsMakeRoomFor(s, len);
    if (s == NULL) return NULL;
    sh = (void *)(s - (sizeof(struct sdshdr)));
    memcpy(s + curlen, t, len);
    sh->len = curlen + len;
    /* The appended bytes consume part of the free space. */
    sh->free = sh->free - len;
    s[curlen + len] = '\0';
    return s;
}
/* Append the NUL-terminated C string `t` to `s`; see sdscatlen for the
 * return value. */
sds sdscat(sds s, const char *t) {
    size_t tlen = strlen(t);

    return sdscatlen(s, t, tlen);
}
sdstrim:清除sds首尾的字符(可指定多个)
/*
 * Strip from both ends of `s` every leading and trailing character that
 * occurs in the C string `cset`.
 *
 * The surviving bytes are moved to the start of the buffer. No memory is
 * released: the removed length is added to the header's `free` field.
 * Returns `s`.
 */
sds sdstrim(sds s, const char *cset) {
    struct sdshdr *hdr = (void *)(s - (sizeof(struct sdshdr)));
    char *first = s;
    char *last = s + sdslen(s) - 1;
    char *head = first;
    char *tail = last;
    size_t kept;

    /* Skip trimmable characters from the left, then from the right. */
    for (; head <= last && strchr(cset, *head); head++) {
    }
    for (; tail > first && strchr(cset, *tail); tail--) {
    }

    if (head > tail) {
        kept = 0;
    } else {
        kept = (tail - head) + 1;
    }
    if (hdr->buf != head) {
        memmove(hdr->buf, head, kept);
    }
    hdr->buf[kept] = '\0';
    /* Keep the trimmed space as free space instead of shrinking the buffer. */
    hdr->free = hdr->free + (hdr->len - kept);
    hdr->len = kept;
    return s;
}
三、 双端链表
适用场景:
链表在redis应用比较广泛,列表键底层实现之一就是链表;客户端信息;
采用双端链表可以定位到头部或者尾部,每个节点的数据类型为任意数据类型,通过定义复制、释放、比较的函数指针实现了可以操作任意用户自定义数据。
原理解释:
链表节点:
/* One node of the doubly linked list. */
typedef struct listNode {
struct listNode *prev; // previous node
struct listNode *next;// next node
void *value; // pointer to the value held by this node
} listNode;
链表定义:
/* Doubly linked list with head and tail pointers, per-list value callbacks
 * and a node count. */
typedef struct list {
listNode *head;
listNode *tail;
// callback that duplicates a node value
void *(*dup)(void *ptr);
// callback that releases a node value
void (*free)(void *ptr);
// callback that compares a node value against a key
int (*match)(void *ptr, void *key);
// number of nodes in the list
unsigned long len;
} list;
/* Prototypes */
list *listCreate(void);
void listRelease(list *list);
/* Inserts a node holding `value` at the head of `list` (definition follows). */
list *listAddNodeHead(list *list, void*value);
/*
 * Insert a new node holding `value` at the head of `list`.
 *
 * Returns `list` on success, or NULL when the node cannot be allocated, in
 * which case the list is left unchanged.
 */
list *listAddNodeHead(list *list, void *value)
{
    listNode *fresh = zmalloc(sizeof(*fresh));

    if (fresh == NULL) {
        return NULL;
    }
    fresh->value = value;
    fresh->prev = NULL;
    if (list->len != 0) {
        /* Link in front of the current head. */
        fresh->next = list->head;
        list->head->prev = fresh;
    } else {
        /* Empty list: the new node is also the tail. */
        fresh->next = NULL;
        list->tail = fresh;
    }
    list->head = fresh;
    list->len++;
    return list;
}
四、 字典
适用场景:
在redis中字典的应用很广泛,因为redis是一个键值(k-v)内存数据库,所以存储的都是以字典作为基准的。字典也是hash键的底层实现之一:当保存的元素是比较长的字符串,或者hash键值对比较多的时候,就会使用字典来存储。
原理解释:
由于字典是由hash表存储的,hash表的定义如下:
typedefstruct dictht {
dictEntry **table; // 哈希表数组
unsigned long size; // 哈希表大小
unsigned long sizemask; // 哈希表大小掩码,用于计算索引值,等于 size -1
unsigned long used; // 已经使用的
}dictht;
hash表节点
/* One key/value entry of a hash table bucket. */
typedef struct dictEntry {
    void *key;
    union { /* the value can be a pointer or a 64-bit integer */
        void *val;
        uint64_t u64;
        int64_t s64;
    } v;
    struct dictEntry *next; /* next entry in the same bucket (collision chain) */
} dictEntry;
字典类型:
/* Per-dictionary callbacks that let a dict hold arbitrary key and value
 * types. */
typedef struct dictType {
    /* hash a key */
    unsigned int (*hashFunction)(const void *key);
    /* duplicate a key */
    void *(*keyDup)(void *privdata, const void *key);
    /* duplicate a value */
    void *(*valDup)(void *privdata, const void *obj);
    /* compare two keys */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* destroy a key */
    void (*keyDestructor)(void *privdata, void *key);
    /* destroy a value */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;
字典的定义:
typedefstruct dict {
dictType *type; //可以实现任意用户类型,只需要设定了特定的类型操作函数
void *privdata;
dictht ht[2];
int rehashidx; /* rehashing not in progressif rehashidx == -1 */
int iterators; /* number of iteratorscurrently running */
}dict;
//其中rehash是对hash表进行增大或者减小:当hash表的负载因子(负载因子=dictht.used/dictht.size)达到阈值时,就会触发扩展(见后面的_dictExpandIfNeeded)。
创建一个字典
dict *dictCreate(dictType *type, void *privDataPtr);

/* Allocate a dictionary and initialise it with the given type callbacks and
 * private data. The result of _dictInit is ignored. */
dict *dictCreate(dictType *type, void *privDataPtr)
{
    dict *fresh = zmalloc(sizeof(*fresh));

    _dictInit(fresh, type, privDataPtr);
    return fresh;
}
/* Initialise an already allocated dictionary: both tables are reset, no
 * rehash is in progress and no iterator is running. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    _dictReset(&d->ht[0]);
    _dictReset(&d->ht[1]);
    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1; /* -1 means "not rehashing" */
    d->iterators = 0;
    return DICT_OK;
}
/* Put a hash table into the empty state. The bucket array is only dropped
 * from the struct, not freed. */
static void _dictReset(dictht *ht)
{
    ht->table = NULL; /* buckets are allocated later, by dictExpand */
    ht->size = 0;
    ht->sizemask = 0;
    ht->used = 0;
}
/*
 * Create the hash table, or prepare an incremental rehash into a larger one.
 *
 * `size` is rounded up by _dictNextPower. Returns DICT_ERR when a rehash is
 * already in progress or when ht[0] holds more entries than `size`. Returns
 * DICT_OK otherwise.
 */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize - 1;
    n.table = zcalloc(realsize * sizeof(dictEntry *));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    /* rehashidx 0 marks the rehash as started; single steps follow later. */
    d->rehashidx = 0;
    return DICT_OK;
}
/* Starting from DICT_HT_INITIAL_SIZE, keep doubling until the value is
 * >= `size`, and return it. Requests of LONG_MAX or more return LONG_MAX. */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;

    if (size >= LONG_MAX) return LONG_MAX;
    while (1) {
        if (i >= size) /* also covers sizes below the initial size */
            return i;
        i *= 2;
    }
}
/*
 * Add the pair (key, val) to the dictionary.
 *
 * Returns DICT_ERR when dictAddRaw yields no entry (for instance because
 * the key already exists), DICT_OK otherwise.
 */
int dictAdd(dict *d, void *key, void *val)
{
    /* Insert the key and get back the entry that holds it. */
    dictEntry *entry = dictAddRaw(d, key);

    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
}
/*
 * Insert `key` and return the new entry with its value left unset.
 *
 * Runs one rehash step first when a rehash is in progress. Returns NULL
 * when _dictKeyIndex reports -1 (the key exists or the table could not be
 * expanded). New entries go to ht[1] during a rehash, otherwise to ht[0],
 * and are linked at the head of their bucket.
 */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int slot;
    dictEntry *fresh;
    dictht *target;

    if (dictIsRehashing(d)) {
        _dictRehashStep(d);
    }

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    slot = _dictKeyIndex(d, key);
    if (slot == -1) {
        return NULL;
    }

    /* Allocate the memory and store the new entry */
    if (dictIsRehashing(d)) {
        target = &d->ht[1];
    } else {
        target = &d->ht[0];
    }
    fresh = zmalloc(sizeof(*fresh));
    fresh->next = target->table[slot];
    target->table[slot] = fresh;
    target->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, fresh, key);
    return fresh;
}
/* Perform a single rehash step, but only while no iterator is running. */
static void _dictRehashStep(dict *d) {
    if (d->iterators == 0) dictRehash(d, 1);
}
/*
 * Perform up to `n` steps of incremental rehashing. Each step moves all the
 * entries of one non-empty bucket from ht[0] to ht[1].
 *
 * Returns 0 when no rehash is in progress or when ht[0] has been emptied
 * (ht[1] then becomes ht[0] and rehashidx is reset to -1). Returns 1 when
 * `n` steps were done and entries may remain.
 */
int dictRehash(dict *d, int n) {
    /* Only valid while a rehash is in progress. */
    if (!dictIsRehashing(d)) return 0;

    while (n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1]; /* the new table becomes the main one */
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned)d->rehashidx);
        /* Advance to the next non-empty bucket. */
        while (d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while (de) {
            unsigned int h;

            /* Save the successor: de->next is overwritten below. */
            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    return 1;
}
/*
 * Return the bucket index where `key` can be inserted, or -1 when the key
 * already exists or the table could not be expanded.
 *
 * During a rehash both tables are searched, and the returned index refers
 * to ht[1]. Otherwise only ht[0] is searched and the index refers to it.
 */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int h, idx, table;
    dictEntry *he;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    /* Compute the key hash value */
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while (he) {
            if (dictCompareKeys(d, key, he->key))
                return -1;
            he = he->next;
        }
        /* The key is not in this table. Only when a rehash is in progress
         * does the second table have to be searched as well. */
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}
/* Hash `key` with the hash function of the dictionary's type. */
#define dictHashKey(d, key) (d)->type->hashFunction(key)

/* Store `_key_` in `entry`, duplicating it through keyDup when the
 * dictionary's type provides that callback. */
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
    else \
        (entry)->key = (_key_); \
} while(0)

/* Store `_val_` in `entry`, duplicating it through valDup when the
 * dictionary's type provides that callback. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
        (entry)->v.val = (_val_); \
} while(0)
rehash:
/*
 * Expand the hash table when required.
 *
 * Returns DICT_OK without doing anything while a rehash is in progress. An
 * empty table is expanded to DICT_HT_INITIAL_SIZE. Otherwise, once
 * used >= size, the table is expanded to used*2 if resizing is enabled
 * (dict_can_resize) or the used/size ratio exceeds
 * dict_force_resize_ratio. Returns the result of dictExpand in those cases,
 * DICT_OK otherwise.
 */
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if (d->ht[0].used >= d->ht[0].size &&
        (dict_can_resize ||
         d->ht[0].used / d->ht[0].size > dict_force_resize_ratio))
    {
        return dictExpand(d, d->ht[0].used * 2);
    }
    return DICT_OK;
}
/*
 * Hash `len` bytes starting at `key`, seeded with dict_hash_function_seed.
 *
 * The accompanying notes describe this as the original MurmurHash2, a fast
 * non-cryptographic hash with a low collision rate.
 * http://murmurhash.googlepages.com/
 *
 * NOTE(review): the main loop reads the input through a uint32_t pointer,
 * so the result depends on host byte order and the read assumes such
 * access is permitted for `key` — confirm for strict-alignment targets.
 */
unsigned int dictGenHashFunction(const void *key, int len) {
    /* 'm' and 'r' are mixing constants generated offline.
     They're not really 'magic', they just happen to work well. */
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a 'random' value */
    uint32_t h = seed ^ len;

    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = (const unsigned char *)key;

    while (len >= 4) {
        uint32_t k = *(uint32_t *)data;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes of the input array */
    switch (len) {
    case 3: h ^= data[2] << 16; /* fall through */
    case 2: h ^= data[1] << 8;  /* fall through */
    case 1: h ^= data[0]; h *= m;
    };

    /* Do a few final mixes of the hash to ensure the last few
     * bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return (unsigned int)h;
}
五、 跳跃表
适用场景:
redis在内部只有2个地方用到了跳跃表,第一个是有序集合键(键较多;或者值元素比较长);第二个是集群节点中。
原理解释:
跳跃表节点定义:
typedefstruct zskiplistNode {
robj *obj; //redis对象
double score; //分值,跳跃表中分值是按照从小到大排列的
struct zskiplistNode *backward; //后退指针
struct zskiplistLevel {
struct zskiplistNode *forward;//前进指针
unsigned int span; //跨度,记录2个节点之间的距离,越大距离就越远
} level[];//层数组
}zskiplistNode;
跳跃表定义:
/* The skip list itself. */
typedef struct zskiplist {
    struct zskiplistNode *header, *tail;
    unsigned long length; /* number of nodes, not counting the header node */
    int level;            /* level count of the node with the most levels */
} zskiplist;
//创建一个跳跃表
zskiplist*zslCreate(void) {
int j;
zskiplist *zsl;
zsl = zmalloc(sizeof(*zsl)); //分配空间
zsl->level = 1; //默认是1层
zsl->length = 0;//节点数为0
//分配了一个最大层数32层的大小的空间
//#defineZSKIPLIST_MAXLEVEL 32
zsl->header =zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++){
zsl->header->level[j].forward =NULL;
zsl->header->level[j].span = 0;//跨度为0标示没连接到任何节点
}
zsl->header->backward = NULL;
zsl->tail = NULL;
return zsl;
}
/* Allocate a skip list node with room for `level` levels and set its score
 * and object. The level array and the backward pointer are left
 * uninitialised for the caller to fill in. */
zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
    zskiplistNode *zn = zmalloc(sizeof(*zn) + level * sizeof(struct zskiplistLevel));

    zn->score = score;
    zn->obj = obj;
    return zn;
}
六、 整数集合
适用场景:
整数集合是集合键的底层实现之一,当集合里面只包含整数,并且数量不多的时候,redis就用整数集合作为集合键的底层实现。
原理解释:
整数集合定义:
/* Set of integers stored in a sorted array without duplicates. */
typedef struct intset {
    uint32_t encoding; /* element width in use (an INTSET_ENC_* value) */
    uint32_t length;   /* number of elements */
    int8_t contents[]; /* elements in ascending order; their real type depends on encoding */
} intset;
/*
 * Create an empty intset using the INTSET_ENC_INT16 encoding.
 *
 * Per the defines quoted in the original notes, each encoding constant is
 * the element size in bytes: sizeof(int16_t), sizeof(int32_t) or
 * sizeof(int64_t).
 */
intset *intsetNew(void) {
    intset *fresh = zmalloc(sizeof(*fresh));

    fresh->length = 0;
    fresh->encoding = intrev32ifbe(INTSET_ENC_INT16);
    return fresh;
}
/*
 * Insert `value` into the intset.
 *
 * When `success` is not NULL it is set to 1 if the value was added and to 0
 * if it was already present. Returns the intset, which may have been
 * reallocated, so callers must use the returned pointer.
 */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    /* Encoding required to hold `value`. */
    uint8_t valenc = _intsetValueEncoding(value);
    uint32_t pos;

    if (success) *success = 1;

    /* Upgrade encoding if necessary. If we need to upgrade, we know that
     * this value should be either appended (if > 0) or prepended (if < 0),
     * because it lies outside the range of existing values. */
    if (valenc > intrev32ifbe(is->encoding)) {
        /* This always succeeds, so we don't need to curry *success. */
        return intsetUpgradeAndAdd(is, value);
    } else {
        /* Abort if the value is already present in the set.
         * This call will populate "pos" with the right position to insert
         * the value when it cannot be found. */
        if (intsetSearch(is, value, &pos)) {
            if (success) *success = 0;
            return is;
        }

        /* Make room for one more element. */
        is = intsetResize(is, intrev32ifbe(is->length) + 1);
        /* Inserting before the end: shift the tail one slot to the right. */
        if (pos < intrev32ifbe(is->length)) intsetMoveTail(is, pos, pos + 1);
    }

    _intsetSet(is, pos, value);
    is->length = intrev32ifbe(intrev32ifbe(is->length) + 1);
    return is;
}
//获取数据的编码格式
/* Return the required encoding for theprovided value.
static uint8_t _intsetValueEncoding(int64_tv) {
if (v < INT32_MIN || v > INT32_MAX)
return INTSET_ENC_INT64;
else if (v < INT16_MIN || v > INT16_MAX)
return INTSET_ENC_INT32;
else
return INTSET_ENC_INT16;
}
/* Upgrade the intset to the encoding required by `value`, then insert
 * `value`. Returns the intset, which may have been reallocated. */
static intset *intsetUpgradeAndAdd(intset*is, int64_t value) {
uint8_t curenc = intrev32ifbe(is->encoding);
uint8_t newenc = _intsetValueEncoding(value);
int length = intrev32ifbe(is->length);
// A negative value is placed at the front, any other value at the back.
int prepend = value < 0 ? 1 : 0;
/* First set new encoding and resize */
is->encoding = intrev32ifbe(newenc);
is = intsetResize(is,intrev32ifbe(is->length)+1);
/* Upgrade back-to-front so we don't overwrite values.
* Note that the "prepend" variable is used to make sure we have an empty
* space at either the beginning or the end of the intset. */
// Each element is read with the old encoding and rewritten with the new one.
while(length--)
_intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
/* Set the value at the beginning or the end. */
if (prepend)
_intsetSet(is,0,value);
else
_intsetSet(is,intrev32ifbe(is->length),value);
is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
return is;
}
/* Reallocate the intset so that it can hold `len` elements of the current
 * encoding. Only the allocation size changes. Returns the reallocated
 * intset. */
static intset *intsetResize(intset *is, uint32_t len) {
    uint32_t payload = len * intrev32ifbe(is->encoding);

    return zrealloc(is, sizeof(intset) + payload);
}
/* Return the value at pos, given an encoding. Any `enc` other than
 * INTSET_ENC_INT64 and INTSET_ENC_INT32 is read as 16-bit. */
static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
    if (enc == INTSET_ENC_INT64) {
        int64_t wide;

        memcpy(&wide, ((int64_t *)is->contents) + pos, sizeof(wide));
        memrev64ifbe(&wide);
        return wide;
    }
    if (enc == INTSET_ENC_INT32) {
        int32_t medium;

        memcpy(&medium, ((int32_t *)is->contents) + pos, sizeof(medium));
        memrev32ifbe(&medium);
        return medium;
    }

    int16_t narrow;

    memcpy(&narrow, ((int16_t *)is->contents) + pos, sizeof(narrow));
    memrev16ifbe(&narrow);
    return narrow;
}
/* Set the value at pos, using the configured encoding. The value is
 * written with the width of the intset's current encoding. */
static void _intsetSet(intset *is, int pos, int64_t value) {
    uint32_t enc = intrev32ifbe(is->encoding);

    if (enc == INTSET_ENC_INT64) {
        ((int64_t *)is->contents)[pos] = value;
        memrev64ifbe(((int64_t *)is->contents) + pos);
        return;
    }
    if (enc == INTSET_ENC_INT32) {
        ((int32_t *)is->contents)[pos] = value;
        memrev32ifbe(((int32_t *)is->contents) + pos);
        return;
    }
    ((int16_t *)is->contents)[pos] = value;
    memrev16ifbe(((int16_t *)is->contents) + pos);
}
/* Search for the position of "value". Return 1 when the value was found and
 * sets "pos" to the position of the value within the intset. Return 0 when
 * the value is not present in the intset and sets "pos" to the position
 * where "value" can be inserted. "pos" may be NULL. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
    int lo = 0, hi = intrev32ifbe(is->length) - 1, mid = -1;
    int64_t probe = -1;

    /* The value can never be found when the set is empty */
    if (intrev32ifbe(is->length) == 0) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Check for the case where we know we cannot find the value,
     * but do know the insert position. */
    if (value > _intsetGet(is, intrev32ifbe(is->length) - 1)) {
        /* Greater than the last element: insert at the end. */
        if (pos) *pos = intrev32ifbe(is->length);
        return 0;
    }
    if (value < _intsetGet(is, 0)) {
        /* Smaller than the first element: insert at the front. */
        if (pos) *pos = 0;
        return 0;
    }

    /* Binary search over the sorted contents. */
    while (hi >= lo) {
        mid = ((unsigned int)lo + (unsigned int)hi) >> 1;
        probe = _intsetGet(is, mid);
        if (value == probe) {
            if (pos) *pos = mid;
            return 1;
        }
        if (value > probe) {
            lo = mid + 1;
        } else {
            hi = mid - 1;
        }
    }

    if (pos) *pos = lo;
    return 0;
}
/* Move the elements from index `from` up to the end of the intset so that
 * they start at index `to`. Uses memmove, so the two ranges may overlap. */
static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
    uint32_t nbytes = intrev32ifbe(is->length) - from;
    uint32_t enc = intrev32ifbe(is->encoding);
    void *src, *dst;

    if (enc == INTSET_ENC_INT64) {
        int64_t *base = (int64_t *)is->contents;

        src = base + from;
        dst = base + to;
        nbytes *= sizeof(int64_t);
    } else if (enc == INTSET_ENC_INT32) {
        int32_t *base = (int32_t *)is->contents;

        src = base + from;
        dst = base + to;
        nbytes *= sizeof(int32_t);
    } else {
        int16_t *base = (int16_t *)is->contents;

        src = base + from;
        dst = base + to;
        nbytes *= sizeof(int16_t);
    }
    memmove(dst, src, nbytes);
}
七、 压缩列表
适用场景:
压缩列表是列表键和hash键的底层实现之一,当一个列表键只包含较小的整数或者长度比较短的字符串,那么就使用压缩列表。
原理解释:
//ziplist是一个特殊编码的双端链表,保存了字符串和整形,整数保存的是实际整数而不是字符串。
/*The ziplist is a specially encoded dually linked list that is designed
* tobe very memory efficient. It stores both strings and integer values,
*where integers are encoded as actual integers instead of a series of
*characters. It allows push and pop operations on either side of the list
* inO(1) time. However, because every operation requires a reallocation of
*the memory used by the ziplist, the actual complexity is related to the
*amount of memory used by the ziplist.
*
*----------------------------------------------------------------------------
*
*ZIPLIST OVERALL LAYOUT:
*The general layout of the ziplist is as follows://存储模型如下
*<zlbytes><zltail><zllen><entry><entry><zlend>
* 4字节 4字节 2字节 1字节
*<zlbytes> is an unsigned integer to hold the number of bytes that the//标示存储的字节数,通过这个值可以对ziplist进行调整而不用遍历所有获取大小
*ziplist occupies. This value needs to be stored to be able to resize the
*entire structure without the need to traverse it first.
*
*<zltail> is the offset to the last entry in the list. This allows a pop//保存了链表的尾部偏移
*operation on the far side of the list without the need for full traversal.
*
*<zllen> is the number of entries. When this value is larger than 2**16-2,//保存节点的数目;当此值大于2^16-2时,需要遍历整个列表才能知道节点数
* weneed to traverse the entire list to know how many items it holds.
*
*<zlend> is a single byte special value, equal to 255, which indicates the//标示ziplist的尾部,值是255
*end of the list.
//每个entry保存了2部分内容:1、前置节点的长度;2、当前节点的编码类型和长度
* Every entry in the ziplist is prefixed bya header that contains two pieces
* ofinformation. First, the length of the previous entry is stored to be
*able to traverse the list from back to front. Second, the encoding with an
*optional string length of the entry itself is stored.
entry:[ previous_length][encoding][content]
//如果前置长度小于254个字节,只用1个字节保存值;如果大于或者等于254个字节,它将占5个字节,第一个字节设置为254,其他4个字节存储长度。
* The length of the previous entry isencoded in the following way:
* Ifthis length is smaller than 254 bytes, it will only consume a single
*byte that takes the length as value. When the length is greater than or
*equal to 254, it will consume 5 bytes. The first byte is set to 254 to
*indicate a larger value is following. The remaining 4 bytes take the
*length of the previous entry as value.
//其他的字段是需要根据内容来存放的,如果存放的是一个字符串,那么头2个位存放编码字符串所使用的类型,接下来的位是存放字符串的长度,如果头2个位都是1那么接下来2个标示存储整形的类型
* The other header field of the entryitself depends on the contents of the
*entry. When the entry is a string, the first 2 bits of this header will hold
*the type of encoding used to store the length of the string, followed by the
*actual length of the string. When the entry is an integer the first 2 bits
*are both set to 1. The following 2 bits are used to specify what kind of
*integer will be stored after this header. An overview of the different
*types and encodings is as follows:
* |00pppppp| - 1 byte //00 字符串长度小于等于63字节
* String value with length less than or equal to 63 bytes (6 bits).
*|01pppppp|qqqqqqqq| - 2 bytes //01 字符串长度小于等于16383字节
* String value with length less than or equal to 16383 bytes (14 bits).
*|10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes //10字符串大于或者等于16384的字符串
* String value with length greater than or equal to 16384 bytes.
*|11000000| - 1 byte //00标示 2个字节(int16_t)
* Integer encoded as int16_t (2 bytes).
*|11010000| - 1 byte
* Integer encoded as int32_t (4 bytes). //01标示 4个字节(int32_t)
*|11100000| - 1 byte
* Integer encoded as int64_t (8 bytes). //10标示 8个字节(int64_t)
*|11110000| - 1 byte
* Integer encoded as 24 bit signed (3 bytes). //11 标示存放的是3个字节的有符号整形
*|11111110| - 1 byte
* Integer encoded as 8 bit signed (1 byte). //11111 表中存放的是1个字节有符号整形
*|1111xxxx| - (with xxxx between 0000 and 1101) immediate 4 bit integer.//0000-1101标示4位长0-12的无符号整数,没有right值了
* Unsigned integer from 0 to 12. The encoded value is actually from
* 1to 13 because 0000 and 1111 can not be used, so 1 should be
* subtracted from the encoded 4 bit value to obtain the right value.
*|11111111| - End of ziplist. //标示ziplist结尾
*
*All the integers are represented in little endian byte order.