概述
Redis对象系统是在数据结构上又一层封装
源码
robj结构体
typedef struct redisObject {
unsigned type:4;
unsigned encoding:4;
unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
* LFU data (least significant 8 bits frequency
* and most significant 16 bits access time). */
//LRU: 最近最少使用,以最近一次访问时间为参考。LFU: 最近一次被访问次数最少的数据,以次数为参考。
int refcount;
void *ptr;
} robj;
在robj中每个元素冒号后面的是位数,sizeof(robj)的结果是16个字节,内存使用非常紧凑。
关于该结构体的详细描述可参考Redis 数据结构——robj,写的非常详细。
创建
robj *createObject(int type, void *ptr) {
robj *o = zmalloc(sizeof(*o));
o->type = type;
o->encoding = OBJ_ENCODING_RAW;
o->ptr = ptr;
o->refcount = 1;
/* Set the LRU to the current lruclock (minutes resolution), or
* alternatively the LFU counter. */
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
} else {
o->lru = LRU_CLOCK();
}
return o;
}
获取LRU
unsigned int getLRUClock(void) {
return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
}
/* This function is used to obtain the current LRU clock.
* If the current resolution is lower than the frequency we refresh the
* LRU clock (as it should be in production servers) we return the
* precomputed value, otherwise we need to resort to a system call. */
unsigned int LRU_CLOCK(void) {
unsigned int lruclock;
//默认LRU_CLOCK_RESOLUTION是1000,如果server.hz大于1000的话要使用原子类型的加载了
if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
atomicGet(server.lruclock,lruclock);
} else {
lruclock = getLRUClock();
}
return lruclock;
}
设置成共享的对象
引入引用计数的功能,类似于C++中的shared_ptr
/* Set a special refcount in the object to make it "shared":
* incrRefCount and decrRefCount() will test for this special refcount
* and will not touch the object. This way it is free to access shared
* objects such as small integers from different threads without any
* mutex.
*
* A common patter to create shared objects:
*
* robj *myobject = makeObjectShared(createObject(...));
*
*/
robj *makeObjectShared(robj *o) {
serverAssert(o->refcount == 1);
o->refcount = OBJ_SHARED_REFCOUNT;//INT_MAX
return o;
}
创建不同String对象
redis内置的sds结构
/* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
* string object where o->ptr points to a proper sds string. */
robj *createRawStringObject(const char *ptr, size_t len) {
return createObject(OBJ_STRING, sdsnewlen(ptr,len));
}
//可以理解为紧凑型的robj,string直接在robj后面
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
//robj后紧接着的就是这个sdshdr8
robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
struct sdshdr8 *sh = (void*)(o+1);//指向这个sdshdr8对象
o->type = OBJ_STRING;
o->encoding = OBJ_ENCODING_EMBSTR;
o->ptr = sh+1;//这里跳到的是字符串那里
o->refcount = 1;
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
} else {
o->lru = LRU_CLOCK();
}
sh->len = len;
sh->alloc = len;
sh->flags = SDS_TYPE_8;
if (ptr == SDS_NOINIT)
sh->buf[len] = '\0';
else if (ptr) {
memcpy(sh->buf,ptr,len);
sh->buf[len] = '\0';
} else {
memset(sh->buf,0,len+1);
}
return o;
}
/* Create a string object with EMBSTR encoding if it is smaller than
* OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
* used.
*
* The current limit of 44 is chosen so that the biggest string object
* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
//小于44的话就创建EmbeddedStringObject,不然就是RawStringObject
#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
robj *createStringObject(const char *ptr, size_t len) {
if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
return createEmbeddedStringObject(ptr,len);
else
return createRawStringObject(ptr,len);
}
/* Same as CreateRawStringObject, can return NULL if allocation fails */
robj *tryCreateRawStringObject(const char *ptr, size_t len) {
sds str = sdstrynewlen(ptr,len);
if (!str) return NULL;
return createObject(OBJ_STRING, str);
}
/* Same as createStringObject, can return NULL if allocation fails */
robj *tryCreateStringObject(const char *ptr, size_t len) {
if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
return createEmbeddedStringObject(ptr,len);
else
return tryCreateRawStringObject(ptr,len);
}
/* Create a string object from a long long value. When possible returns a
* shared integer object, or at least an integer encoded one.
*
* If valueobj is non zero, the function avoids returning a shared
* integer, because the object is going to be used as value in the Redis key
* space (for instance when the INCR command is used), so we want LFU/LRU
* values specific for each key. */
robj *createStringObjectFromLongLongWithOptions(long long value, int valueobj) {
robj *o;
if (server.maxmemory == 0 ||
!(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
{
/* If the maxmemory policy permits, we can still return shared integers
* even if valueobj is true. */
valueobj = 0;
}
if (value >= 0 && value < OBJ_SHARED_INTEGERS && valueobj == 0) {
incrRefCount(shared.integers[value]);
o = shared.integers[value];
} else {
if (value >= LONG_MIN && value <= LONG_MAX) {
o = createObject(OBJ_STRING, NULL);
o->encoding = OBJ_ENCODING_INT;
o->ptr = (void*)((long)value);
} else {
o = createObject(OBJ_STRING,sdsfromlonglong(value));
}
}
return o;
}
/* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
* to create a shared object if possible. */
robj *createStringObjectFromLongLong(long long value) {
return createStringObjectFromLongLongWithOptions(value,0);
}
/* Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared
* object when LFU/LRU info are needed, that is, when the object is used
* as a value in the key space, and Redis is configured to evict based on
* LFU/LRU. */
robj *createStringObjectFromLongLongForValue(long long value) {
return createStringObjectFromLongLongWithOptions(value,1);
}
/* Create a string object from a long double. If humanfriendly is non-zero
* it does not use exponential format and trims trailing zeroes at the end,
* however this results in loss of precision. Otherwise exp format is used
* and the output of snprintf() is not modified.
*
* The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
char buf[MAX_LONG_DOUBLE_CHARS];
int len = ld2string(buf,sizeof(buf),value,humanfriendly? LD_STR_HUMAN: LD_STR_AUTO);
return createStringObject(buf,len);
}
后面是一些其他类型对象的创建,这里不多叙述
对象的解除(dismiss)
使用fork创建一个子进程用于生成快照时,主进程和子进程共享相同的物理内存页面,如果父进程更改了一些键,就会引起写时复制(copy on write, CoW),增加了内存的消耗。在子进程中,序列化键值之后这些数据将不会被再次访问,因此为了避免CoW,需要把这部分内存归还为操作系统,见dismissMemory()。鉴于迭代复杂数据类型中所有的node/field/member/entry比较耗时,我们仅在估计单个分配的大小大于 OS 的页面大小的近似平均值时迭代并关闭它们。'size_hint’是序列化值的大小。 这种方法并不准确,但它可以减少可能不会释放任何内存的复杂数据类型的不必要迭代。
void dismissObject(robj *o, size_t size_hint) {
/* madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. */
if (server.thp_enabled) return;
//这里只有使用Linux下的jemalloc才进行这个操作
/* Currently we use zmadvise_dontneed only when we use jemalloc with Linux.
* so we avoid these pointless loops when they're not going to do anything. */
#if defined(USE_JEMALLOC) && defined(__linux__)
if (o->refcount != 1) return;
switch(o->type) {
case OBJ_STRING: dismissStringObject(o); break;
case OBJ_LIST: dismissListObject(o, size_hint); break;
case OBJ_SET: dismissSetObject(o, size_hint); break;
case OBJ_ZSET: dismissZsetObject(o, size_hint); break;
case OBJ_HASH: dismissHashObject(o, size_hint); break;
case OBJ_STREAM: dismissStreamObject(o, size_hint); break;
default: break;
}
#else
UNUSED(o); UNUSED(size_hint);
#endif
}
内存解除
以dismissSds为例,详解一下这部分的函数:
void dismissSds(sds s) {
dismissMemory(sdsAllocPtr(s), sdsAllocSize(s));
}
/* Try to release pages back to the OS directly (bypassing the allocator),
* in an effort to decrease CoW during fork. For small allocations, we can't
* release any full page, so in an effort to avoid getting the size of the
* allocation from the allocator (malloc_size) when we already know it's small,
* we check the size_hint. If the size is not already known, passing a size_hint
* of 0 will lead the checking the real size of the allocation.
* Also please note that the size may be not accurate, so in order to make this
* solution effective, the judgement for releasing memory pages should not be
* too strict. */
//内存大小超过一个页面的大小才可以这样做
void dismissMemory(void* ptr, size_t size_hint) {
if (ptr == NULL) return;
/* madvise(MADV_DONTNEED) can not release pages if the size of memory
* is too small, we try to release only for the memory which the size
* is more than half of page size. */
if (size_hint && size_hint <= server.page_size/2) return;
zmadvise_dontneed(ptr);
}
/* Use 'MADV_DONTNEED' to release memory to operating system quickly.
* We do that in a fork child process to avoid CoW when the parent modifies
* these shared pages. */
void zmadvise_dontneed(void *ptr) {
#if defined(USE_JEMALLOC) && defined(__linux__)
static size_t page_size = 0;
if (page_size == 0) page_size = sysconf(_SC_PAGESIZE);
size_t page_size_mask = page_size - 1;
size_t real_size = zmalloc_size(ptr);
if (real_size < page_size) return;
/* We need to align the pointer upwards according to page size, because
* the memory address is increased upwards and we only can free memory
* based on page. */
//必须以页面为单位释放内存,先进行内存对齐
char *aligned_ptr = (char *)(((size_t)ptr+page_size_mask) & ~page_size_mask);
real_size -= (aligned_ptr-(char*)ptr);
if (real_size >= page_size) {
madvise((void *)aligned_ptr, real_size&~page_size_mask, MADV_DONTNEED);
}
#else
(void)(ptr);
#endif
}
浮点型数值的存储
在redis中,浮点型数值也是使用sds类型进行存储的,主要的函数为:
int getDoubleFromObject(const robj *o, double *target) {
double value;
if (o == NULL) {
value = 0;
} else {
//类型一定要是OBJ_STRING
serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
//首先判断encoding是不是OBJ_ENCODING_RAW || OBJ_ENCODING_EMBSTR
if (sdsEncodedObject(o)) {
//是的话将字符串转换为double
if (!string2d(o->ptr, sdslen(o->ptr), &value))
return C_ERR;
} else if (o->encoding == OBJ_ENCODING_INT) {
//是OBJ_ENCODING_INT的话直接读取就行
value = (long)o->ptr;
} else {
serverPanic("Unknown string encoding");
}
}
*target = value;
return C_OK;
}
int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
double value;
if (getDoubleFromObject(o, &value) != C_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not a valid float");
}
return C_ERR;
}
*target = value;
return C_OK;
}