数据结构,源码:
// Maximum preallocation length, in bytes (1024*1024)
#define SDS_MAX_PREALLOC (1024*1024)
#include <sys/types.h>
#include <stdarg.h>
// Redis does not work with struct sdshdr directly; it defines the sds type
// (a plain char pointer) and reaches the header through it
typedef char *sds;
// Header of Redis's custom string type
struct sdshdr {
// Length of the string currently stored in buf
unsigned int len;
// Number of bytes in buf that are still unused
unsigned int free;
// Character array where the string data is actually stored
char buf[];
};
static inline size_t sdslen(const sds s) {
// 其中sds指向sdshdr结构的buf[],通过如下操作来获得整个sdshdr结构:
// sizeof(struct sdshdr)的值为8
/* 这里为什么用s-(sizeof(struct sdshdr))就得到sdshdr *指针?
从后面我们可以看到sds指向sdshdr结构的buf[]字符数组,所以
s-(sizeof(struct sdshdr))就是sdshdr结构的地址。
*/
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
return sh->len;
}
static inline size_t sdsavail(const sds s) {
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
return sh->free;
}
SDS和C字符串的区别
(1)sds 的 sdslen 的时间复杂度是O(1),长度保存在len字段中;C语言的strlen函数的时间复杂度是O(N),需要遍历完整字符串,直到遇到结尾的空字符 '\0'。
(2)C字符串不记录自身长度,容易导致缓冲区溢出(buffer overflow);SDS则可以杜绝缓冲区溢出:SDS需要增加长度时,先检查buf[]数组空间是否足够,如果不够的话自动将SDS的buf空间扩展至足够大,然后再执行修改操作。
(3)减少修改字符串时带来的内存重新分配次数。使用free字段记录还可以使用的空闲空间,不必每次减少或者增加长度都重新申请内存空间。
对于C语言字符串,比如append操作,操作前需要通过内存重新分配来扩大数组的大小——如果忘记了会产生缓冲区溢出。对于缩短字符串的操作,比如trim,程序需要通过重新分配内存来释放不再使用的空间,否则会内存泄漏。
SDS中,buf数组的长度不一定就是字符串长度 + 1,数组中可以包含未被使用的空间,这些字节的数量由 free 字段记录。
sdsnewlen
/* Create a new sds string from the 'initlen' bytes that 'init' points to.
 *
 * When 'init' is NULL the storage is obtained with zcalloc() instead of
 * zmalloc(); per the original notes this leaves the string filled with
 * zero bytes.
 *
 * A '\0' is always stored right after the content, so the result of e.g.
 *
 * mystring = sdsnewlen("abc",3);
 *
 * can be handed to printf(). The content is nevertheless binary safe: its
 * length lives in the header, so it may contain '\0' bytes in the middle.
 *
 * Returns the new sds (a pointer to the header's buf[]), or NULL when the
 * allocator returns NULL. */
sds sdsnewlen(const void *init, size_t initlen) {
    /* header + content + terminating '\0' */
    size_t total = sizeof(struct sdshdr) + initlen + 1;
    struct sdshdr *hdr;

    if (init == NULL) {
        hdr = zcalloc(total);
    } else {
        hdr = zmalloc(total);
    }
    if (hdr == NULL) {
        return NULL;
    }

    hdr->len = initlen;
    hdr->free = 0;
    if (init && initlen) {
        /* Copy the caller's bytes into the sds buffer. */
        memcpy(hdr->buf, init, initlen);
    }
    /* Terminate like a C string. */
    hdr->buf[initlen] = '\0';
    return (char *)hdr->buf;
}
接着看其他方法:
/* Create an empty (zero length) sds string. A header is still allocated
 * and buf holds a single '\0', since sdsnewlen() always terminates. */
sds sdsempty(void) {
    sds blank = sdsnewlen("", 0);

    return blank;
}
/* Create a new sds string from the null-terminated C string 'init'.
 * A NULL 'init' is forwarded to sdsnewlen() with a length of 0. */
sds sdsnew(const char *init) {
    if (init == NULL) {
        return sdsnewlen(init, 0);
    }
    return sdsnewlen(init, strlen(init));
}
/* Duplicate an sds string: a new sds is created from the sdslen(s) bytes
 * of 's'. */
sds sdsdup(const sds s) {
    size_t nbytes = sdslen(s);

    return sdsnewlen(s, nbytes);
}
/* Release an sds string. Nothing happens when 's' is NULL. The pointer
 * handed to zfree() is the start of the header, not 's' itself. */
void sdsfree(sds s) {
    if (s != NULL) {
        zfree(s - sizeof(struct sdshdr));
    }
}
sdsupdatelen函数作用
/* Reset the recorded length of 's' to whatever strlen() reports, i.e. the
 * content is taken to end at the first '\0' byte. The bytes dropped from
 * the length are added to the free count.
 *
 * Useful after the buffer has been edited by hand, for example:
 *
 * s = sdsnew("foobar");
 * s[2] = '\0';
 * sdsupdatelen(s);
 * printf("%d\n", sdslen(s));
 *
 * This prints "2". Without the sdsupdatelen() call it would print "6":
 * the bytes changed, but the len field in the header did not. */
void sdsupdatelen(sds s) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));
    int measured = strlen(s);

    hdr->free += (hdr->len - measured);
    hdr->len = measured;
}
清空sds字符串
/* Empty an sds string in place (length becomes zero).
 *
 * The buffer is kept: the old length is moved into the free count, so
 * later appends need no allocation as long as they fit in the space that
 * was already there. */
void sdsclear(sds s) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));

    hdr->buf[0] = '\0';
    hdr->free += hdr->len;
    hdr->len = 0;
}
数据结构,源码:
// Maximum preallocation length, in bytes (1024*1024)
#define SDS_MAX_PREALLOC (1024*1024)
#include <sys/types.h>
#include <stdarg.h>
// Redis does not work with struct sdshdr directly; it defines the sds type
// (a plain char pointer) and reaches the header through it
typedef char *sds;
// Header of Redis's custom string type
struct sdshdr {
// Length of the string currently stored in buf
unsigned int len;
// Number of bytes in buf that are still unused
unsigned int free;
// Character array where the string data is actually stored
char buf[];
};
static inline size_t sdslen(const sds s) {
// 其中sds指向sdshdr结构的buf[],通过如下操作来获得整个sdshdr结构:
// sizeof(struct sdshdr)的值为8
/* 这里为什么用s-(sizeof(struct sdshdr))就得到sdshdr *指针?
从后面我们可以看到sds指向sdshdr结构的buf[]字符数组,所以
s-(sizeof(struct sdshdr))就是sdshdr结构的地址。
*/
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
return sh->len;
}
static inline size_t sdsavail(const sds s) {
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
return sh->free;
}
SDS和C字符串的区别
(1)sds 的 sdslen 的时间复杂度是O(1),长度保存在len字段中;C语言的strlen函数的时间复杂度是O(N),需要遍历完整字符串,直到遇到结尾的空字符 '\0'。
(2)C字符串不记录自身长度,容易导致缓冲区溢出(buffer overflow);SDS则可以杜绝缓冲区溢出:SDS需要增加长度时,先检查buf[]数组空间是否足够,如果不够的话自动将SDS的buf空间扩展至足够大,然后再执行修改操作。
(3)减少修改字符串时带来的内存重新分配次数。使用free字段记录还可以使用的空闲空间,不必每次减少或者增加长度都重新申请内存空间。
对于C语言字符串,比如append操作,操作前需要通过内存重新分配来扩大数组的大小——如果忘记了会产生缓冲区溢出。对于缩短字符串的操作,比如trim,程序需要通过重新分配内存来释放不再使用的空间,否则会内存泄漏。
SDS中,buf数组的长度不一定就是字符串长度 + 1,数组中可以包含未被使用的空间,这些字节的数量由 free 字段记录。
sdsnewlen
/* Create a new sds string from the 'initlen' bytes that 'init' points to.
 *
 * When 'init' is NULL the storage is obtained with zcalloc() instead of
 * zmalloc(); per the original notes this leaves the string filled with
 * zero bytes.
 *
 * A '\0' is always stored right after the content, so the result of e.g.
 *
 * mystring = sdsnewlen("abc",3);
 *
 * can be handed to printf(). The content is nevertheless binary safe: its
 * length lives in the header, so it may contain '\0' bytes in the middle.
 *
 * Returns the new sds (a pointer to the header's buf[]), or NULL when the
 * allocator returns NULL. */
sds sdsnewlen(const void *init, size_t initlen) {
    /* header + content + terminating '\0' */
    size_t total = sizeof(struct sdshdr) + initlen + 1;
    struct sdshdr *hdr;

    if (init == NULL) {
        hdr = zcalloc(total);
    } else {
        hdr = zmalloc(total);
    }
    if (hdr == NULL) {
        return NULL;
    }

    hdr->len = initlen;
    hdr->free = 0;
    if (init && initlen) {
        /* Copy the caller's bytes into the sds buffer. */
        memcpy(hdr->buf, init, initlen);
    }
    /* Terminate like a C string. */
    hdr->buf[initlen] = '\0';
    return (char *)hdr->buf;
}
接着看其他方法:
/* Create an empty (zero length) sds string. A header is still allocated
 * and buf holds a single '\0', since sdsnewlen() always terminates. */
sds sdsempty(void) {
    sds blank = sdsnewlen("", 0);

    return blank;
}
/* Create a new sds string from the null-terminated C string 'init'.
 * A NULL 'init' is forwarded to sdsnewlen() with a length of 0. */
sds sdsnew(const char *init) {
    if (init == NULL) {
        return sdsnewlen(init, 0);
    }
    return sdsnewlen(init, strlen(init));
}
/* Duplicate an sds string: a new sds is created from the sdslen(s) bytes
 * of 's'. */
sds sdsdup(const sds s) {
    size_t nbytes = sdslen(s);

    return sdsnewlen(s, nbytes);
}
/* Release an sds string. Nothing happens when 's' is NULL. The pointer
 * handed to zfree() is the start of the header, not 's' itself. */
void sdsfree(sds s) {
    if (s != NULL) {
        zfree(s - sizeof(struct sdshdr));
    }
}
sdsupdatelen函数作用
/* Reset the recorded length of 's' to whatever strlen() reports, i.e. the
 * content is taken to end at the first '\0' byte. The bytes dropped from
 * the length are added to the free count.
 *
 * Useful after the buffer has been edited by hand, for example:
 *
 * s = sdsnew("foobar");
 * s[2] = '\0';
 * sdsupdatelen(s);
 * printf("%d\n", sdslen(s));
 *
 * This prints "2". Without the sdsupdatelen() call it would print "6":
 * the bytes changed, but the len field in the header did not. */
void sdsupdatelen(sds s) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));
    int measured = strlen(s);

    hdr->free += (hdr->len - measured);
    hdr->len = measured;
}
清空sds字符串
/* Empty an sds string in place (length becomes zero).
 *
 * The buffer is kept: the old length is moved into the free count, so
 * later appends need no allocation as long as they fit in the space that
 * was already there. */
void sdsclear(sds s) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));

    hdr->buf[0] = '\0';
    hdr->free += hdr->len;
    hdr->len = 0;
}
sdsMakeRoomFor
/* Threshold and step, in bytes (1024*1024), used by sdsMakeRoomFor() when
 * sizing a grown buffer. The stray trailing '|' that had become part of
 * the macro body is removed: it made every expansion a syntax error. */
#define SDS_MAX_PREALLOC (1024*1024)
/* Make sure at least 'addlen' bytes can be written after the end of the
 * string, plus one more byte for the null terminator.
 *
 * If the free space is already sufficient, 's' is returned untouched.
 * Otherwise the buffer is reallocated for a target length of len+addlen,
 * which is doubled while below SDS_MAX_PREALLOC and otherwise increased
 * by SDS_MAX_PREALLOC.
 *
 * Note: the length reported by sdslen() is not changed; only the free
 * space is.
 *
 * Returns the (possibly moved) sds, or NULL if zrealloc() returns NULL. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *oldhdr, *grown;
    size_t used, target;

    if (sdsavail(s) >= addlen) {
        return s;
    }

    used = sdslen(s);
    oldhdr = (void *)(s - sizeof(struct sdshdr));
    target = used + addlen;
    if (target >= SDS_MAX_PREALLOC) {
        target += SDS_MAX_PREALLOC;
    } else {
        target *= 2;
    }

    grown = zrealloc(oldhdr, sizeof(struct sdshdr) + target + 1);
    if (grown == NULL) {
        return NULL;
    }
    grown->free = target - used;
    return grown->buf;
}
释放空闲空间sdsRemoveFreeSpace
/* Reallocate the sds string so that no free space is left at its end.
 * The content is not altered, but the next concatenation will need a
 * reallocation.
 *
 * After a successful call the sds passed in must no longer be used; every
 * reference has to be replaced with the returned pointer.
 *
 * Returns the shrunk sds, or NULL if zrealloc() returns NULL. The result is
 * checked before it is dereferenced, matching sdsMakeRoomFor(), and kept
 * in a separate variable so the original header pointer is not lost.
 * NOTE(review): whether the original allocation is still valid after a
 * failed zrealloc() depends on zrealloc(), which is not shown here. */
sds sdsRemoveFreeSpace(sds s) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));
    struct sdshdr *shrunk;

    /* header + content + terminating '\0', nothing else */
    shrunk = zrealloc(hdr, sizeof(struct sdshdr) + hdr->len + 1);
    if (shrunk == NULL) {
        return NULL;
    }
    shrunk->free = 0;
    return shrunk->buf;
}
sdsAllocSize
/* Return the total allocation size of the given sds string, which is the
 * sum of:
 * 1) The sds header located before the pointer.
 * 2) The string itself.
 * 3) The free space at the end, if any.
 * 4) The implicit null terminator.
 */
size_t sdsAllocSize(sds s) {
    const struct sdshdr *hdr = (const void *)(s - sizeof(struct sdshdr));
    size_t total = sizeof(*hdr);

    total += hdr->len;
    total += hdr->free;
    total += 1;
    return total;
}
sdsIncrLen 增加指定长度,可以是负数
/* Add 'incr' to the sds length and take the same amount away from the
 * free space, then store the null terminator at the new end.
 *
 * Intended for fixing up the length after the caller has used
 * sdsMakeRoomFor() and written bytes past the current end of the string.
 *
 * A negative 'incr' right-trims the string. Both directions are guarded
 * by assert(): growth must fit in the free space, and a trim must not
 * exceed the current length.
 *
 * Usage example: append bytes coming from the kernel straight to the end
 * of an sds string, without an intermediate buffer:
 *
 * oldlen = sdslen(s);
 * s = sdsMakeRoomFor(s, BUFFER_SIZE);
 * nread = read(fd, s+oldlen, BUFFER_SIZE);
 * ... check for nread <= 0 and handle it ...
 * sdsIncrLen(s, nread);
 */
void sdsIncrLen(sds s, int incr) {
    struct sdshdr *sh = (void *)(s - sizeof(struct sdshdr));

    if (incr < 0) {
        assert(sh->len >= (unsigned int)(-incr));
    } else {
        assert(sh->free >= (unsigned int)incr);
    }

    sh->len += incr;
    sh->free -= incr;
    s[sh->len] = '\0';
}
sdsgrowzero
/* Grow the sds to length 'len'. The bytes that were not part of the
 * original length are set to zero.
 *
 * Nothing is done (and 's' is returned as is) when 'len' is not larger
 * than the current length. Returns NULL if sdsMakeRoomFor() fails. */
sds sdsgrowzero(sds s, size_t len) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));
    size_t oldlen = hdr->len;
    size_t capacity;

    if (oldlen >= len) {
        return s;
    }

    s = sdsMakeRoomFor(s, len - oldlen);
    if (s == NULL) {
        return NULL;
    }

    /* sdsMakeRoomFor() may have returned a different pointer, so the
     * header address is recomputed before use. */
    hdr = (void *)(s - sizeof(struct sdshdr));
    /* Zero the added region so it holds no garbage; the +1 also writes the
     * trailing '\0' byte. */
    memset(s + oldlen, 0, (len - oldlen + 1));
    capacity = hdr->len + hdr->free;
    hdr->len = len;
    hdr->free = capacity - hdr->len;
    return s;
}
字符串拼接
sdscatlen
/* Append the 'len' bytes pointed to by 't' to the end of the sds string
 * 's'. The copy is binary safe: it is driven by 'len', not by a '\0'.
 *
 * After the call the sds passed in is no longer valid; every reference
 * must be replaced with the returned pointer. Returns NULL if
 * sdsMakeRoomFor() fails. */
sds sdscatlen(sds s, const void *t, size_t len) {
    size_t oldlen = sdslen(s);
    struct sdshdr *hdr;

    /* Returns 's' unchanged when the free space suffices, otherwise a
     * reallocated buffer. */
    s = sdsMakeRoomFor(s, len);
    if (s == NULL) {
        return NULL;
    }

    hdr = (void *)(s - sizeof(struct sdshdr));
    memcpy(s + oldlen, t, len);
    s[oldlen + len] = '\0';
    hdr->len = oldlen + len;
    hdr->free -= len;
    return s;
}
字符串拷贝 sdscpylen
/* Overwrite the sds string 's' with the 'len' bytes of the binary safe
 * string pointed to by 't'.
 *
 * When the existing buffer (len + free) is too small it is enlarged with
 * sdsMakeRoomFor(), so the pointer passed in should be considered invalid
 * after the call; use the returned one. Returns NULL if the enlargement
 * fails. */
sds sdscpylen(sds s, const char *t, size_t len) {
    struct sdshdr *hdr = (void *)(s - sizeof(struct sdshdr));
    size_t capacity = hdr->free + hdr->len;

    if (len > capacity) {
        s = sdsMakeRoomFor(s, len - hdr->len);
        if (s == NULL) {
            return NULL;
        }
        /* The buffer may have moved: refresh header and capacity. */
        hdr = (void *)(s - sizeof(struct sdshdr));
        capacity = hdr->free + hdr->len;
    }

    memcpy(s, t, len);
    s[len] = '\0';
    hdr->len = len;
    hdr->free = capacity - len;
    return s;
}
/* Same as sdscpylen(), except that 't' must be a null-terminated C string:
 * the number of bytes to copy is obtained with strlen(). */
sds sdscpy(sds s, const char *t) {
    size_t tlen = strlen(t);

    return sdscpylen(s, t, tlen);
}