Memcached有自己的内存管理,避免了频繁使用malloc/free造成内存碎片的问题。在看源码之前,先大概了解一下Memcached内存管理的概念。
在Memcached中,会先分配一大块连续内存(默认为64M,即 settings.maxbytes = 64 * 1024 * 1024),可以在启动时通过-m参数指定。一大块内存分配好后,会再细分为大小相同的Slab(默认为1M);一个Slab再细分为更小的Chunk,在同一个Slab中,Chunk的大小相同;在每个Chunk中保存着item,item是真正存储数据的地方。
参考《Memcached全面剖析》(长野雅广、前坂徹著charlee 译)中的图片,来形象看一下各个结构:
图中,Slab Class: N即为Slab,里面保存的是Chunks;不同Slab中Chunk的大小依次增长,增长的快慢是由参数settings.factor决定的,这个参数默认为1.25,可以通过-f指定。
下面来看一下Slab对应的结构体:
/*
 * Per-size-class bookkeeping for the slab allocator. One entry exists for
 * every slab class; all chunks handed out by a class have the same size.
 */
typedef struct {
unsigned int size; /* sizes of items; this is effectively the chunk size of the class */
unsigned int perslab; /* how many items per slab, i.e. how many chunks one slab page holds */
void *slots; /* list of item ptrs */
unsigned int sl_curr; /* total free items in list */
unsigned int slabs; /* how many slabs were allocated for this class */
void **slab_list; /* array of slab pointers; each entry is the start of one slab page (a run of chunks) */
unsigned int list_size; /* size of prev array (allocated capacity of slab_list) */
unsigned int killing; /* index+1 of dying slab, or zero if none */
size_t requested; /* The number of requested bytes */
} slabclass_t;
static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];/* table of all slab classes, indexed by class id */
来看一下item对应的数据结构:
/*
 * Header placed at the start of every chunk. The fixed fields below are
 * bookkeeping; the variable-length payload (optional CAS, key, suffix,
 * value) follows in data[].
 */
typedef struct _stritem {
/* Protected by LRU locks */
struct _stritem *next;/* next/prev are list links; presumably the list headed by slabclass_t.slots is chained through them -- confirm against do_slabs_free */
struct _stritem *prev;
/* Rest are protected by an item lock */
struct _stritem *h_next; /* hash chain next */
rel_time_t time; /* least recent access */
rel_time_t exptime; /* expire time */
int nbytes; /* size of data */
unsigned short refcount;/* reference count */
uint8_t nsuffix; /* length of flags-and-length string */
uint8_t it_flags; /* ITEM_* above */
uint8_t slabs_clsid;/* which slab class we're in */
uint8_t nkey; /* key length, w/terminating null and padding */
/* this odd type prevents type-punning issues when we do
* the little shuffle to save space when not using CAS. */
union {
uint64_t cas;
char end;
} data[];
/* if it_flags & ITEM_CAS we have 8 bytes CAS */
/* then null-terminated key */
/* then " flags length\r\n" (no terminating null) */
/* then data with terminating \r\n (no terminating null; it's binary!) */
} item;
item是存储key-value的地方,前面是一些属性,用来管理存储空间;存储数据的地方为data[]。
下面这张图来看一下Slab、slabclass、item的关系:
从图中可以看出,slabclass数组保存着结构体,结构体中保存有指向item的连接指针slots,指向chunk数组的指针slab_list。
下面源码分析,包括分配一大块内存、把大块内存分解为chunk,分配item添加到slabclass结构体中。
从main函数开始看起:
// Called from main():
slabs_init(settings.maxbytes, settings.factor, preallocate);
// The implementation lives in slabs.c; these globals track the optional
// preallocated region that memory_allocate() carves slab pages from.
static void *mem_base = NULL;// start of the preallocated region; stays NULL when nothing was preallocated
static void *mem_current = NULL;// next address to hand out from the preallocated region
static size_t mem_avail = 0;// bytes still available in the preallocated region
/**
 * Initialise the slab allocator.
 *
 * Builds the slabclass[] table: starting from sizeof(item) +
 * settings.chunk_size, each class's chunk size is rounded up to a multiple
 * of CHUNK_ALIGN_BYTES and the next class is `factor` times larger. The last
 * class always holds a single chunk of settings.item_size_max bytes.
 *
 * @param limit    upper bound on memory the allocator may use (stored in
 *                 mem_limit).
 * @param factor   growth factor between the chunk sizes of consecutive
 *                 classes.
 * @param prealloc when true, try to malloc `limit` bytes up front and then
 *                 preallocate one slab page per class. If the big malloc
 *                 fails, a warning is printed and pages are obtained lazily.
 */
void slabs_init(const size_t limit, const double factor, const bool prealloc) {
    int i = POWER_SMALLEST - 1;
    /* Chunk size of the smallest class: the item header plus the
     * configured minimum payload. */
    unsigned int size = sizeof(item) + settings.chunk_size;

    mem_limit = limit;

    if (prealloc) {
        /* Allocate everything in a big chunk with malloc */
        mem_base = malloc(mem_limit);
        if (mem_base != NULL) {
            /* Success: the whole region is available for carving. */
            mem_current = mem_base;
            mem_avail = mem_limit;
        } else {
            fprintf(stderr, "Warning: Failed to allocate requested memory in"
                    " one large chunk.\nWill allocate in smaller chunks\n");
        }
    }

    memset(slabclass, 0, sizeof(slabclass));

    while (++i < MAX_NUMBER_OF_SLAB_CLASSES-1 && size <= settings.item_size_max / factor) {
        /* Make sure items are always n-byte aligned: round the chunk size
         * up to the next multiple of CHUNK_ALIGN_BYTES. */
        if (size % CHUNK_ALIGN_BYTES) {
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
        }

        slabclass[i].size = size;
        /* Number of chunks that fit in one slab page. */
        slabclass[i].perslab = settings.item_size_max / slabclass[i].size;
        /* Scale by the growth factor to get the next class's chunk size. */
        size *= factor;
        if (settings.verbose > 1) {
            fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
                    i, slabclass[i].size, slabclass[i].perslab);
        }
    }

    /* The final class: one chunk that spans a whole item_size_max page. */
    power_largest = i;
    slabclass[power_largest].size = settings.item_size_max;
    slabclass[power_largest].perslab = 1;
    if (settings.verbose > 1) {
        fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
                i, slabclass[i].size, slabclass[i].perslab);
    }

    /* for the test suite: faking of how much we've already malloc'd */
    {
        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
        if (t_initial_malloc) {
            mem_malloced = (size_t)atol(t_initial_malloc);
        }
    }

    if (prealloc) {
        /* Give classes up to power_largest one slab page each. */
        slabs_preallocate(power_largest);
    }
}
/*
 * Allocate one slab page for each class, starting at POWER_SMALLEST, for at
 * most `maxslabs` classes. Doing this up front avoids surprising
 * "SERVER_ERROR out of memory" replies later on. If any page cannot be
 * obtained the process prints a diagnostic and exits with status 1.
 */
static void slabs_preallocate (const unsigned int maxslabs) {
    int cls;
    unsigned int attempt;

    for (cls = POWER_SMALLEST, attempt = 1;
         cls < MAX_NUMBER_OF_SLAB_CLASSES && attempt <= maxslabs;
         cls++, attempt++) {
        if (do_slabs_newslab(cls) != 0) {
            continue;   /* page obtained, move on to the next class */
        }

        fprintf(stderr, "Error while preallocating slab memory!\n"
                "If using -L or other prealloc options, max memory must be "
                "at least %d megabytes.\n", power_largest);
        exit(1);
    }
}
在函数do_slabs_newslab中会给slabclass_t的slab_list指针赋值,并把item加到slabclass_t的slots上:
/*
 * Add one slab page to class `id`.
 *
 * The page is zeroed, split into chunks that are pushed onto the class's
 * free list, and then recorded in slab_list. mem_malloced grows by the page
 * length.
 *
 * Returns 1 on success. Returns 0 if the memory limit would be exceeded
 * (only enforced once the class already owns at least one slab), or if
 * growing slab_list or obtaining the page fails.
 */
static int do_slabs_newslab(const unsigned int id) {
    slabclass_t *cls = &slabclass[id];
    /* With slab reassignment every page is item_size_max bytes; otherwise
     * it is exactly large enough for perslab chunks. */
    int len = settings.slab_reassign ? settings.item_size_max
        : cls->size * cls->perslab;
    char *page;

    if (mem_limit && mem_malloced + len > mem_limit && cls->slabs > 0) {
        mem_limit_reached = true;
        goto fail;
    }

    /* Make sure slab_list has room for one more entry. */
    if (grow_slab_list(id) == 0) {
        goto fail;
    }

    /* Comes from the preallocated region when there is one, else malloc. */
    page = memory_allocate((size_t)len);
    if (page == NULL) {
        goto fail;
    }

    memset(page, 0, (size_t)len);
    split_slab_page_into_freelist(page, id);

    cls->slab_list[cls->slabs] = page;
    cls->slabs++;
    mem_malloced += len;
    MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);

    return 1;

fail:
    MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
    return 0;
}
/*
 * Ensure slab_list of class `id` can take one more slab pointer.
 *
 * When the list is full its capacity is doubled (or set to 16 entries the
 * first time). Returns 1 if there is room afterwards, 0 if realloc failed;
 * on failure the existing list is left untouched.
 */
static int grow_slab_list (const unsigned int id) {
    slabclass_t *cls = &slabclass[id];
    size_t capacity;
    void *grown;

    if (cls->slabs != cls->list_size) {
        return 1;   /* still has spare entries */
    }

    capacity = (cls->list_size != 0) ? cls->list_size * 2 : 16;
    grown = realloc(cls->slab_list, capacity * sizeof(void *));
    if (grown == NULL) {
        return 0;
    }

    cls->slab_list = grown;
    cls->list_size = capacity;
    return 1;
}
/*
 * Carve a fresh slab page into chunks of the class's size and hand each
 * chunk to do_slabs_free(), which is expected to put it on the class's free
 * list. `ptr` is the start of the page; `id` selects the slab class.
 */
static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
    slabclass_t *cls = &slabclass[id];
    char *chunk = ptr;
    unsigned int done = 0;

    while (done < cls->perslab) {
        do_slabs_free(chunk, 0, id);
        chunk += cls->size;   /* step to the next chunk in the page */
        done++;
    }
}
/*
 * Obtain `size` bytes for a slab page.
 *
 * Without a preallocated region (mem_base == NULL) this is a plain malloc.
 * Otherwise the bytes are taken from the front of the remaining region:
 * NULL is returned if fewer than `size` bytes remain; if not, the cursor
 * advances by `size` rounded up to a multiple of CHUNK_ALIGN_BYTES so the
 * next allocation stays aligned.
 */
static void *memory_allocate(size_t size) {
    void *block;
    size_t misalign;

    if (mem_base == NULL) {
        /* We are not using a preallocated large memory chunk */
        return malloc(size);
    }

    if (size > mem_avail) {
        return NULL;
    }
    block = mem_current;

    /* mem_current pointer _must_ be aligned!!! */
    misalign = size % CHUNK_ALIGN_BYTES;
    if (misalign) {
        size += CHUNK_ALIGN_BYTES - misalign;
    }

    mem_current = ((char*)mem_current) + size;
    /* The padding may push past what is left; clamp instead of wrapping. */
    mem_avail = (size < mem_avail) ? mem_avail - size : 0;

    return block;
}