Memcached内存管理分析

最新推荐文章于 2024-07-16 12:38:33 发布

KangRoger

最新推荐文章于 2024-07-16 12:38:33 发布

阅读量1.4k

点赞数

分类专栏： Memcached 文章标签： memcached 内存管理内存分配 slab

本文链接：https://blog.csdn.net/KangRoger/article/details/48184419

版权

Memcached 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

Memcached有自己的内存管理，避免了频繁使用malloc/free造成内存碎片的问题。在看源码之前，先大概了解一下Memcached内存管理的概念。
在Memcached中，会先分配一大块连续内存（默认为64M，即settings.maxbytes = 64 * 1024 * 1024），可以在启动时通过-m选项指定。一大块内存分配好后，会再细分为大小相同的Slab（默认为1M）；一个Slab再细分为更小的Chunk，在同一个Slab中，Chunk的大小相同；在每个Chunk中保存着item，item是真正存储数据的地方。

参考《Memcached全面剖析》（长野雅广、前坂徹著charlee 译）中的图片，来形象看一下各个结构：

这里写图片描述

图中，Slab Class: N即为Slab，里面保存的是Chunks；不同Slab中Chunk的大小依次增长，增长的快慢是由参数settings.factor决定的，这个参数默认为1.25，可以通过-f指定。

下面来看一下Slab对应的结构体：

/* Bookkeeping for one slab class: every chunk handed out by a given class
 * has the same size. */
typedef struct {
    unsigned int size;      /* sizes of items *///in practice this is the chunk size of the class
    unsigned int perslab;   /* how many items per slab *///number of chunks that fit in one slab

    void *slots;           /* list of item ptrs */
    unsigned int sl_curr;   /* total free items in list */

    unsigned int slabs;     /* how many slabs were allocated for this class */

    void **slab_list;       /* array of slab pointers *///pointer-to-pointer: each entry points to one slab (an array of chunks)
    unsigned int list_size; /* size of prev array */

    unsigned int killing;  /* index+1 of dying slab, or zero if none */
    size_t requested; /* The number of requested bytes */
} slabclass_t;

static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];//table of slab classes, indexed by slab class id

来看一下item对应的数据结构：

/* Item header. Per the layout notes at the end of the struct, the optional
 * CAS value, the key, the suffix and the data follow in data[]. */
typedef struct _stritem {
    /* Protected by LRU locks */
    struct _stritem *next;//next/prev links used when the item sits in a linked list; presumably the list that slabclass_t's void *slots points at
    struct _stritem *prev;
    /* Rest are protected by an item lock */
    struct _stritem *h_next;    /* hash chain next */
    rel_time_t      time;       /* least recent access */
    rel_time_t      exptime;    /* expire time */
    int             nbytes;     /* size of data */
    unsigned short  refcount;//reference count
    uint8_t         nsuffix;    /* length of flags-and-length string */
    uint8_t         it_flags;   /* ITEM_* above */
    uint8_t         slabs_clsid;/* which slab class we're in */
    uint8_t         nkey;       /* key length, w/terminating null and padding */
    /* this odd type prevents type-punning issues when we do
     * the little shuffle to save space when not using CAS. */
    union {
        uint64_t cas;
        char end;
    } data[];
    /* if it_flags & ITEM_CAS we have 8 bytes CAS */
    /* then null-terminated key */
    /* then " flags length\r\n" (no terminating null) */
    /* then data with terminating \r\n (no terminating null; it's binary!) */
} item;

item是存储key-value的地方，前面是一些属性，用来管理存储空间；存储数据的地方为data[]。
下面通过这张图来看一下Slab、slabclass、item的关系：
这里写图片描述

从图中可以看出，slabclass数组保存着slabclass_t结构体，结构体中保存有指向空闲item链表的指针slots，以及指向slab指针数组的指针slab_list。

下面进行源码分析，包括分配一大块内存、把大块内存分解为chunk，以及分配item并添加到slabclass结构体中。

从main函数开始看起：

//called from main()
slabs_init(settings.maxbytes, settings.factor, preallocate);
//the implementation lives in slabs.c
static void *mem_base = NULL;//start address of the preallocated memory (NULL when nothing was preallocated)
static void *mem_current = NULL;//address the next allocation will be carved from
static size_t mem_avail = 0;//bytes still available in the preallocated memory
/*
 * Initialize the slab allocator's size-class table.
 *
 * limit    - total memory budget in bytes; stored in mem_limit.
 * factor   - growth factor applied to the chunk size from one slab class
 *            to the next.
 * prealloc - when true, try to malloc() the whole budget up front and then
 *            call slabs_preallocate() once the class table is built.
 */
void slabs_init(const size_t limit, const double factor, const bool prealloc) {
    int i = POWER_SMALLEST - 1;
    /* chunk size of the smallest class: the item header plus chunk_size */
    unsigned int size = sizeof(item) + settings.chunk_size;

    mem_limit = limit;

    if (prealloc) {
        /* Allocate everything in a big chunk with malloc */
        mem_base = malloc(mem_limit);
        if (mem_base != NULL) {
            /* success: the whole region is available, starting at mem_base */
            mem_current = mem_base;
            mem_avail = mem_limit;
        } else {
            /* not fatal: mem_base stays NULL and memory_allocate() falls
             * back to one malloc() per request */
            fprintf(stderr, "Warning: Failed to allocate requested memory in"
                    " one large chunk.\nWill allocate in smaller chunks\n");
        }
    }

    memset(slabclass, 0, sizeof(slabclass));

    while (++i < MAX_NUMBER_OF_SLAB_CLASSES-1 && size <= settings.item_size_max / factor) {
        /* Make sure items are always n-byte aligned */
        if (size % CHUNK_ALIGN_BYTES) /* round up to CHUNK_ALIGN_BYTES */
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);

        slabclass[i].size = size;
        /* number of chunks of this size that fit in one slab */
        slabclass[i].perslab = settings.item_size_max / slabclass[i].size;
        /* grow by the factor to get the next class's chunk size */
        size *= factor;
        if (settings.verbose > 1) {
            fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
                    i, slabclass[i].size, slabclass[i].perslab);
        }
    }

    /* the last slab class holds a single chunk of item_size_max bytes */
    power_largest = i;
    slabclass[power_largest].size = settings.item_size_max;
    slabclass[power_largest].perslab = 1;
    if (settings.verbose > 1) {
        fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
                i, slabclass[i].size, slabclass[i].perslab);
    }

    /* for the test suite:  faking of how much we've already malloc'd */
    {
        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
        if (t_initial_malloc) {
            mem_malloced = (size_t)atol(t_initial_malloc);
        }

    }

    if (prealloc) {
        /* power_largest is the index of the last slab class */
        slabs_preallocate(power_largest);
    }
}

/*
 * Allocate one slab for each slab class, starting at POWER_SMALLEST and
 * stopping once maxslabs classes have been handled. Exits the process if
 * any allocation fails.
 */
static void slabs_preallocate (const unsigned int maxslabs) {
    unsigned int handled = 0;
    int cls = POWER_SMALLEST;

    /* Giving every size class a slab up front avoids the confusing
       "SERVER_ERROR out of memory" replies that users otherwise see
       when a class has no memory of its own yet. */
    while (cls < MAX_NUMBER_OF_SLAB_CLASSES) {
        handled++;
        if (handled > maxslabs) {
            return;
        }

        if (do_slabs_newslab(cls) == 0) {
            fprintf(stderr, "Error while preallocating slab memory!\n"
                "If using -L or other prealloc options, max memory must be "
                "at least %d megabytes.\n", power_largest);
            exit(1);
        }

        cls++;
    }
}

在函数do_slabs_newslab中，会给slabclass_t的slab_list指针赋值，并把空闲item加到slabclass_t的slots链表上。

/*
 * Add one new slab to slab class `id`.
 *
 * Returns 1 on success, 0 when the memory limit would be exceeded or when
 * either the slab list or the slab memory could not be allocated.
 */
static int do_slabs_newslab(const unsigned int id) {
    slabclass_t *cls = &slabclass[id];
    int len = settings.slab_reassign ? settings.item_size_max
        : cls->size * cls->perslab;
    char *page;

    /* Refuse to go past the limit, but only once this class already owns
     * at least one slab. */
    if ((mem_limit && mem_malloced + len > mem_limit && cls->slabs > 0)) {
        mem_limit_reached = true;
        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
        return 0;
    }

    /* Make sure slab_list has room for one more slab pointer. */
    if (grow_slab_list(id) == 0) {
        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
        return 0;
    }

    /* Get the slab memory itself (from the preallocated region if any). */
    page = memory_allocate((size_t)len);
    if (page == 0) {
        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
        return 0;
    }

    memset(page, 0, (size_t)len);
    /* Hand the chunks of the fresh slab to the class. */
    split_slab_page_into_freelist(page, id);

    cls->slab_list[cls->slabs] = page;
    cls->slabs++;
    mem_malloced += len;
    MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);

    return 1;
}

/*
 * Ensure slab_list of class `id` can hold one more slab pointer.
 * When the list is full it is reallocated to twice its size (16 entries
 * the first time). Returns 1 on success, 0 if realloc() fails, in which
 * case the existing list is left untouched.
 */
static int grow_slab_list (const unsigned int id) {
    slabclass_t *cls = &slabclass[id];
    size_t capacity;
    void *resized;

    /* still room in the current array: nothing to do */
    if (cls->slabs != cls->list_size) {
        return 1;
    }

    capacity = (cls->list_size != 0) ? cls->list_size * 2 : 16;
    resized = realloc(cls->slab_list, capacity * sizeof(void *));
    if (resized == 0) {
        return 0;
    }

    cls->list_size = capacity;
    cls->slab_list = resized;
    return 1;
}

/*
 * Walk the slab starting at `ptr` in steps of the class's chunk size and
 * pass each of the perslab chunks to do_slabs_free() for class `id`.
 */
static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
    slabclass_t *cls = &slabclass[id];
    char *chunk = ptr;
    int idx = 0;

    while (idx < cls->perslab) {
        do_slabs_free(chunk, 0, id);
        chunk += cls->size;
        idx++;
    }
}

/*
 * Allocate `size` bytes for the slab allocator.
 *
 * Without a preallocated region (mem_base == NULL) this is a plain
 * malloc(). Otherwise the memory is carved from the region at mem_current,
 * and NULL is returned when fewer than `size` bytes remain.
 */
static void *memory_allocate(size_t size) {
    void *block;
    size_t remainder;

    if (mem_base == NULL) {
        /* We are not using a preallocated large memory chunk */
        return malloc(size);
    }

    block = mem_current;
    if (size > mem_avail) {
        return NULL;
    }

    /* mem_current must stay aligned, so round the request up to a
     * multiple of CHUNK_ALIGN_BYTES before advancing. */
    remainder = size % CHUNK_ALIGN_BYTES;
    if (remainder) {
        size += CHUNK_ALIGN_BYTES - remainder;
    }

    mem_current = ((char*)mem_current) + size;
    /* rounding up may exceed what is left; clamp at zero */
    mem_avail = (size < mem_avail) ? mem_avail - size : 0;

    return block;
}