memcached内存管理(2) ----------------items

Memcached Item详解

最新推荐文章于 2025-03-17 06:09:33 发布

原创最新推荐文章于 2025-03-17 06:09:33 发布 · 1.9k 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#memcached

c/c++ 同时被 3 个专栏收录

17 篇文章

订阅专栏

linux

17 篇文章

订阅专栏

memcached

4 篇文章

订阅专栏

本文深入解析Memcached中item数据结构的细节，包括item的组成、内存分配与回收机制，以及LRU实现等关键内容。

memcached.h中item的声明

/**
 * Structure for storing items within memcached.
 */
typedef struct _stritem {
    struct _stritem *next;
    struct _stritem *prev;
    struct _stritem *h_next;    /* hash chain next */
    rel_time_t      time;       /* least recent access */
    rel_time_t      exptime;    /* expire time */
    int             nbytes;     /* size of data */
    unsigned short  refcount;
    uint8_t         nsuffix;    /* length of flags-and-length string */
    uint8_t         it_flags;   /* ITEM_* above */
    uint8_t         slabs_clsid;/* which slab class we're in */
    uint8_t         nkey;       /* key length, w/terminating null and padding */
    /* this odd type prevents type-punning issues when we do
     * the little shuffle to save space when not using CAS. */
union {
        uint64_t cas;
        char end;
    } data[];

} item;

next，pre是双向链表使用的，用于slots，heads和tails，h_next则用于hash表

time 是最近访问时间，time=current_time, 新的item都会放在对应id双向链表的开头，而do_item_alloc会从链表尾开始搜链表，变相的实现了LRU

exptime 过期时间

nbytes 实际数据的字节数

refcount 引用次数

nsuffix 后缀的长度，以下代码赋值

*nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2);

即通过snprintf返回的值

it_flags

slab_clsid 所属的slabclass的id

nkey 键长

    union {
        uint64_t cas;
        char end;
    } data[];

变长数组，真正存放key-value的地方

具体的内容为 cas + key + suffix + data

cas 是一个编号，不一定会使用

key是键

suffix是后缀，由前面的snprintf赋值

data就是我们的数据了

一组调用的宏，位于memcached.h

#define ITEM_get_cas(i) (((i)->it_flags & ITEM_CAS) ? \
        (i)->data->cas : (uint64_t)0)

#define ITEM_set_cas(i,v) { \
    if ((i)->it_flags & ITEM_CAS) { \
        (i)->data->cas = v; \
    } \
}

#define ITEM_key(item) (((char*)&((item)->data)) \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

#define ITEM_suffix(item) ((char*) &((item)->data) + (item)->nkey + 1 \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

#define ITEM_data(item) ((char*) &((item)->data) + (item)->nkey + 1 \
         + (item)->nsuffix \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 \
         + (item)->nsuffix + (item)->nbytes \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

用于获取具体item的cas，key，suffix和data的起始地址。

从宏ITEM_ntotal可以看出一个item 的实际长度为 sizeof(item) + nkey + 1 + nsuffix + nbytes ( + sizoef(uint64_t), 如果使用了cas)

上面的数据结构就是item的大概，下面就看看item是如何获取内存和释放内存

item.c中的重要全局变量

static item *heads[LARGEST_ID];
static item *tails[LARGEST_ID];

为对应的slabclass中从slots获取而来空间（以item方式组织的双向链表），分别指向链表的头尾，LRU。

do_item_alloc函数

/**
 * Generates the variable-sized part of the header for an object.
 *
 * key     - The key
 * nkey    - The length of the key
 * flags   - key flags
 * nbytes  - Number of bytes to hold value and addition CRLF terminator
 * suffix  - Buffer for the "VALUE" line suffix (flags, size).
 * nsuffix - The length of the suffix is stored here.
 *
 * Returns the total size of the header.
 */
static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes,
                     char *suffix, uint8_t *nsuffix) {
    /* suffix is defined at 40 chars elsewhere.. */
    *nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2);
    return sizeof(item) + nkey + *nsuffix + nbytes;
}

给suffix赋值，并返回item总的长度（除去cas的）。总长度用于决定该item属于哪个slabclass

/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */

   item *search;
    rel_time_t oldest_live = settings.oldest_live;
    //从尾部开始搜索，因为尾部的time总是最早的，所以就是一种LRU实现
    search = tails[id];
    if (search != NULL && (refcount_incr(&search->refcount) == 2)) {
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time)) {  // dead by flush   //如果尾部的item已经超时，那么就替换掉
            STATS_LOCK();
            stats.reclaimed++;
            STATS_UNLOCK();
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                STATS_LOCK();
                stats.expired_unfetched++;
                STATS_UNLOCK();
                itemstats[id].expired_unfetched++;
            }
            
            //替换掉已经超时的item
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);   //虽然属于同一个slabclass，但是长度仍可能不一样，需要修改一下
            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));                     //将超时的item从双向链表和hash表中除去
            /* Initialize the item block: */
            it->slabs_clsid = 0;                                                                                           //slab_clsid设为0
        } else if ((it = slabs_alloc(ntotal, id)) == NULL) {//没有超时，则从slabclass中的slots获取空间，还是失败的话
            if (settings.evict_to_free == 0) {                       //evict_to_free = 0 的话则直接返回null,否则强行将最后一个item替换掉
                itemstats[id].outofmemory++;
                mutex_unlock(&cache_lock);
                return NULL;
            }
            itemstats[id].evicted++;
            itemstats[id].evicted_time = current_time - search->time;
            if (search->exptime != 0)
                itemstats[id].evicted_nonzero++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                STATS_LOCK();
                stats.evicted_unfetched++;
                STATS_UNLOCK();
                itemstats[id].evicted_unfetched++;
            }
            STATS_LOCK();
            stats.evictions++;
            STATS_UNLOCK();
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
            /* Initialize the item block: */
            it->slabs_clsid = 0;

            /* If we've just evicted an item, and the automover is set to
             * angry bird mode, attempt to rip memory into this slab class.
             * TODO: Move valid object detection into a function, and on a
             * "successful" memory pull, look behind and see if the next alloc
             * would be an eviction. Then kick off the slab mover before the
             * eviction happens.
             */
            if (settings.slab_automove == 2)
                slabs_reassign(-1, id);
        } else {
            refcount_decr(&search->refcount);
        }
    } else {
        /* If the LRU is empty or locked, attempt to allocate memory */
        it = slabs_alloc(ntotal, id);
        if (search != NULL)
            refcount_decr(&search->refcount);
    }

    if (it == NULL) {
        itemstats[id].outofmemory++;
        /* Last ditch effort. There was a very rare bug which caused
         * refcount leaks. We leave this just in case they ever happen again.
         * We can reasonably assume no item can stay locked for more than
         * three hours, so if we find one in the tail which is that old,
         * free it anyway.
         */
        if (search != NULL &&
            search->refcount != 2 &&
            search->time + TAIL_REPAIR_TIME < current_time) {
            itemstats[id].tailrepairs++;
            search->refcount = 1;
            do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
        }
        mutex_unlock(&cache_lock);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);

    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU.
     */


    //初始化一些item属性，可以看出这里只是申请了data所需要的空间，而未给data真正的赋值，并且将其连入到LRU和hash表的操作也不在这
    it->refcount = 1;     /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}

上面代码如果要替换掉最后的item时会将其从LRU和hash表中除掉

void do_item_unlink(item *it, const uint32_t hv) {
    MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
    mutex_lock(&cache_lock);
    if ((it->it_flags & ITEM_LINKED) != 0) {
        it->it_flags &= ~ITEM_LINKED;
        STATS_LOCK();
        stats.curr_bytes -= ITEM_ntotal(it);
        stats.curr_items -= 1;
        STATS_UNLOCK();
        assoc_delete(ITEM_key(it), it->nkey, hv);              //从hash表中删除，在assoc.c中再解释
        item_unlink_q(it);                                                     //从LRU中删除 
        do_item_remove(it);                                                //返回item的空间 
    }
    mutex_unlock(&cache_lock);
}

item_unlink_q函数

static void item_unlink_q(item *it) {
    item **head, **tail;
    assert(it->slabs_clsid < LARGEST_ID);
    head = &heads[it->slabs_clsid];
    tail = &tails[it->slabs_clsid];

    if (*head == it) {
        assert(it->prev == 0);
        *head = it->next;
    }
    if (*tail == it) {
        assert(it->next == 0);
        *tail = it->prev;
    }
    assert(it->next != it);
    assert(it->prev != it);

    if (it->next) it->next->prev = it->prev;
    if (it->prev) it->prev->next = it->next;
    sizes[it->slabs_clsid]--;
    return;
}

还是很好懂的，就是普通的双向链表操作。

do_item_remove函数

void do_item_remove(item *it) {
    MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
    assert((it->it_flags & ITEM_SLABBED) == 0);

    if (refcount_decr(&it->refcount) == 0) {
        item_free(it);
    }
}

如果refcount为0则调用item_free返还item占用的空间

void item_free(item *it) {
    size_t ntotal = ITEM_ntotal(it);
    unsigned int clsid;
    assert((it->it_flags & ITEM_LINKED) == 0);
    assert(it != heads[it->slabs_clsid]);
    assert(it != tails[it->slabs_clsid]);
    assert(it->refcount == 0);

    /* so slab size changer can tell later if item is already free or not */
    clsid = it->slabs_clsid;
    it->slabs_clsid = 0;
    DEBUG_REFCNT(it, 'F');
    slabs_free(it, ntotal, clsid);
}

实际就是调用slabs_free将item占用的内存放回到slabclass的slots中。

还有几个重要的函数

do_item_link 就是将item放入对用的LRU和hash表中

int do_item_link(item *it, const uint32_t hv) {
    MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
    assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
    mutex_lock(&cache_lock);
    it->it_flags |= ITEM_LINKED;
    it->time = current_time;

    STATS_LOCK();
    stats.curr_bytes += ITEM_ntotal(it);
    stats.curr_items += 1;
    stats.total_items += 1;
    STATS_UNLOCK();

    /* Allocate a new CAS ID on link. */
    ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
    assoc_insert(it, hv);
    item_link_q(it);
    refcount_incr(&it->refcount);
    mutex_unlock(&cache_lock);

    return 1;
}

item_link_q函数，就是将item加入到LRU中

static void item_link_q(item *it) { /* item is the new head */
    item **head, **tail;
    assert(it->slabs_clsid < LARGEST_ID);
    assert((it->it_flags & ITEM_SLABBED) == 0);

    head = &heads[it->slabs_clsid];
    tail = &tails[it->slabs_clsid];
    assert(it != *head);
    assert((*head && *tail) || (*head == 0 && *tail == 0));
    it->prev = 0;
    it->next = *head;
    if (it->next) it->next->prev = it;
    *head = it;
    if (*tail == 0) *tail = it;
    sizes[it->slabs_clsid]++;
    return;
}

do_item_update函数，就是改变其最后访问时间，反映到链表上就是将其移到链表头

void do_item_update(item *it) {
    MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
    if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
        assert((it->it_flags & ITEM_SLABBED) == 0);

        mutex_lock(&cache_lock);
        if ((it->it_flags & ITEM_LINKED) != 0) {
            item_unlink_q(it);
            it->time = current_time;
            item_link_q(it);
        }
        mutex_unlock(&cache_lock);
    }
}

do_item_touch函数，改变exptime

item *do_item_touch(const char *key, size_t nkey, uint32_t exptime,
                    const uint32_t hv) {
    item *it = do_item_get(key, nkey, hv);
    if (it != NULL) {
        it->exptime = exptime;
    }
    return it;
}

do_item_get函数：通过hash表（key）来找到所要的所要的item ，其实就是调用assoc_find函数，将在后面再讲

/** wrapper around assoc_find which does the lazy expiration logic */
item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
    mutex_lock(&cache_lock);
    item *it = assoc_find(key, nkey, hv);
    if (it != NULL) {
        refcount_incr(&it->refcount);
        /* Optimization for slab reassignment. prevents popular items from
         * jamming in busy wait. Can only do this here to satisfy lock order
         * of item_lock, cache_lock, slabs_lock. */
        if (slab_rebalance_signal &&
            ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
            do_item_unlink_nolock(it, hv);
            do_item_remove(it);
            it = NULL;
        }
    }
    mutex_unlock(&cache_lock);
    int was_found = 0;

    if (settings.verbose > 2) {
        if (it == NULL) {
            fprintf(stderr, "> NOT FOUND %s", key);
        } else {
            fprintf(stderr, "> FOUND KEY %s", ITEM_key(it));
            was_found++;
        }
    }

    if (it != NULL) {
        if (settings.oldest_live != 0 && settings.oldest_live <= current_time &&
            it->time <= settings.oldest_live) {
            do_item_unlink(it, hv);
            do_item_remove(it);
            it = NULL;
            if (was_found) {
                fprintf(stderr, " -nuked by flush");
            }
        } else if (it->exptime != 0 && it->exptime <= current_time) {
            do_item_unlink(it, hv);
            do_item_remove(it);
            it = NULL;
            if (was_found) {
                fprintf(stderr, " -nuked by expire");
            }
        } else {
            it->it_flags |= ITEM_FETCHED;
            DEBUG_REFCNT(it, '+');
        }
    }

    if (settings.verbose > 2)
        fprintf(stderr, "\n");

    return it;
}

大概就这样了吧。

再盗一张图