memcached.h中item的声明
/**
 * Structure for storing items within memcached.
 *
 * The fixed-size header below is followed in memory by a variable-length
 * tail reached through data[]: an optional 8-byte CAS value (present only
 * when ITEM_CAS is set in it_flags), then the key, the suffix string and
 * finally the value bytes. The ITEM_key/ITEM_suffix/ITEM_data macros
 * compute the offsets into that tail.
 */
typedef struct _stritem {
    struct _stritem *next;   /* next item in the doubly linked list */
    struct _stritem *prev;   /* previous item in the doubly linked list */
    struct _stritem *h_next; /* hash chain next */
    rel_time_t time; /* last access time; set to current_time when the
                      * item is linked or bumped in the LRU */
    rel_time_t exptime; /* expire time; 0 is treated as "never expires" */
    int nbytes; /* size of data; the suffix is formatted with nbytes - 2 */
    unsigned short refcount; /* number of references currently held */
    uint8_t nsuffix; /* length of flags-and-length string */
    uint8_t it_flags; /* ITEM_* flag bits (ITEM_LINKED, ITEM_CAS, ...) */
    uint8_t slabs_clsid;/* which slab class we're in */
    uint8_t nkey; /* key length, w/terminating null and padding
                   * NOTE(review): the accessor macros add 1 after nkey
                   * bytes, so nkey itself appears to exclude the
                   * terminator -- confirm against callers */
    /* this odd type prevents type-punning issues when we do
     * the little shuffle to save space when not using CAS. */
    union {
        uint64_t cas;
        char end;
    } data[]; /* flexible array member: start of the variable-length tail */
} item;
next,prev是双向链表使用的,用于slots,heads和tails,h_next则用于hash表
time 是最近访问时间,time=current_time, 新的item都会放在对应id双向链表的开头,而do_item_alloc会从链表尾开始搜链表,变相的实现了LRU
exptime 过期时间
nbytes 实际数据的字节数
refcount 引用次数
nsuffix 后缀的长度,以下代码赋值
*nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2);
即通过snprintf返回的值
it_flags item的标志位(例如ITEM_LINKED、ITEM_CAS、ITEM_SLABBED、ITEM_FETCHED)
slabs_clsid 所属的slabclass的id
nkey 键长
union {
uint64_t cas;
char end;
} data[];
变长数组,真正存放key-value的地方
具体的内容为 cas + key + suffix + data
cas 是一个编号,不一定会使用
key是键
suffix是后缀,由前面的snprintf赋值
data就是我们的数据了
一组调用的宏,位于memcached.h
/* Yields the CAS value stored in item i, or (uint64_t)0 when the item
 * does not carry the ITEM_CAS flag (no CAS value is read in that case). */
#define ITEM_get_cas(i) \
    ((((i)->it_flags & ITEM_CAS) == 0) ? (uint64_t)0 : (i)->data->cas)
/*
 * Stores CAS value v into item i, but only when the item carries the
 * ITEM_CAS flag; otherwise nothing is written and v is not evaluated.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement: "if (c) ITEM_set_cas(i, v); else ..." now compiles,
 * which the previous bare-brace form did not. v is parenthesized so that
 * expressions with low-precedence operators are assigned as a whole.
 * Invoke it with a trailing semicolon, like a function call.
 */
#define ITEM_set_cas(i,v) do { \
    if ((i)->it_flags & ITEM_CAS) { \
        (i)->data->cas = (v); \
    } \
} while (0)
/*
 * Accessors for the variable-length tail of an item. The tail is laid out
 * as: [8-byte CAS, only when ITEM_CAS is set] key, one extra byte, suffix,
 * value. Each macro evaluates its argument more than once, so pass an
 * expression without side effects.
 */

/* Start of the key: the tail itself, skipping the CAS slot if present. */
#define ITEM_key(item) \
    ((char *)&((item)->data) + \
     ((((item)->it_flags & ITEM_CAS) != 0) ? sizeof(uint64_t) : 0))

/* Start of the suffix: nkey key bytes plus one byte past the key start. */
#define ITEM_suffix(item) (ITEM_key(item) + (item)->nkey + 1)

/* Start of the value: nsuffix bytes past the suffix start. */
#define ITEM_data(item) (ITEM_suffix(item) + (item)->nsuffix)

/* Total footprint of the item: fixed header plus every part of the tail. */
#define ITEM_ntotal(item) \
    (sizeof(struct _stritem) \
     + ((((item)->it_flags & ITEM_CAS) != 0) ? sizeof(uint64_t) : 0) \
     + (item)->nkey + 1 \
     + (item)->nsuffix \
     + (item)->nbytes)
用于获取具体item的cas,key,suffix和data的起始地址。
从宏ITEM_ntotal可以看出一个item 的实际长度为 sizeof(item) + nkey + 1 + nsuffix + nbytes ( + sizeof(uint64_t), 如果使用了cas)
上面的数据结构就是item的大概,下面就看看item是如何获取内存和释放内存
item.c中的重要全局变量
/* Ends of the per-slab-class LRU lists, indexed by an item's slabs_clsid.
 * item_link_q() inserts new items at heads[id]; do_item_alloc() inspects
 * tails[id] first when looking for an item to reuse or evict. */
static item *heads[LARGEST_ID];
static item *tails[LARGEST_ID];
heads[id]和tails[id]分别指向第id个slabclass对应的LRU双向链表的头和尾;链表由item组成,item的空间是从该slabclass的slots中获取而来的。
do_item_alloc函数
/**
 * Builds the textual suffix of an item header and reports how much space
 * the item needs (excluding any CAS slot, which the caller adds itself).
 *
 * nkey    - key length as it should be counted in the total
 * flags   - client flags, printed as the first number of the suffix
 * nbytes  - number of bytes to hold the value plus its CRLF terminator;
 *           the suffix prints nbytes - 2
 * suffix  - output buffer for the " <flags> <length>\r\n" string
 * nsuffix - receives the length of the formatted suffix
 *
 * Returns sizeof(item) + nkey + *nsuffix + nbytes.
 */
static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes,
                               char *suffix, uint8_t *nsuffix) {
    /* suffix is defined at 40 chars elsewhere.. */
    const int written = snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2);

    *nsuffix = (uint8_t) written;
    return sizeof(item) + nkey + *nsuffix + nbytes;
}
给suffix赋值,并返回item总的长度(除去cas的)。总长度用于决定该item属于哪个slabclass
/**
 * Obtains memory for a new item and fills in its header, key and suffix.
 * The value bytes are not written here, and the item is not inserted into
 * the hash table or the LRU list by this function.
 *
 * Under cache_lock the function tries, in order:
 *   1. reuse the LRU tail of the target slab class if it is idle and has
 *      expired (or was invalidated by a flush);
 *   2. allocate fresh memory with slabs_alloc();
 *   3. if that fails and settings.evict_to_free is non-zero, evict the
 *      LRU tail and reuse its memory.
 *
 * key     - key bytes; exactly nkey bytes are copied into the item
 * nkey    - key length
 * flags   - client flags, formatted into the suffix string
 * exptime - expire time stored in the item
 * nbytes  - size of the value area (the suffix prints nbytes - 2)
 *
 * Returns the new item with refcount set to 1 (the caller's reference),
 * or NULL/0 when slabs_clsid() yields 0 or no memory could be obtained.
 */
/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    /* nkey + 1 accounts for the extra byte the ITEM_* macros place after the key. */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        /* item_make_header() does not count the CAS slot; add it here. */
        ntotal += sizeof(uint64_t);
    }
    /* The total size decides which slab class the item goes into. */
    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;
    mutex_lock(&cache_lock);
    /* do a quick check if we have any expired items in the tail.. */
    item *search;
    rel_time_t oldest_live = settings.oldest_live;
    /* Start at the tail: items are linked at the head, so the tail holds
     * the oldest access time, which yields LRU behaviour. */
    search = tails[id];
    /* Take a temporary reference on the tail; only a resulting count of
     * exactly 2 is accepted as "safe to reuse".
     * NOTE(review): presumably 2 == the list's own reference plus ours,
     * i.e. nobody else is using the item -- confirm. */
    if (search != NULL && (refcount_incr(&search->refcount) == 2)) {
        if ((search->exptime != 0 && search->exptime < current_time)
            || (search->time <= oldest_live && oldest_live <= current_time)) { // dead by flush; the tail item has expired, so reuse it
            STATS_LOCK();
            stats.reclaimed++;
            STATS_UNLOCK();
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                STATS_LOCK();
                stats.expired_unfetched++;
                STATS_UNLOCK();
                itemstats[id].expired_unfetched++;
            }
            /* Reuse the expired item's memory for the new item. */
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal); /* same slab class, but the old and new sizes may differ, so fix the accounting */
            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0)); /* remove the expired item from the LRU list and the hash table */
            /* Initialize the item block: */
            it->slabs_clsid = 0; /* reset slabs_clsid to 0 */
        } else if ((it = slabs_alloc(ntotal, id)) == NULL) { /* tail not expired: try the slab class's free memory; this branch runs if that fails too */
            if (settings.evict_to_free == 0) { /* evict_to_free == 0: give up and return NULL; otherwise forcibly replace the tail item */
                itemstats[id].outofmemory++;
                mutex_unlock(&cache_lock);
                return NULL;
            }
            itemstats[id].evicted++;
            itemstats[id].evicted_time = current_time - search->time;
            if (search->exptime != 0)
                itemstats[id].evicted_nonzero++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                STATS_LOCK();
                stats.evicted_unfetched++;
                STATS_UNLOCK();
                itemstats[id].evicted_unfetched++;
            }
            STATS_LOCK();
            stats.evictions++;
            STATS_UNLOCK();
            /* Evict the (still valid) tail item and take over its memory. */
            it = search;
            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
            /* Initialize the item block: */
            it->slabs_clsid = 0;
            /* If we've just evicted an item, and the automover is set to
             * angry bird mode, attempt to rip memory into this slab class.
             * TODO: Move valid object detection into a function, and on a
             * "successful" memory pull, look behind and see if the next alloc
             * would be an eviction. Then kick off the slab mover before the
             * eviction happens.
             */
            if (settings.slab_automove == 2)
                slabs_reassign(-1, id);
        } else {
            /* slabs_alloc() succeeded: drop the temporary reference on the tail. */
            refcount_decr(&search->refcount);
        }
    } else {
        /* If the LRU is empty or locked, attempt to allocate memory */
        it = slabs_alloc(ntotal, id);
        if (search != NULL)
            refcount_decr(&search->refcount);
    }
    if (it == NULL) {
        itemstats[id].outofmemory++;
        /* Last ditch effort. There was a very rare bug which caused
         * refcount leaks. We leave this just in case they ever happen again.
         * We can reasonably assume no item can stay locked for more than
         * three hours, so if we find one in the tail which is that old,
         * free it anyway.
         */
        if (search != NULL &&
            search->refcount != 2 &&
            search->time + TAIL_REPAIR_TIME < current_time) {
            itemstats[id].tailrepairs++;
            search->refcount = 1;
            do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
        }
        mutex_unlock(&cache_lock);
        return NULL;
    }
    assert(it->slabs_clsid == 0);
    assert(it != heads[id]);
    /* Item initialization can happen outside of the lock; the item's already
     * been removed from the slab LRU.
     */
    /* Initialize the header fields. Only the space for the value has been
     * reserved: the value itself is not written here, and inserting the
     * item into the LRU list and hash table does not happen here either. */
    it->refcount = 1; /* the caller will have a reference */
    mutex_unlock(&cache_lock);
    it->next = it->prev = it->h_next = 0;
    it->slabs_clsid = id;
    DEBUG_REFCNT(it, '*');
    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    /* Exactly nkey bytes are copied; no terminator is written after the key. */
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}
上面的代码在需要替换掉最后一个item时,会先将其从LRU和hash表中除掉。do_item_alloc此时已经持有cache_lock,所以调用的是不加锁的版本do_item_unlink_nolock;下面是自己加锁的版本do_item_unlink
/**
 * Removes a linked item from the hash table and its LRU list, updates the
 * global byte/item counters, and drops one reference via do_item_remove().
 * Does nothing (beyond taking and releasing cache_lock) when the item is
 * not flagged ITEM_LINKED.
 *
 * it - item to unlink
 * hv - hash value of the item's key, passed to assoc_delete()
 */
void do_item_unlink(item *it, const uint32_t hv) {
    MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
    mutex_lock(&cache_lock);

    if ((it->it_flags & ITEM_LINKED) == 0) {
        /* Not linked (any more): there is nothing to undo. */
        mutex_unlock(&cache_lock);
        return;
    }

    it->it_flags &= ~ITEM_LINKED;

    STATS_LOCK();
    stats.curr_bytes -= ITEM_ntotal(it);
    stats.curr_items--;
    STATS_UNLOCK();

    assoc_delete(ITEM_key(it), it->nkey, hv); /* drop from the hash table */
    item_unlink_q(it);                        /* drop from the LRU list */
    do_item_remove(it);                       /* release one reference */

    mutex_unlock(&cache_lock);
}
item_unlink_q函数
/**
 * Detaches an item from the doubly linked LRU list of its slab class,
 * fixing up the list's head/tail pointers and both neighbours, and
 * decrements that class's element count in sizes[].
 * The item's own next/prev pointers are left untouched.
 */
static void item_unlink_q(item *it) {
    assert(it->slabs_clsid < LARGEST_ID);

    item **lru_head = &heads[it->slabs_clsid];
    item **lru_tail = &tails[it->slabs_clsid];

    /* If the item sits at either end, move that end past it. */
    if (it == *lru_head) {
        assert(it->prev == NULL);
        *lru_head = it->next;
    }
    if (it == *lru_tail) {
        assert(it->next == NULL);
        *lru_tail = it->prev;
    }

    assert(it->next != it);
    assert(it->prev != it);

    /* Splice the neighbours together. */
    if (it->next != NULL) {
        it->next->prev = it->prev;
    }
    if (it->prev != NULL) {
        it->prev->next = it->next;
    }

    sizes[it->slabs_clsid]--;
}
还是很好懂的,就是普通的双向链表操作。
do_item_remove函数
/**
 * Releases one reference on the item. When the decremented reference
 * count reaches zero the item's memory is handed back via item_free().
 */
void do_item_remove(item *it) {
    MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
    assert((it->it_flags & ITEM_SLABBED) == 0);

    if (refcount_decr(&it->refcount) != 0) {
        /* Still referenced elsewhere: keep the memory. */
        return;
    }
    item_free(it);
}
先将refcount减1,如果减到0则调用item_free返还item占用的空间
/**
 * Returns an unlinked, unreferenced item to its slab class through
 * slabs_free(). The item must not be linked, must not be the head or tail
 * of its LRU list, and must have a reference count of zero.
 */
void item_free(item *it) {
    assert((it->it_flags & ITEM_LINKED) == 0);
    assert(it != heads[it->slabs_clsid]);
    assert(it != tails[it->slabs_clsid]);
    assert(it->refcount == 0);

    /* Capture size and class before the class id is cleared below. */
    const size_t ntotal = ITEM_ntotal(it);
    const unsigned int clsid = it->slabs_clsid;

    /* so slab size changer can tell later if item is already free or not */
    it->slabs_clsid = 0;
    DEBUG_REFCNT(it, 'F');

    slabs_free(it, ntotal, clsid);
}
实际就是调用slabs_free将item占用的内存放回到slabclass的slots中。
还有几个重要的函数
do_item_link 就是将item放入对应的LRU和hash表中
/**
 * Makes an item visible: under cache_lock it marks the item ITEM_LINKED,
 * stamps its access time, bumps the global counters, assigns a CAS id,
 * inserts it into the hash table and at the head of its LRU list, and
 * takes one additional reference on it.
 *
 * it - item to link; must be neither ITEM_LINKED nor ITEM_SLABBED
 * hv - hash value of the item's key, passed to assoc_insert()
 *
 * Always returns 1.
 */
int do_item_link(item *it, const uint32_t hv) {
    MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
    assert((it->it_flags & ITEM_LINKED) == 0);
    assert((it->it_flags & ITEM_SLABBED) == 0);

    mutex_lock(&cache_lock);

    it->time = current_time;
    it->it_flags |= ITEM_LINKED;

    STATS_LOCK();
    stats.curr_bytes += ITEM_ntotal(it);
    stats.curr_items++;
    stats.total_items++;
    STATS_UNLOCK();

    /* Allocate a new CAS ID on link. */
    ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);

    assoc_insert(it, hv);   /* hash table */
    item_link_q(it);        /* LRU list */
    refcount_incr(&it->refcount);

    mutex_unlock(&cache_lock);
    return 1;
}
item_link_q函数,就是将item加入到LRU中
/**
 * Inserts an item at the head of the doubly linked LRU list of its slab
 * class (the item becomes the new head) and increments that class's
 * element count in sizes[]. An empty list gets the item as tail as well.
 */
static void item_link_q(item *it) {
    assert(it->slabs_clsid < LARGEST_ID);
    assert((it->it_flags & ITEM_SLABBED) == 0);

    item **lru_head = &heads[it->slabs_clsid];
    item **lru_tail = &tails[it->slabs_clsid];

    assert(it != *lru_head);
    /* Head and tail are either both set or both empty. */
    assert((*lru_head && *lru_tail) || (*lru_head == 0 && *lru_tail == 0));

    item *old_head = *lru_head;

    it->prev = NULL;
    it->next = old_head;
    if (old_head != NULL) {
        old_head->prev = it;
    }
    *lru_head = it;

    if (*lru_tail == NULL) {
        *lru_tail = it;
    }

    sizes[it->slabs_clsid]++;
}
do_item_update函数,就是改变其最后访问时间,反映到链表上就是将其移到链表头
/**
 * Refreshes an item's access time and moves it to the head of its LRU
 * list. To limit list churn this only happens when the recorded access
 * time is older than current_time - ITEM_UPDATE_INTERVAL, and only if the
 * item is still linked once cache_lock is held.
 */
void do_item_update(item *it) {
    MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);

    if (!(it->time < current_time - ITEM_UPDATE_INTERVAL)) {
        /* Bumped recently enough: skip the lock entirely. */
        return;
    }

    assert((it->it_flags & ITEM_SLABBED) == 0);

    mutex_lock(&cache_lock);
    if ((it->it_flags & ITEM_LINKED) != 0) {
        /* Unlink, restamp, relink at the head. */
        item_unlink_q(it);
        it->time = current_time;
        item_link_q(it);
    }
    mutex_unlock(&cache_lock);
}
do_item_touch函数,改变exptime
/**
 * Looks an item up with do_item_get() and, when found, overwrites its
 * expire time.
 *
 * key, nkey - key bytes and length
 * exptime   - new expire time to store in the item
 * hv        - hash value of the key
 *
 * Returns whatever do_item_get() returned: the item, or NULL on a miss.
 */
item *do_item_touch(const char *key, size_t nkey, uint32_t exptime,
                    const uint32_t hv) {
    item *found = do_item_get(key, nkey, hv);

    if (found == NULL) {
        return NULL;
    }
    found->exptime = exptime;
    return found;
}
do_item_get函数:通过hash表(key)来找到所要的item,其实就是调用assoc_find函数,将在后面再讲
/** wrapper around assoc_find which does the lazy expiration logic */
item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
mutex_lock(&cache_lock);
item *it = assoc_find(key, nkey, hv);
if (it != NULL) {
refcount_incr(&it->refcount);
/* Optimization for slab reassignment. prevents popular items from
* jamming in busy wait. Can only do this here to satisfy lock order
* of item_lock, cache_lock, slabs_lock. */
if (slab_rebalance_signal &&
((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
do_item_unlink_nolock(it, hv);
do_item_remove(it);
it = NULL;
}
}
mutex_unlock(&cache_lock);
int was_found = 0;
if (settings.verbose > 2) {
if (it == NULL) {
fprintf(stderr, "> NOT FOUND %s", key);
} else {
fprintf(stderr, "> FOUND KEY %s", ITEM_key(it));
was_found++;
}
}
if (it != NULL) {
if (settings.oldest_live != 0 && settings.oldest_live <= current_time &&
it->time <= settings.oldest_live) {
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
if (was_found) {
fprintf(stderr, " -nuked by flush");
}
} else if (it->exptime != 0 && it->exptime <= current_time) {
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
if (was_found) {
fprintf(stderr, " -nuked by expire");
}
} else {
it->it_flags |= ITEM_FETCHED;
DEBUG_REFCNT(it, '+');
}
}
if (settings.verbose > 2)
fprintf(stderr, "\n");
return it;
}
大概就这样了吧。
再盗一张图