LRU部分的功能介绍
前面说过,关于缓存系统必须采用一定的策略对老数据或者不经常使用的数据进行更新操作,否则容易导致缓存系统要么占用内存越来越大,要么新数据无法保存,这样就失去了作为缓存的意义;从操作系统借鉴,LRU算法无论在memcached还是redis中都被广泛使用,由于我个人时间有限,虽然自己已经看完了整体代码,但是完全的融合写出来精力有限,所以暂时借用了别人的一篇博客来说明,后面等时间充足再来写自己理解的整体流程。
LRU:是Least Recently Used 近期最少使用算法。
Memcached的LRU几种策略
1. 惰性删除。memcached一般不会主动去清除已经过期或者失效的缓存,当get请求一个item的时候,才会去检查item是否失效。
2. flush命令。flush命令会将所有的item设置为失效。
3. 创建的时候检查。Memcached会在创建ITEM的时候去LRU的链表尾部开始检查,是否有失效的ITEM,如果没有的话就重新创建。
4. LRU爬虫。memcached默认是关闭LRU爬虫的。LRU爬虫是一个单独的线程,会去清理失效的ITEM。
LRU部分的整体逻辑
见http://blog.csdn.net/luotuo44/article/details/42963793
这篇文章讲解的很详细,我这里就不做浅显的解释了。
这里需要说明一下,主要是分清楚lru_maintainer_thread和item_crawler_thread两个线程的功能:前者用于维护LRU队列(在HOT、WARM、COLD之间迁移item),后者用于遍历item并将过期数据删除。
LRU部分的代码分解
自定义类型
/*
 * Placeholder node used by the LRU crawler.
 *
 * Instances are cast to (item *) and linked into an LRU queue by
 * crawler_link_q(); the queue walkers recognise one by
 * nbytes == 0 && nkey == 0 && it_flags == 1.
 * NOTE(review): the leading fields presumably have to mirror the layout of
 * the item structure for that cast to be valid - confirm against the item
 * definition before reordering anything here.
 */
typedef struct {
struct _stritem *next;
struct _stritem *prev;
struct _stritem *h_next; /* hash chain next */
rel_time_t time; /* least recent access */
rel_time_t exptime; /* expire time */
/* Always set to 0 for a crawler; part of the crawler signature. */
int nbytes; /* size of data */
unsigned short refcount;
uint8_t nsuffix; /* length of flags-and-length string */
/* For a crawler, 1 means "enabled"; it is reset to 0 when the crawl ends. */
uint8_t it_flags; /* ITEM_* above */
/* Index of the LRU queue (slab class id OR-ed with the LRU type). */
uint8_t slabs_clsid;/* which slab class we're in */
/* Always set to 0 for a crawler; part of the crawler signature. */
uint8_t nkey; /* key length, w/terminating null and padding */
uint32_t remaining; /* Max keys to crawl per slab per invocation */
} crawler;
变量分解
1、LRU的变量类型,分别是HOT_LRU, WARM_LRU, COLD_LRU, NOEXP_LRU
2、item的指针数组heads;
3、item的指针数组tails;
4、整型数组sizes;
5、整型crawler_count;
6、执行lru的爬虫过程标记位do_run_lru_crawler_thread;
7、执行lru的maintenance过程的标记位do_run_lru_maintainer_thread;
8、爬虫互斥锁lru_crawler_lock;
9、爬虫条件变量lru_crawler_cond;
10、LRU的maintainer互斥锁lru_maintainer_lock;
代码分解
1、start_lru_maintainer_thread函数,创建LRU的maintainer线程;
/*
 * Spawn the LRU maintainer background thread.
 *
 * The run flag and settings.lru_maintainer_thread are set before the thread
 * is created, all under lru_maintainer_lock.
 *
 * Returns 0 on success, -1 if pthread_create() failed (the reason is printed
 * to stderr).
 */
int start_lru_maintainer_thread(void) {
    int status = 0;

    pthread_mutex_lock(&lru_maintainer_lock);

    /* Tell the thread body that it is allowed to keep looping. */
    do_run_lru_maintainer_thread = 1;
    settings.lru_maintainer_thread = true;

    /* lru_maintainer_thread is the thread entry point. */
    const int err = pthread_create(&lru_maintainer_tid, NULL,
                                   lru_maintainer_thread, NULL);
    if (err != 0) {
        fprintf(stderr, "Can't create LRU maintainer thread: %s\n",
                strerror(err));
        status = -1;
    }

    pthread_mutex_unlock(&lru_maintainer_lock);
    return status;
}
2、lru_maintainer_thread函数,maintainer线程的主循环,周期性地对各个slab class调用lru_maintainer_juggle;
/*
 * Body of the LRU maintainer thread.
 *
 * Loops while do_run_lru_maintainer_thread is set. Each round it sleeps with
 * lru_maintainer_lock released, then juggles either the one slab class that
 * was explicitly requested via lru_maintainer_check_clsid or every slab
 * class. The sleep interval backs off by 1000us per idle round and is halved
 * (never below MIN_LRU_MAINTAINER_SLEEP) after a round that moved something.
 *
 * arg is unused. Always returns NULL.
 */
static void *lru_maintainer_thread(void *arg) {
    useconds_t nap_usec = MIN_LRU_MAINTAINER_SLEEP; /* start at the minimum interval */
    rel_time_t last_crawler_check = 0;

    pthread_mutex_lock(&lru_maintainer_lock);
    if (settings.verbose > 2) {
        fprintf(stderr, "Starting LRU maintainer background thread\n");
    }

    while (do_run_lru_maintainer_thread) {
        int moved; /* number of moves performed in this round */

        /* Sleep without holding the maintainer lock. */
        pthread_mutex_unlock(&lru_maintainer_lock);
        usleep(nap_usec);
        pthread_mutex_lock(&lru_maintainer_lock);

        STATS_LOCK();
        stats.lru_maintainer_juggles++;
        STATS_UNLOCK();

        if (lru_maintainer_check_clsid == 0) {
            /* Regular pass: juggle every slab class (this both decides
             * whether items must move and actually moves them). */
            moved = 0;
            for (int clsid = POWER_SMALLEST; clsid < MAX_NUMBER_OF_SLAB_CLASSES; clsid++) {
                moved += lru_maintainer_juggle(clsid);
            }
        } else {
            /* We were asked to immediately wake up and poke a particular
             * slab class due to a low watermark being hit. */
            moved = lru_maintainer_juggle(lru_maintainer_check_clsid);
            lru_maintainer_check_clsid = 0;
        }

        if (moved != 0) {
            /* Busy: come back sooner, but never below the minimum. */
            nap_usec /= 2;
            if (nap_usec < MIN_LRU_MAINTAINER_SLEEP) {
                nap_usec = MIN_LRU_MAINTAINER_SLEEP;
            }
        } else if (nap_usec < MAX_LRU_MAINTAINER_SLEEP) {
            /* Idle: back off a little. */
            nap_usec += 1000;
        }

        /* Once per second at most. */
        if (settings.lru_crawler && last_crawler_check != current_time) {
            lru_maintainer_crawler_check();
            last_crawler_check = current_time;
        }
    }

    pthread_mutex_unlock(&lru_maintainer_lock);
    if (settings.verbose > 2) {
        fprintf(stderr, "LRU maintainer thread stopping\n");
    }

    return NULL;
}
3、lru_maintainer_juggle,循环N次来判断LRU是否该迁移,返回迁移数;
/*
 * Run up to 1000 balancing rounds over the HOT, WARM and COLD LRUs of one
 * slab class.
 *
 * slabs_clsid - slab class to work on.
 *
 * Returns the number of rounds in which at least one lru_pull_tail() call
 * reported progress.
 */
static int lru_maintainer_juggle(const int slabs_clsid) {
    bool mem_limit_reached = false;
    unsigned int total_chunks = 0;
    unsigned int chunks_perslab = 0;

    /* TODO: if free_chunks below high watermark, increase aggressiveness */
    const unsigned int chunks_free =
        slabs_available_chunks(slabs_clsid, &mem_limit_reached,
                               &total_chunks, &chunks_perslab);

    if (settings.expirezero_does_not_evict) {
        total_chunks -= noexp_lru_size(slabs_clsid);
    }

    /* If slab automove is enabled on any level, and we have more than 2.5
     * pages worth of chunks free in this class, ask (gently) to reassign a
     * page from this class back into the global pool (0).
     */
    if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2.5)) {
        slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL);
    }

    /* Juggle HOT/WARM/COLD up to N times. The round counter doubles as the
     * result because a round without progress ends the loop immediately. */
    int rounds = 0;
    while (rounds < 1000) {
        int progress = 0;

        /* WARM is only tried when HOT had nothing to do (short-circuit). */
        if (lru_pull_tail(slabs_clsid, HOT_LRU, total_chunks, false, 0) ||
            lru_pull_tail(slabs_clsid, WARM_LRU, total_chunks, false, 0)) {
            progress = 1;
        }
        progress += lru_pull_tail(slabs_clsid, COLD_LRU, total_chunks, false, 0);

        if (progress == 0) {
            break;
        }
        rounds++;
    }

    return rounds;
}
4、lru_pull_tail函数,返回应该删除、过期或驱逐的item的数目,可以被工作线程(如do_item_alloc)或LRU的maintainer线程调用;
/*
 * Examine the tail of one LRU queue of a slab class and reclaim, relocate or
 * evict items from it. May be called by worker threads or by the LRU
 * maintainer thread.
 *
 * Up to 5 tail entries are inspected (crawler placeholders do not count).
 * Expired or flushed items found on the way are all reclaimed; the walk
 * stops at the first live item that gets moved to another LRU, evicted, or
 * deliberately left alone.
 *
 * orig_id      - slab class id; 0 is rejected and returns 0.
 * cur_lru      - LRU to look at (HOT_LRU, WARM_LRU or COLD_LRU); it is OR-ed
 *                into the class id to form the queue index.
 * total_chunks - chunk count from which the HOT/WARM size limit is derived.
 * do_evict     - when true, the COLD_LRU tail item may be evicted (only if
 *                settings.evict_to_free is non-zero).
 * cur_hv       - hash value to skip; items hashing to it are not touched.
 *
 * Returns the number of items reclaimed, evicted or moved to another LRU.
 */
static int lru_pull_tail(const int orig_id, const int cur_lru,
        const unsigned int total_chunks, const bool do_evict, const uint32_t cur_hv) {
    item *it = NULL;
    int id = orig_id;
    int removed = 0;
    if (id == 0)
        return 0;

    int tries = 5;
    item *search;
    item *next_it;
    void *hold_lock = NULL;
    unsigned int move_to_lru = 0;
    uint64_t limit;

    id |= cur_lru; /* select the queue of the requested LRU type */
    pthread_mutex_lock(&lru_locks[id]);
    search = tails[id]; /* start from the queue tail */
    /* We walk up *only* for locked items, and if bottom is expired. */
    for (; tries > 0 && search != NULL; tries--, search = next_it) {
        /* we might relink search mid-loop, so search->prev isn't reliable */
        next_it = search->prev;
        if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
            /* We are a crawler, ignore it. */
            tries++;
            continue;
        }
        uint32_t hv = hash(ITEM_key(search), search->nkey);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount. Also skip ourselves. */
        if (hv == cur_hv || (hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        if (refcount_incr(&search->refcount) != 2) {
            /* Note pathological case with ref'ed items in tail.
             * Can still unlink the item, but it won't be reusable yet */
            itemstats[id].lrutail_reflocked++;
            /* In case of refcount leaks, enable for quick workaround. */
            /* WARNING: This can cause terrible corruption */
            if (settings.tail_repair_time &&
                    search->time + settings.tail_repair_time < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                /* This will call item_remove -> item_free since refcnt is 1 */
                do_item_unlink_nolock(search, hv);
                item_trylock_unlock(hold_lock);
                continue;
            }
        }

        /* Expired or flushed */
        if ((search->exptime != 0 && search->exptime < current_time)
            || item_is_flushed(search)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                /* The item was never fetched before it expired. */
                itemstats[id].expired_unfetched++;
            }
            /* refcnt 2 -> 1 */
            do_item_unlink_nolock(search, hv);
            /* refcnt 1 -> 0 -> item_free */
            do_item_remove(search);
            item_trylock_unlock(hold_lock);
            removed++;

            /* If all we're finding are expired, can keep going */
            continue;
        }

        /* If we're HOT_LRU or WARM_LRU and over size limit, send to COLD_LRU.
         * If we're COLD_LRU, send to WARM_LRU unless we need to evict
         */
        switch (cur_lru) {
            case HOT_LRU:
            case WARM_LRU:
                /* HOT and WARM share the logic below but each has its own
                 * size limit. The percentage is picked explicitly here: the
                 * previous code computed the HOT limit and then fell through
                 * into an unconditional WARM assignment, so hot_lru_pct was
                 * never honoured. */
                limit = total_chunks *
                        (cur_lru == HOT_LRU ? settings.hot_lru_pct
                                            : settings.warm_lru_pct) / 100;
                if (sizes[id] > limit) {
                    /* Over the limit: demote the item to COLD_LRU. */
                    itemstats[id].moves_to_cold++;
                    move_to_lru = COLD_LRU;
                    do_item_unlink_q(search); /* take it off the current queue */
                    it = search;
                    removed++;
                    break;
                } else if ((search->it_flags & ITEM_ACTIVE) != 0) {
                    /* Only allow ACTIVE relinking if we're not too large. */
                    itemstats[id].moves_within_lru++;
                    search->it_flags &= ~ITEM_ACTIVE;
                    do_item_update_nolock(search);
                    do_item_remove(search); /* drop our reference */
                    item_trylock_unlock(hold_lock);
                } else {
                    /* Don't want to move to COLD, not active, bail out */
                    it = search;
                }
                break;
            case COLD_LRU:
                it = search; /* No matter what, we're stopping */
                if (do_evict) {
                    if (settings.evict_to_free == 0) {
                        /* Don't think we need a counter for this. It'll OOM. */
                        break;
                    }
                    itemstats[id].evicted++;
                    itemstats[id].evicted_time = current_time - search->time;
                    if (search->exptime != 0)
                        itemstats[id].evicted_nonzero++;
                    if ((search->it_flags & ITEM_FETCHED) == 0) {
                        itemstats[id].evicted_unfetched++;
                    }
                    do_item_unlink_nolock(search, hv);
                    removed++;
                    if (settings.slab_automove == 2) {
                        slabs_reassign(-1, orig_id);
                    }
                } else if ((search->it_flags & ITEM_ACTIVE) != 0
                        && settings.lru_maintainer_thread) {
                    /* Not evicting: an active item is promoted to WARM_LRU. */
                    itemstats[id].moves_to_warm++;
                    search->it_flags &= ~ITEM_ACTIVE;
                    move_to_lru = WARM_LRU;
                    do_item_unlink_q(search);
                    removed++;
                }
                break;
        }
        if (it != NULL)
            break;
    }

    pthread_mutex_unlock(&lru_locks[id]);

    if (it != NULL) {
        if (move_to_lru) {
            /* Relink into the target LRU: HOT/WARM -> COLD, or COLD -> WARM. */
            it->slabs_clsid = ITEM_clsid(it);
            it->slabs_clsid |= move_to_lru;
            item_link_q(it);
        }
        do_item_remove(it); /* drop the reference taken in the loop */
        item_trylock_unlock(hold_lock);
    }

    return removed;
}
5、crawler_link_q函数,爬虫添加item到队尾;
/*
 * Append a crawler placeholder to the tail of its LRU queue; the queue is
 * selected by it->slabs_clsid. The placeholder becomes the new tail.
 *
 * The asserts require a crawler-shaped node (it_flags == 1, nbytes == 0)
 * and a non-empty queue that does not already end in this node.
 */
static void crawler_link_q(item *it) {
    assert(it->it_flags == 1);
    assert(it->nbytes == 0);

    item **const queue_head = &heads[it->slabs_clsid];
    item **const queue_tail = &tails[it->slabs_clsid];
    item *const old_tail = *queue_tail;

    assert(old_tail != NULL);
    assert(it != old_tail);
    assert((*queue_head && old_tail) || (*queue_head == NULL && old_tail == NULL));

    /* Hook the node in behind the previous tail. */
    it->prev = old_tail;
    it->next = NULL;
    if (old_tail != NULL) {
        assert(old_tail->next == NULL);
        old_tail->next = it;
    }

    *queue_tail = it;
    if (*queue_head == NULL) {
        *queue_head = it;
    }
}
6、crawler_unlink_q函数,从队列中移除item;
/*
 * Detach a crawler placeholder from the LRU queue selected by
 * it->slabs_clsid, updating the queue head/tail and the neighbours' links.
 * The node's own next/prev pointers are left untouched.
 */
static void crawler_unlink_q(item *it) {
    item **const queue_head = &heads[it->slabs_clsid];
    item **const queue_tail = &tails[it->slabs_clsid];

    if (it == *queue_head) {
        assert(it->prev == NULL);
        *queue_head = it->next;
    }
    if (it == *queue_tail) {
        assert(it->next == NULL);
        *queue_tail = it->prev;
    }

    /* A node must never be linked to itself. */
    assert(it->next != it);
    assert(it->prev != it);

    /* Splice the neighbours together. */
    item *const after = it->next;
    item *const before = it->prev;
    if (after != NULL) {
        after->prev = before;
    }
    if (before != NULL) {
        before->next = after;
    }
}
7、do_lru_crawler_start函数,向指定slab class的HOT、WARM、COLD各个LRU队列尾部插入爬虫item,开始一次爬取(爬虫线程本身由item_crawler_thread实现);
/*
 * Arm the crawler for one slab class by linking a crawler placeholder at
 * the tail of each non-empty HOT/WARM/COLD queue of that class.
 *
 * id        - slab class id.
 * remaining - value stored in each placeholder's `remaining` field
 *             (max keys to crawl per queue).
 *
 * Returns how many queues had a crawler linked in. When that is non-zero the
 * global crawler stats are updated and crawlerstats[id] is reset with a
 * fresh start_time.
 */
static int do_lru_crawler_start(uint32_t id, uint32_t remaining) {
    const uint32_t tocrawl[3] = { id | HOT_LRU, id | WARM_LRU, id | COLD_LRU };
    int starts = 0;
    int i;

    for (i = 0; i < 3; i++) {
        const uint32_t sid = tocrawl[i];

        pthread_mutex_lock(&lru_locks[sid]);
        if (tails[sid] == NULL) {
            /* Empty queue: nothing to crawl here. */
            pthread_mutex_unlock(&lru_locks[sid]);
            continue;
        }

        if (settings.verbose > 2) {
            fprintf(stderr, "Kicking LRU crawler off for LRU %d\n", sid);
        }

        /* nbytes == 0, nkey == 0 and it_flags == 1 mark the node as a crawler. */
        crawlers[sid].nbytes = 0;
        crawlers[sid].nkey = 0;
        crawlers[sid].it_flags = 1; /* For a crawler, this means enabled. */
        crawlers[sid].next = 0;
        crawlers[sid].prev = 0;
        crawlers[sid].time = 0;
        crawlers[sid].remaining = remaining;
        crawlers[sid].slabs_clsid = sid;

        /* Link the placeholder into the queue so the crawl has a cursor. */
        crawler_link_q((item *)&crawlers[sid]);
        /* The crawler thread only does work while crawler_count > 0. */
        crawler_count++;
        starts++;

        pthread_mutex_unlock(&lru_locks[sid]);
    }

    if (starts == 0) {
        return starts;
    }

    STATS_LOCK();
    stats.lru_crawler_running = true;
    stats.lru_crawler_starts++;
    STATS_UNLOCK();

    pthread_mutex_lock(&lru_crawler_stats_lock);
    memset(&crawlerstats[id], 0, sizeof(crawlerstats_t));
    crawlerstats[id].start_time = current_time;
    pthread_mutex_unlock(&lru_crawler_stats_lock);

    return starts;
}
8、item_crawler_thread爬虫线程;
/*
 * Body of the LRU crawler thread.
 *
 * Holds lru_crawler_lock and blocks on lru_crawler_cond until signalled.
 * While crawler_count is non-zero it round-robins over every LRU queue with
 * an enabled crawler placeholder (it_flags == 1), takes one step per queue
 * via crawler_crawl_q() and hands the item found to item_crawler_evaluate().
 * A crawler is retired when its queue is exhausted or its `remaining`
 * budget runs out.
 *
 * Throttling: when settings.lru_crawler_sleep is non-zero the thread sleeps
 * for that many microseconds each time the per-sleep budget
 * (settings.crawls_persleep) has been used up. The budget is counted down
 * once per evaluated item; the previous code never decremented it, so the
 * sleep only ever triggered when settings.crawls_persleep was <= 0.
 *
 * arg is unused. Always returns NULL.
 */
static void *item_crawler_thread(void *arg) {
    int i;
    int crawls_persleep = settings.crawls_persleep; /* items left before the next sleep */

    pthread_mutex_lock(&lru_crawler_lock);
    if (settings.verbose > 2)
        fprintf(stderr, "Starting LRU crawler background thread\n");
    while (do_run_lru_crawler_thread) {
        pthread_cond_wait(&lru_crawler_cond, &lru_crawler_lock);

        while (crawler_count) { /* at least one crawler is still linked in */
            item *search = NULL;
            void *hold_lock = NULL;

            for (i = POWER_SMALLEST; i < LARGEST_ID; i++) {
                if (crawlers[i].it_flags != 1) {
                    /* No active crawler on this queue. */
                    continue;
                }
                pthread_mutex_lock(&lru_locks[i]);
                search = crawler_crawl_q((item *)&crawlers[i]);
                if (search == NULL ||
                    (crawlers[i].remaining && --crawlers[i].remaining < 1)) {
                    /* Queue exhausted or budget used up: retire this crawler. */
                    if (settings.verbose > 2)
                        fprintf(stderr, "Nothing left to crawl for %d\n", i);
                    crawlers[i].it_flags = 0;
                    crawler_count--;
                    crawler_unlink_q((item *)&crawlers[i]);
                    pthread_mutex_unlock(&lru_locks[i]);
                    pthread_mutex_lock(&lru_crawler_stats_lock);
                    crawlerstats[CLEAR_LRU(i)].end_time = current_time;
                    crawlerstats[CLEAR_LRU(i)].run_complete = true;
                    pthread_mutex_unlock(&lru_crawler_stats_lock);
                    continue;
                }
                uint32_t hv = hash(ITEM_key(search), search->nkey);
                /* Attempt to hash item lock the "search" item. If locked, no
                 * other callers can incr the refcount
                 */
                if ((hold_lock = item_trylock(hv)) == NULL) {
                    pthread_mutex_unlock(&lru_locks[i]);
                    continue;
                }
                /* Now see if the item is refcount locked */
                if (refcount_incr(&search->refcount) != 2) {
                    refcount_decr(&search->refcount);
                    if (hold_lock)
                        item_trylock_unlock(hold_lock);
                    pthread_mutex_unlock(&lru_locks[i]);
                    continue;
                }

                /* Frees the item or decrements the refcount. */
                /* Interface for this could improve: do the free/decr here
                 * instead? */
                pthread_mutex_lock(&lru_crawler_stats_lock);
                item_crawler_evaluate(search, hv, i);
                pthread_mutex_unlock(&lru_crawler_stats_lock);

                if (hold_lock)
                    item_trylock_unlock(hold_lock);
                pthread_mutex_unlock(&lru_locks[i]);

                /* Throttle: count evaluated items and sleep once the budget
                 * is spent. The countdown only runs while throttling is
                 * enabled, so the counter cannot run away below zero. */
                if (settings.lru_crawler_sleep) {
                    if (crawls_persleep <= 0) {
                        usleep(settings.lru_crawler_sleep);
                        crawls_persleep = settings.crawls_persleep;
                    } else {
                        crawls_persleep--;
                    }
                }
            }
        }
        if (settings.verbose > 2)
            fprintf(stderr, "LRU crawler thread sleeping\n");
        STATS_LOCK();
        stats.lru_crawler_running = false;
        STATS_UNLOCK();
    }
    pthread_mutex_unlock(&lru_crawler_lock);
    if (settings.verbose > 2)
        fprintf(stderr, "LRU crawler thread stopping\n");

    return NULL;
}
9、 item_crawler_evaluate函数,检查是否过期,是的话删除;
/*
 * Judge one item visited by the crawler.
 *
 * search - item to inspect; the caller has already bumped its refcount.
 * hv     - hash value of the item's key.
 * i      - index of the LRU queue the item was found in.
 *
 * An expired or flushed item is unlinked and released. Any other item only
 * has the crawler's reference dropped and is counted in the per-slab-class
 * crawler statistics (no expiry, TTL over an hour, or a per-minute TTL
 * histogram bucket).
 */
static void item_crawler_evaluate(item *search, uint32_t hv, int i) {
    crawlerstats_t *const cs = &crawlerstats[CLEAR_LRU(i)];

    itemstats[i].crawler_items_checked++;

    const int reclaimable =
        (search->exptime != 0 && search->exptime < current_time)
        || is_flushed(search);

    if (!reclaimable) {
        cs->seen++;
        refcount_decr(&search->refcount);
        if (search->exptime == 0) {
            cs->noexp++;
        } else if (search->exptime - current_time > 3599) {
            cs->ttl_hourplus++;
        } else {
            /* Less than an hour left: one histogram bucket per minute. */
            rel_time_t ttl_remain = search->exptime - current_time;
            int bucket = ttl_remain / 60;
            cs->histo[bucket]++;
        }
        return;
    }

    itemstats[i].crawler_reclaimed++;
    cs->reclaimed++;

    if (settings.verbose > 1) {
        int k;
        char *key = ITEM_key(search);
        fprintf(stderr, "LRU crawler found an expired item (flags: %d, slab: %d): ",
            search->it_flags, search->slabs_clsid);
        /* Print the key one character at a time, nkey characters in total. */
        for (k = 0; k < search->nkey; ++k) {
            fprintf(stderr, "%c", key[k]);
        }
        fprintf(stderr, "\n");
    }

    if ((search->it_flags & ITEM_FETCHED) == 0) {
        itemstats[i].expired_unfetched++;
    }

    do_item_unlink_nolock(search, hv);
    do_item_remove(search); /* drop the last reference */
    assert(search->slabs_clsid == 0);
}