[redis] maxmemory 数据淘汰策略
redis是内存数据库,可以通过redis.conf配置最大内存占用(每次进行内存分配时都会统计已使用内存,在zmalloc接口中有实现)。当redis使用的内存超过限制时会触发数据淘汰机制,持续淘汰数据,直到当前内存使用量小于阈值。
maxmemory <bytes>
当redis使用的内存超过限制时,redis可以从两种范围中选择要淘汰的数据:
- 数据库中的所有kv对(allkeys-*策略,对应db->dict)
- 设置了过期时间的kv对(volatile-*策略,对应db->expires)
淘汰的策略包括:
- 随机淘汰
- 先淘汰到期的或快要到期的数据
- 近似LRU
- 近似LFU
debug
修改redis.conf的maxmemory,在server.c line3755处打断点。
if (server.maxmemory && !isInsideYieldingLongCommand()) {
// ...
}
调用栈为:
performEvictions()
processCommand(client *c)
...
淘汰数据的逻辑是:
redis淘汰策略
源码位置evict.c
采样淘汰
/* Excerpt: sampling-based selection of the key to evict.
 * This branch is taken for the LRU and LFU policies and for volatile-ttl.
 * The locals i, k, de, db, dict, bestkey and bestdbid are declared
 * outside of this excerpt. */
if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
{
struct evictionPoolEntry *pool = EvictionPoolLRU;
/* Keep sampling until a victim is found, or until every database turns
 * out to have no candidate keys. */
while(bestkey == NULL) {
unsigned long total_keys = 0, keys;
/* We don't want to make local-db choices when expiring keys,
* so to start populate the eviction pool sampling keys from
* every DB. */
for (i = 0; i < server.dbnum; i++) {
db = server.db+i;
/* allkeys-* policies sample from the main dictionary, the other
 * policies only from the dictionary of keys with an expire set. */
dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
db->dict : db->expires;
if ((keys = dictSize(dict)) != 0) {
/* NOTE(review): evictionPoolPopulate is not shown here; presumably it
 * samples 'dict' and inserts the candidates into 'pool' - confirm. */
evictionPoolPopulate(i, dict, db->dict, pool);
total_keys += keys;
}
}
if (!total_keys) break; /* No keys to evict. */
/* Go backward from best to worst element to evict. */
for (k = EVPOOL_SIZE-1; k >= 0; k--) {
/* Skip empty pool slots. */
if (pool[k].key == NULL) continue;
bestdbid = pool[k].dbid;
/* Look the candidate up in the same dictionary it was sampled from. */
if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
de = dictFind(server.db[bestdbid].dict,
pool[k].key);
} else {
de = dictFind(server.db[bestdbid].expires,
pool[k].key);
}
/* Remove the entry from the pool. */
/* The key string is released only when it is not the slot's 'cached'
 * buffer. */
if (pool[k].key != pool[k].cached)
sdsfree(pool[k].key);
pool[k].key = NULL;
pool[k].idle = 0;
/* If the key exists, is our pick. Otherwise it is
* a ghost and we need to try the next element. */
if (de) {
bestkey = dictGetKey(de);
break;
} else {
/* Ghost... Iterate again. */
}
}
}
}
我们从dict或者expires字典里采样key,然后插入到EvictionPoolLRU这个全局池子中,再从后往前遍历EvictionPoolLRU池,选择idle最大的元素淘汰(同步或异步删除)。
这个淘汰循环会一直执行,直到下面的循环条件不再成立(即已释放的内存达到需要释放的量):
while (mem_freed < (long long)mem_tofree)
这个循环可能耗时较长,为了避免因为一直淘汰元素而长时间占用cpu资源,redis会在超过时间限制后退出循环,并通过startEvictionTimeProc在event loop中注册定时任务(evictionTimeProc),稍后继续淘汰。
一致性
在我们淘汰元素时,如果是主从架构,需要将删除动作传播到slave服务器;如果开启了AOF,同样的删除命令也会写入aof文件(见下面的PROPAGATE_AOF|PROPAGATE_REPL)。
/* Send the deletion of 'key' in database 'db' to the replicas and to the
 * AOF file (if enabled). The deletion is emitted as UNLINK when 'lazy' is
 * non-zero and as DEL otherwise.
 *
 * Routing every master-side deletion through this single function keeps
 * things consistent: both the AOF and the master->replica link preserve
 * the order of operations, so writes against keys that are about to
 * expire remain safe.
 *
 * There are two kinds of callers:
 * 1. Inside call() (for instance lazy expire on key access): nothing more
 *    is required, because call() handles server.also_propagate.
 * 2. Outside call() (for instance active expire, eviction): the caller
 *    must remember to call propagatePendingCommands, preferably once at
 *    the end of the deletion batch, so the DELs are wrapped in
 *    MULTI/EXEC. */
void propagateDeletion(redisDb *db, robj *key, int lazy) {
    robj *cmdname = lazy ? shared.unlink : shared.del;
    robj *args[2] = { cmdname, key };

    /* Hold a reference on both objects for the duration of the call. */
    incrRefCount(cmdname);
    incrRefCount(key);

    /* A deletion decided by the master has to reach the replicas no matter
     * what, even when a module executed a command without asking for
     * propagation: force the flag on for this call, then restore it. */
    int saved_replication_allowed = server.replication_allowed;
    server.replication_allowed = 1;
    alsoPropagate(db->id, args, 2, PROPAGATE_AOF | PROPAGATE_REPL);
    server.replication_allowed = saved_replication_allowed;

    decrRefCount(cmdname);
    decrRefCount(key);
}
/* Schedule one more command for propagation to the AOF / replication
 * stream, after the command currently being executed.
 *
 * 'dbid' is the ID of the database the command has to be propagated into.
 * 'argv' holds 'argc' redis object pointers: the command's arguments.
 * 'target' selects the propagation targets; nothing is queued when
 * shouldPropagate(target) is false.
 *
 * The 'argv' vector itself is never retained: a private copy is made, so
 * releasing the vector (usually stack allocated) stays the caller's job.
 * The reference count of every passed object is incremented here, so the
 * caller does not need to do that. */
void alsoPropagate(int dbid, robj **argv, int argc, int target) {
    if (!shouldPropagate(target)) return;

    /* Private vector that is handed over to the pending-operations array. */
    robj **owned = zmalloc(sizeof(robj*) * argc);
    for (int idx = 0; idx < argc; idx++) {
        incrRefCount(argv[idx]);
        owned[idx] = argv[idx];
    }
    redisOpArrayAppend(&server.also_propagate, dbid, owned, argc, target);
}
/* Append one operation (dbid, argv, argc, target) to the array 'oa',
 * resizing the backing storage when needed, and return the number of
 * operations held by the array after the append.
 *
 * The 'argv' pointer is stored as is; the vector is not copied here. */
int redisOpArrayAppend(redisOpArray *oa, int dbid, robj **argv, int argc, int target) {
    int wanted_capacity = oa->capacity;

    /* An array with no operations is set to 16 slots; a full one doubles. */
    if (oa->numops == 0)
        wanted_capacity = 16;
    else if (oa->numops >= oa->capacity)
        wanted_capacity = oa->capacity * 2;

    /* Reallocate only when the capacity actually changes. */
    if (wanted_capacity != oa->capacity) {
        oa->capacity = wanted_capacity;
        oa->ops = zrealloc(oa->ops, sizeof(redisOp) * oa->capacity);
    }

    redisOp *slot = &oa->ops[oa->numops];
    slot->dbid = dbid;
    slot->argv = argv;
    slot->argc = argc;
    slot->target = target;

    return ++oa->numops;
}
这里的删除是通过传递两个robj对象(命令名和key)来做的,其实就是将各个动作(del,unlink等)抽象成robj对象,保存在下面的共享对象结构体中。
/* Set of pre-built objects referenced by name from the rest of the code.
 * For example, propagateDeletion() uses the 'del' and 'unlink' members as
 * the command-name argument of the deletion it propagates.
 * NOTE(review): presumably this is the type of the global 'shared' and the
 * members are created once at startup - confirm against the definition. */
struct sharedObjectsStruct {
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
*queued, *null[4], *nullarray[4], *emptymap[4], *emptyset[4],
*emptyarray, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
*outofrangeerr, *noscripterr, *loadingerr,
*slowevalerr, *slowscripterr, *slowmoduleerr, *bgsaveerr,
*masterdownerr, *roslaveerr, *execaborterr, *noautherr, *noreplicaserr,
*busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
*unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink,
*rpop, *lpop, *lpush, *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax,
*emptyscan, *multi, *exec, *left, *right, *hset, *srem, *xgroup, *xclaim,
*script, *replconf, *eval, *persist, *set, *pexpireat, *pexpire,
*time, *pxat, *absttl, *retrycount, *force, *justid, *entriesread,
*lastid, *ping, *setid, *keepttl, *load, *createconsumer,
*getack, *special_asterick, *special_equals, *default_username, *redacted,
*ssubscribebulk,*sunsubscribebulk, *smessagebulk,
/* The arrays below are sized by constants defined outside this view. */
*select[PROTO_SHARED_SELECT_CMDS],
*integers[OBJ_SHARED_INTEGERS],
*mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
*bulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "$<value>\r\n" */
*maphdr[OBJ_SHARED_BULKHDR_LEN], /* "%<value>\r\n" */
*sethdr[OBJ_SHARED_BULKHDR_LEN]; /* "~<value>\r\n" */
/* Unlike the members above, these two are plain sds strings. */
sds minstring, maxstring;
};