一 序
之前在整理Object的时候,关于LRU的淘汰地方没有看代码,本篇补上。
redisObject中除了type、encoding、ptr和refcount属性外,还有一个lru属性用来计算空转时长。OBJECT IDLETIME命令可以打印出给定键的空转时长,是用当前时间减去键的lru时间计算得出的。OBJECT IDLETIME命令是特殊的,这个命令在访问键的对象时,不会修改值对象的lru属性。
键的空转时长还有一个作用,如果服务器打开了maxmemory选项,并且服务器用于回收内存的算法是volatile-lru 或者 allkeys-lru,那么当服务器占用的内存数超过了maxmemory选项所设置的上限值时,空转时长较高的那部分键会优先被服务器释放,从而回收内存。
这里没有展开,等后面整理淘汰算法的时候再看。
二 processCommand
Redis 每次为客户端执行一个命令时,都会检测使用的内存是否超额。如果超额,就进行数据淘汰。入口函数是processCommand,源码在server.c:
/* Process one fully-read command for client 'c'.
 *
 * If this function gets called we already read a whole
 * command, arguments are in the client argv/argc fields.
 * processCommand() execute the command or prepare the
 * server for a bulk read from the client.
 *
 * Before the command is executed a sequence of checks is applied, in this
 * order: QUIT handling, command lookup and arity, authentication, cluster
 * redirection, maxmemory, persistence errors, min-slaves-to-write,
 * read-only slave, Pub/Sub context, stale slave data, DB loading and
 * Lua script timeout. A failed check sends an error reply and returns
 * without executing the command.
 *
 * If C_OK is returned the client is still alive and valid and
 * other operations can be performed by the caller. Otherwise
 * if C_ERR is returned the client was destroyed (i.e. after QUIT).
 */
int processCommand(client *c) {
/* The QUIT command is handled separately. Normal command procs will
* go through checking for replication and QUIT will cause trouble
* when FORCE_REPLICATION is enabled and would be implemented in
* a regular command proc. */
// QUIT is handled on its own, before the command lookup.
if (!strcasecmp(c->argv[0]->ptr,"quit")) {
addReply(c,shared.ok);
c->flags |= CLIENT_CLOSE_AFTER_REPLY;// close the client once the reply is sent; C_ERR is returned below
return C_ERR;
}
/* Now lookup the command and check ASAP about trivial error conditions
* such as wrong arity, bad command name and so forth. */
// Look the command up by name (argv[0]) and remember it as cmd/lastcmd.
c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
if (!c->cmd) {
// No command with that name.
flagTransaction(c);
addReplyErrorFormat(c,"unknown command '%s'",
(char*)c->argv[0]->ptr);
return C_OK;
// Arity mismatch: a positive arity must equal argc exactly, and argc
// must never be smaller than -arity.
} else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
(c->argc < -c->cmd->arity)) {
flagTransaction(c); // flag the transaction (if any) as failed
addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
c->cmd->name);
return C_OK;
}
/* Check if the user is authenticated */
// With requirepass set, an unauthenticated client may only run AUTH.
if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
{
flagTransaction(c);// flag the transaction (if any) as failed
addReply(c,shared.noautherr);
return C_OK;
}
/* If cluster is enabled perform the cluster redirection here.
* However we don't perform the redirection if:
* 1) The sender of this command is our master.
* 2) The command has no key arguments. */
// The condition below also skips redirection for a Lua client whose
// caller (server.lua_caller) is our master, and never treats EXEC as a
// key-less command.
if (server.cluster_enabled &&
!(c->flags & CLIENT_MASTER) &&
!(c->flags & CLIENT_LUA &&
server.lua_caller->flags & CLIENT_MASTER) &&
!(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0 &&
c->cmd->proc != execCommand))
{
int hashslot;
int error_code;
// Resolve the node for this query; hashslot and error_code are
// passed by address so the callee can fill them in.
clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,
&hashslot,&error_code);
// No node was returned, or the node is not this server.
if (n == NULL || n != server.cluster->myself) {
// For EXEC the whole transaction is discarded.
if (c->cmd->proc == execCommand) {
discardTransaction(c);
} else {// otherwise flag the transaction (if any) as failed
flagTransaction(c);
}
// Redirect the client.
clusterRedirectClient(c,n,hashslot,error_code);
return C_OK;
}
}
/* Handle the maxmemory directive.
*
* First we try to free some memory if possible (if there are volatile
* keys in the dataset). If there are not the only thing we can do
* is returning an error. */
// When maxmemory is configured, check memory usage before running the
// command.
if (server.maxmemory) {
// Try to free memory if the limit is exceeded; the result is checked
// against C_ERR below.
int retval = freeMemoryIfNeeded();
/* freeMemoryIfNeeded may flush slave output buffers. This may result
* into a slave, that may be the active client, to be freed. */
// If the current client is gone after the call above, return C_ERR.
if (server.current_client == NULL) return C_ERR;
/* It was impossible to free enough memory, and the command the client
* is trying to execute is denied during OOM conditions? Error. */
// Memory could not be freed and the command carries CMD_DENYOOM:
// reply with the out-of-memory error.
if ((c->cmd->flags & CMD_DENYOOM) && retval == C_ERR) {
flagTransaction(c); // flag the transaction (if any) as failed
addReply(c, shared.oomerr);
return C_OK;
}
}
/* Don't accept write commands if there are problems persisting on disk
* and if this is a master instance. */
// On a master (masterhost == NULL), refuse write commands and PING while
// the last background save failed (with stop_writes_on_bgsave_err and
// save points configured) or the last AOF write failed.
if (((server.stop_writes_on_bgsave_err &&
server.saveparamslen > 0 &&
server.lastbgsave_status == C_ERR) ||
server.aof_last_write_status == C_ERR) &&
server.masterhost == NULL &&
(c->cmd->flags & CMD_WRITE ||
c->cmd->proc == pingCommand))
{
flagTransaction(c); // flag the transaction (if any) as failed
// If the last AOF write succeeded, the failure is the background save:
// reply bgsaveerr. Otherwise report the AOF write error with its errno.
if (server.aof_last_write_status == C_OK)
addReply(c, shared.bgsaveerr);
else
addReplySds(c,
sdscatprintf(sdsempty(),
"-MISCONF Errors writing to the AOF file: %s\r\n",
strerror(server.aof_last_write_errno)));
return C_OK;
}
/* Don't accept write commands if there are not enough good slaves and
* user configured the min-slaves-to-write option. */
// On a master with min-slaves-to-write and the max lag configured, refuse
// write commands when the count of good slaves is below the minimum.
if (server.masterhost == NULL &&
server.repl_min_slaves_to_write &&
server.repl_min_slaves_max_lag &&
c->cmd->flags & CMD_WRITE &&
server.repl_good_slaves_count < server.repl_min_slaves_to_write)
{ // flag the transaction (if any) as failed
flagTransaction(c);
addReply(c, shared.noreplicaserr);
return C_OK;
}
/* Don't accept write commands if this is a read only slave. But
* accept write commands if this is our master. */
// A read-only slave refuses write commands unless they come from its
// master.
if (server.masterhost && server.repl_slave_ro &&
!(c->flags & CLIENT_MASTER) &&
c->cmd->flags & CMD_WRITE)
{
addReply(c, shared.roslaveerr);
return C_OK;
}
/* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
// A client in Pub/Sub mode may only run PING and the (P)SUBSCRIBE /
// (P)UNSUBSCRIBE commands; anything else gets an error reply.
if (c->flags & CLIENT_PUBSUB &&
c->cmd->proc != pingCommand &&
c->cmd->proc != subscribeCommand &&
c->cmd->proc != unsubscribeCommand &&
c->cmd->proc != psubscribeCommand &&
c->cmd->proc != punsubscribeCommand) {
addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / PING / QUIT allowed in this context");
return C_OK;
}
/* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
* we are a slave with a broken link with master. */
// Slave whose replication state is not REPL_STATE_CONNECTED and that must
// not serve stale data: only commands flagged CMD_STALE are allowed.
if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
server.repl_serve_stale_data == 0 &&
!(c->cmd->flags & CMD_STALE))
{
flagTransaction(c);
addReply(c, shared.masterdownerr);
return C_OK;
}
/* Loading DB? Return an error if the command has not the
* CMD_LOADING flag. */
// While the server is loading, only commands flagged CMD_LOADING run;
// everything else gets the loading error.
if (server.loading && !(c->cmd->flags & CMD_LOADING)) {
addReply(c, shared.loadingerr);
return C_OK;
}
/* Lua script too slow? Only allow a limited number of commands. */
// While a Lua script has timed out, only these pass: AUTH, REPLCONF,
// a two-argument SHUTDOWN whose argument starts with 'n'/'N', and a
// two-argument SCRIPT whose argument starts with 'k'/'K'.
if (server.lua_timedout &&
c->cmd->proc != authCommand &&
c->cmd->proc != replconfCommand &&
!(c->cmd->proc == shutdownCommand &&
c->argc == 2 &&
tolower(((char*)c->argv[1]->ptr)[0]) == 'n') &&
!(c->cmd->proc == scriptCommand &&
c->argc == 2 &&
tolower(((char*)c->argv[1]->ptr)[0]) == 'k'))
{
flagTransaction(c);
addReply(c, shared.slowscripterr);
return C_OK;
}
/* Exec the command */
// Client is inside a MULTI transaction and the command is not one of
// EXEC, DISCARD, MULTI or WATCH.
if (c->flags & CLIENT_MULTI &&
c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
{ // queue the command instead of running it, and reply QUEUED
queueMultiCommand(c);
addReply(c,shared.queued);
} else { // run the command right away
call(c,CMD_CALL_FULL);
// Store the current master replication offset in the client.
c->woff = server.master_repl_offset;
// If any keys are in the ready list, serve the clients blocked on
// lists.
if (listLength(server.ready_keys))
handleClientsBlockedOnLists();
}
return C_OK;
}
可以看到,在命令处理函数的执行过程中会检测内存使用量,如果检测到内存使用超额,就会触发数据淘汰机制。下面来看触发淘汰机制的函数 freeMemoryIfNeeded() ,源码同样在server.c:
/* Try to bring memory usage back under server.maxmemory by evicting keys
 * according to server.maxmemory_policy.
 *
 * Memory used by slave output buffers and by the AOF buffers is not counted
 * against the limit.
 *
 * Returns C_OK when clients are paused, when the counted memory is within
 * the limit, or once at least the excess amount has been freed. Returns
 * C_ERR when the limit is exceeded and the policy is MAXMEMORY_NO_EVICTION,
 * or when a full pass over all databases evicts no key. */
int freeMemoryIfNeeded(void) {
size_t mem_used, mem_tofree, mem_freed;
int slaves = listLength(server.slaves);
mstime_t latency, eviction_latency;
/* When clients are paused the dataset should be static not just from the
* POV of clients not being able to write, but also from the POV of
* expires and evictions of keys not being performed. */
// Clients are paused: do nothing and report success.
if (clientsArePaused()) return C_OK;
/* Remove the size of slaves output buffers and AOF buffer from the
* count of used memory. */
// Start from the total allocated memory, then subtract:
// 1) the output buffers of the slaves
// 2) the AOF buffers
mem_used = zmalloc_used_memory();
if (slaves) { // at least one slave is attached
listIter li;
listNode *ln;
listRewind(server.slaves,&li);
// Walk the list of slaves.
while((ln = listNext(&li))) {
client *slave = listNodeValue(ln);
// Output buffer memory reported for this slave.
unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
// Subtract it, clamping at zero so the unsigned counter cannot wrap.
if (obuf_bytes > mem_used)
mem_used = 0;
else
mem_used -= obuf_bytes;
}
}
// AOF is enabled.
// NOTE(review): unlike the slave loop above, the two subtractions below are
// not clamped; if the buffers ever exceeded mem_used the size_t would wrap
// around - confirm whether that can happen in practice.
if (server.aof_state != AOF_OFF) {
// Subtract the size of the AOF buffer.
mem_used -= sdslen(server.aof_buf);
// Subtract the size of the AOF rewrite buffer.
mem_used -= aofRewriteBufferSize();
}
/* Check if we are over the memory limit. */
// Within the limit: nothing to do.
if (mem_used <= server.maxmemory) return C_OK;
// Over the limit but the policy forbids eviction: report failure.
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
return C_ERR; /* We need to free memory, but policy forbids. */
/* Compute how much memory we need to free. */
// Amount of memory above the limit.
mem_tofree = mem_used - server.maxmemory;
mem_freed = 0; // nothing freed yet
// Start timing the whole eviction cycle.
latencyStartMonitor(latency);
// Keep evicting until the freed amount reaches the target. Each iteration
// makes one pass over all databases, evicting at most one key per database.
while (mem_freed < mem_tofree) {
int j, k, keys_freed = 0;
// Visit every database.
for (j = 0; j < server.dbnum; j++) {
long bestval = 0; /* just to prevent warning */
sds bestkey = NULL;
dictEntry *de;
redisDb *db = server.db+j;
dict *dict;
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM)
{ // allkeys-lru / allkeys-random:
// candidates come from the main key dictionary
dict = server.db[j].dict;
} else { // every other policy picks candidates from the expires dictionary
dict = server.db[j].expires;
}
// Skip databases whose candidate dictionary is empty.
if (dictSize(dict) == 0) continue;
/* volatile-random and allkeys-random policy */
// Random policies: pick one random entry from the candidate dictionary.
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
{
// Take a random entry and use its key.
de = dictGetRandomKey(dict);
bestkey = dictGetKey(de);
}
/* volatile-lru and allkeys-lru policy */
// LRU policies: refill the per-database eviction pool and take the best
// candidate from it.
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU)
{
// The eviction pool of this database.
struct evictionPoolEntry *pool = db->eviction_pool;
while(bestkey == NULL) {
// Populate the pool from the candidate dictionary.
evictionPoolPopulate(dict, db->dict, db->eviction_pool);
/* Go backward from best to worst element to evict. */
// Scan the pool from the last slot down to the first.
for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) {
// Skip empty slots.
if (pool[k].key == NULL) continue;
// Look the pooled key up in the candidate dictionary.
de = dictFind(dict,pool[k].key);
/* Remove the entry from the pool. */
// Free the pool's copy of the key.
sdsfree(pool[k].key);
/* Shift all elements on its right to left. */
// Close the gap left by the removed slot.
memmove(pool+k,pool+k+1,
sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1));
/* Clear the element on the right which is empty
* since we shifted one position to the left. */
// Reset key and idle time of the now-free last slot.
pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key = NULL;
pool[MAXMEMORY_EVICTION_POOL_SIZE-1].idle = 0;
/* If the key exists, is our pick. Otherwise it is
* a ghost and we need to try the next element. */
// Key still present in the dictionary: this is the one to evict.
if (de) {
bestkey = dictGetKey(de);
break;
} else {// not found: move on to the next pool slot
/* Ghost... */
continue;
}
} // if no pooled key was found, repopulate the pool and retry until a key is picked
}
}
/* volatile-ttl */
// volatile-ttl: sample random entries and keep the one with the smallest
// stored value (the expire timestamp, per the comment below).
else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
// Take maxmemory_samples samples.
for (k = 0; k < server.maxmemory_samples; k++) {
sds thiskey;
long thisval;
// Pick a random entry and read its key and value.
de = dictGetRandomKey(dict);
thiskey = dictGetKey(de);
thisval = (long) dictGetVal(de);
/* Expire sooner (minor expire unix timestamp) is better
* candidate for deletion */
// Keep this sample if it is the first one or has a smaller value.
if (bestkey == NULL || thisval < bestval) {
bestkey = thiskey;
bestval = thisval;
}
}
}
/* Finally remove the selected key. */
// Evict the key selected for this database, if any.
if (bestkey) {
long long delta;
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
// Propagate the removal (per the comment below, a DEL sent to the AOF
// and the replication link).
propagateExpire(db,keyobj);
/* We compute the amount of memory freed by dbDelete() alone.
* It is possible that actually the memory needed to propagate
* the DEL in AOF and replication link is greater than the one
* we are freeing removing the key, but we can't account for
* that otherwise we would never exit the loop.
*
* AOF and Output buffer memory will be freed eventually so
* we only care about memory used by the key space. */
// Memory in use just before the delete.
delta = (long long) zmalloc_used_memory();
// Start timing the delete itself.
latencyStartMonitor(eviction_latency);
dbDelete(db,keyobj);// delete the key
// Stop timing the delete.
latencyEndMonitor(eviction_latency);
// Record the delete latency under the "eviction-del" event.
latencyAddSampleIfNeeded("eviction-del",eviction_latency);
// Remove the nested delete time from the outer cycle measurement.
latencyRemoveNestedEvent(latency,eviction_latency);
// Memory released by the delete = before - after.
delta -= (long long) zmalloc_used_memory();
// Add it to the running total.
mem_freed += delta;
// Bump the server-wide evicted keys counter.
server.stat_evictedkeys++;
// Emit the "evicted" keyspace notification.
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
keyobj, db->id);
// Release the temporary key object.
decrRefCount(keyobj);
keys_freed++; // one more key evicted in this pass
/* When the memory to free starts to be big enough, we may
* start spending so much time here that is impossible to
* deliver data to the slaves fast enough, so we force the
* transmission here inside the loop. */
// With slaves attached, flush their output buffers now.
if (slaves) flushSlavesOutputBuffers();
}
}
if (!keys_freed) { // no database yielded a key in this pass: give up with C_ERR
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency);
return C_ERR; /* nothing to free... */
}
} // target reached: record the latency of the whole eviction cycle
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency);
return C_OK;
}
其他:
Redis的LRU算法不是一个严格的LRU实现。这意味着Redis不能保证选中最佳的候选键(也就是最久未被访问的那些键)来回收。相反,Redis 会执行一个近似的LRU算法:先采样一小部分键,然后在采样键中回收最合适(空转时间最长,即最久未被访问)的那个。
然而,从Redis3.0开始,算法被改进为维护一个回收候选键池。这改善了算法的性能,使得更接近于真实的LRU算法的行为。Redis的LRU算法有一点很重要,你可以调整算法的精度,通过改变每次回收时检查的采样数量。
这个参数可以通过如下配置指令来设置:
maxmemory-samples 5
总结:借用二代的话,就是从直接删除来看,redis不能当做数据库来用。
参考: