前言
数据淘汰是一个友好的功能,但还算不上优秀的功能:它带来一些好处,也带来一些头疼的问题。某天一位同事说:redis 的数据老是丢失,不能用。到环境中一看,发现启用了数据淘汰,数据只是在内存不足的情况下被淘汰,在他看来就成了“数据丢失”。另外,在高峰期使用 lru 策略时,数据淘汰会频繁发生,较大程度地降低并发量。
淘汰的数据集合
redis默认有16个db,每个db有一个dict和一个expires的属性
/* Per-database state. Each database carries its own key space ('dict'),
 * a separate dictionary holding timeouts for keys that have one
 * ('expires'), bookkeeping dictionaries for blocked / ready / watched keys,
 * and the eviction pool used when choosing keys to evict. */
typedef struct redisDb {
dict *dict; /* The keyspace for this DB */
dict *expires; /* Timeout of keys with a timeout set */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
dict *ready_keys; /* Blocked keys that received a PUSH */
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
struct evictionPoolEntry *eviction_pool; /* Eviction pool of keys */
int id; /* Database ID */
long long avg_ttl; /* Average TTL, just for stats */
} redisDb;
dict 与 expires 都是 dict 类型的结构体,实现了一个 hash 链表的数据结构。不同的是 dict(参数别名为 allkeys)保存的是真实数据;expires(参数别名为 volatile)只保存设置了过期时间的 key,只有执行以下命令才会修改其中的数据(其中 PERSIST 的作用是移除 key 的过期时间):
EXPIRE
EXPIREAT
PERSIST
PEXPIRE
PEXPIREAT
SET key value [EX seconds] [PX milliseconds] [NX|XX]
淘汰方式
- LRU(最近最少使用,即优先淘汰最久未被访问的数据)
- 随机(random,随机选取数据淘汰)
- TTL(淘汰最快过期的数据)
依据淘汰的数据集合与淘汰方式,可以组合得到以下六种策略
- volatile-lru:依据 lru 方式选中 expires 集合中的数据,然后再依据 key 删除 dict 集合中的数据
- volatile-ttl:依据 ttl 方式选中 expires 集合中的数据,然后再依据 key 删除 dict 集合中的数据
- volatile-random:依据随机方式选中 expires 集合中的数据,然后再依据 key 删除 dict 集合中的数据
- allkeys-lru:依据lru方式淘汰 dict集合数据
- allkeys-random:依据随机方式淘汰 dict 集合数据
- noeviction:不淘汰(内存超限时,会增加内存占用的命令将返回 OOM 错误)
redis数据淘汰相关参数
- maxmemory
默认为 0,用于指定 redis 可使用的内存大小。如果为 0,那么不会淘汰数据。
- maxmemory_policy
淘汰策略,取值请看上文列出的六种策略
- maxmemory_samples
采样数量(每次淘汰时随机抽取的样本个数)。对于 lru 与 ttl 策略来说,这个参数至关重要。
淘汰时机,条件,结束条件以及计算方式
- 计算方式是 当前数据申请内存-所有slaves缓存-aof_buf缓存-aof_rewrite_buf_blocks缓存
slaves 缓存与 aof缓存请看其他博客。
c文件为 server.c 方法:freeMemoryIfNeeded
size_t mem_used, mem_tofree, mem_freed;
int slaves = listLength(server.slaves);
mstime_t latency, eviction_latency;
/* Remove the size of slaves output buffers and AOF buffer from the
* count of used memory. */
// 当前分配的使用内存
mem_used = zmalloc_used_memory();
if (slaves) {//识别是否有slave
listIter li;
listNode *ln;
listRewind(server.slaves,&li);//获得所有slave
while((ln = listNext(&li))) {//迭代savel
client *slave = listNodeValue(ln);
unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
if (obuf_bytes > mem_used)
mem_used = 0;
else
mem_used -= obuf_bytes;
}
}
if (server.aof_state != AOF_OFF) {
mem_used -= sdslen(server.aof_buf);
mem_used -= aofRewriteBufferSize();
}
/* Check if we are over the memory limit. */
if (mem_used <= server.maxmemory) return C_OK;
- 条件
上面公式得到的内存大小是否大于 参数 maxmemory
if (mem_used <= server.maxmemory) return C_OK;
淘汰策略不是 noeviction
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
return C_ERR; /* We need to free memory, but policy forbids. */
- 结束条件
需要关注方法里面的 mem_tofree 变量与 mem_freed 变量:当 mem_freed 大于等于 mem_tofree 时,循环结束
while (mem_freed < mem_tofree) {
.............
}
mem_tofree 变量:需要释放的内存大小。关于 mem_used 请看上面的计算方式
mem_tofree = mem_used - server.maxmemory;
mem_freed 变量。累加每次删除数据内存改变大小
delta = (long long) zmalloc_used_memory();
..... 执行删除数据操作
delta -= (long long) zmalloc_used_memory();
mem_freed += delta;
- 触发时机
- 执行configSet命令,修改maxmemory值的时候
config_set_memory_field("maxmemory",server.maxmemory) {
if (server.maxmemory) {
if (server.maxmemory < zmalloc_used_memory()) {
serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
}
freeMemoryIfNeeded();
}
}
- 执行lua脚本中的call与pcall命令的时候(luaRedisGenericCommand)。如果大量使用lua脚本那么这个时机需要关注
luaRedisCallCommand,luaRedisPCallCommand
if (server.maxmemory && server.lua_write_dirty == 0 &&
(cmd->flags & CMD_DENYOOM))
{
if (freeMemoryIfNeeded() == C_ERR) {
luaPushError(lua, shared.oomerr->ptr);
goto cleanup;
}
}
- 每次执行命令之前
代码流程走向是 epoll 读事件方法 readQueryFromClient --> processInputBuffer--> processCommand-->
if (server.maxmemory) {
int retval = freeMemoryIfNeeded();
/* freeMemoryIfNeeded may flush slave output buffers. This may result
* into a slave, that may be the active client, to be freed. */
if (server.current_client == NULL) return C_ERR;
/* It was impossible to free enough memory, and the command the client
* is trying to execute is denied during OOM conditions? Error. */
if ((c->cmd->flags & CMD_DENYOOM) && retval == C_ERR) {
flagTransaction(c);
addReply(c, shared.oomerr);
return C_OK;
}
}
分析每个方式的实现细节
random策略
随机的实现非常简单:从 dict 中随机抽取一个 key,然后删除。如果随机选中的数据是热点数据,那么会十分影响缓存命中率;没有命中的请求会去 db 查询,在高并发情况下十分影响性能。适用场景:热点数据量小、资源充足、并发低。如果内存中存在重要数据,那么也不适合使用随机算法。
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
{
de = dictGetRandomKey(dict);
bestkey = dictGetKey(de);
}
在说 LRU、TTL 之前,先说一个重点:samples,中文为“样本”。作用为:从 dict 里面随机获得 samples 个样本,然后从中计算出一个最合适的数据。如果说随机淘汰只有一个随机抽取动作,那么 TTL 就有 samples 个随机抽取动作。
TTL策略
循环 maxmemory_samples 次,每次从 expires 中随机获得一个数据,选择其中 val 值(过期时间戳)最小的;第一个样本因为 bestkey 为 NULL 会直接成为候选
for (k = 0; k < server.maxmemory_samples; k++) {
sds thiskey;
long thisval;
de = dictGetRandomKey(dict);
thiskey = dictGetKey(de);
thisval = (long) dictGetVal(de);
/* Expire sooner (minor expire unix timestamp) is better
* candidate for deletion */
if (bestkey == NULL || thisval < bestval) {
bestkey = thiskey;
bestval = thisval;
}
}
- LRU策略
LRU是最复杂的策略,redis的算法并不是真正的lru,而是随机maxmemory_samples个数据进行略复杂的识别
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU)
{
struct evictionPoolEntry *pool = db->eviction_pool;
while(bestkey == NULL) {
evictionPoolPopulate(dict, db->dict, db->eviction_pool);
/* Go backward from best to worst element to evict. */
for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) {
if (pool[k].key == NULL) continue;
de = dictFind(dict,pool[k].key);
/* Remove the entry from the pool. */
sdsfree(pool[k].key);
/* Shift all elements on its right to left. */
memmove(pool+k,pool+k+1,
sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1));
/* Clear the element on the right which is empty
* since we shifted one position to the left. */
pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key = NULL;
pool[MAXMEMORY_EVICTION_POOL_SIZE-1].idle = 0;
/* If the key exists, is our pick. Otherwise it is
* a ghost and we need to try the next element. */
if (de) {
bestkey = dictGetKey(de);
break;
} else {
/* Ghost... */
continue;
}
}
}
}
evictionPoolPopulate 主要是调用 dictGetSomeKeys 方法获得最多 maxmemory_samples 个样本(maxmemory_samples 不超过 16 时使用栈上的数组,否则用 zmalloc 临时分配)。然后按空闲时间(idle)与 pool 中的数据进行对比并插入:pool 按 idle 从小到大排列,最右侧的就是空闲时间最长、最适合淘汰的数据。
#define EVICTION_SAMPLES_ARRAY_SIZE 16

/* Sample entries from 'sampledict' and merge them into 'pool'.
 *
 * At most server.maxmemory_samples entries are requested through
 * dictGetSomeKeys(). For every sampled key the idle time of its value
 * object (looked up in 'keydict' when the sampled dictionary is a different
 * one) is estimated, and the key is inserted into the pool so that entries
 * stay ordered by ascending idle time. When the pool is full, a candidate
 * whose idle time is not greater than that of pool[0] is dropped; otherwise
 * pool[0] is discarded to make room.
 *
 * Keys stored in the pool are private copies made with sdsdup(). */
void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
    dictEntry *stack_samples[EVICTION_SAMPLES_ARRAY_SIZE];
    dictEntry **samples = stack_samples;
    const int last = MAXMEMORY_EVICTION_POOL_SIZE - 1;
    int n_sampled, i;

    /* The on-stack array covers the common case; only fall back to a heap
     * allocation when more samples are requested than it can hold. */
    if (server.maxmemory_samples > EVICTION_SAMPLES_ARRAY_SIZE)
        samples = zmalloc(sizeof(samples[0]) * server.maxmemory_samples);

    n_sampled = dictGetSomeKeys(sampledict, samples, server.maxmemory_samples);

    for (i = 0; i < n_sampled; i++) {
        dictEntry *entry = samples[i];
        sds key = dictGetKey(entry);
        robj *val;
        unsigned long long idle;
        int slot;

        /* When sampling from a dictionary other than the key dictionary,
         * the value object has to be fetched from the key dictionary. */
        if (sampledict != keydict) entry = dictFind(keydict, key);
        val = dictGetVal(entry);
        idle = estimateObjectIdleTime(val);

        /* Locate the first empty slot, or the first populated slot whose
         * idle time is not smaller than the candidate's. */
        for (slot = 0; slot < MAXMEMORY_EVICTION_POOL_SIZE; slot++) {
            if (pool[slot].key == NULL || pool[slot].idle >= idle) break;
        }

        /* Pool is full and the candidate does not beat the entry with the
         * smallest idle time: nothing to insert. */
        if (slot == 0 && pool[last].key != NULL) continue;

        /* An empty slot needs no preparation. Otherwise make room first. */
        if (slot == MAXMEMORY_EVICTION_POOL_SIZE || pool[slot].key != NULL) {
            if (pool[last].key == NULL) {
                /* Room on the right: shift [slot, last) one step right. */
                memmove(&pool[slot + 1], &pool[slot],
                        sizeof(pool[0]) * (last - slot));
            } else {
                /* Pool is full: drop pool[0] (smallest idle time), shift
                 * the entries before the insertion point one step left and
                 * insert at slot-1. */
                slot--;
                sdsfree(pool[0].key);
                memmove(&pool[0], &pool[1], sizeof(pool[0]) * slot);
            }
        }

        pool[slot].key = sdsdup(key);
        pool[slot].idle = idle;
    }

    if (samples != stack_samples) zfree(samples);
}
当数据淘汰之后,redis会做什么处理
- 会触发 NOTIFY_EVICTED 事件
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
keyobj, db->id);
- 强制执行一个数据同步到slave
if (slaves) flushSlavesOutputBuffers();
后记
策略性能排序:LRU > TTL > RANDOM > noeviction。冷数据淘汰正确性:RANDOM > TTL > LRU。不是所有业务都适合数据淘汰,比如需要分布式锁的业务、数据关联业务(用 lru 处理逻辑)。在资源有限的情况下,可以对简单业务数据进行淘汰,比如用户数据等等。注意 LRU 策略在并发的情况下会出现性能降低的情况。