【redis源码分析】Redis Sentinel 是如何实际解决分布式共识问题的-CSDN博客

本文链接：https://blog.csdn.net/2401_85766992/article/details/139759369

1、它不再执行任何操作，如故障转移

2、当其他Sentinel节点询问它对于某个主节点主观下线的判定结果时，它将返回节点未下线的判定结果

3、如果TILT模式下Sentinel机制可以正常运行30秒，则该节点退出TILT模式

故障转移主逻辑

/* Perform scheduled operations for all the instances in the dictionary.
 * Recursively call the function against the dictionaries of slaves and
 * sentinels attached to every master found.
 *
 * 'instances' is a dict whose values are sentinelRedisInstance pointers. */
void sentinelHandleDictOfRedisInstances(dict *instances) {
    dictIterator *di;
    dictEntry *de;
    sentinelRedisInstance *switch_to_promoted = NULL;

    /* There are a number of things we need to perform against every master. */
    di = dictGetIterator(instances);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);

        /* Run the per-instance main logic. */
        sentinelHandleRedisInstance(ri);
        if (ri->flags & SRI_MASTER) {
            /* For a master, also process the slaves and sentinels that
             * hang off this master instance. */
            sentinelHandleDictOfRedisInstances(ri->slaves);
            sentinelHandleDictOfRedisInstances(ri->sentinels);
            /* Remember a master whose failover reached the UPDATE_CONFIG
             * state; the switch is performed after the iteration ends.
             * Only the last such master seen in this pass is kept. */
            if (ri->failover_state == SENTINEL_FAILOVER_STATE_UPDATE_CONFIG) {
                switch_to_promoted = ri;
            }
        }
    }
    /* Complete the last step of the failover. */
    if (switch_to_promoted)
        sentinelFailoverSwitchToPromotedSlave(switch_to_promoted);
    dictReleaseIterator(di);
}

/* ======================== SENTINEL timer handler ==========================

This is the “main” our Sentinel, being sentinel completely non blocking
in design. The function is called every second.
-------------------------------------------------------------------------- */

/* Perform scheduled operations for the specified Redis instance.
 *
 * The function has two halves: a monitoring half that always runs
 * (reconnect + periodic commands) and an acting half that is skipped
 * while Sentinel is in TILT mode. */
void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
    /* ========== MONITORING HALF ============ */
    /* Every kind of instance */
    sentinelReconnectInstance(ri); /* Establish the network connections. */
    sentinelSendPeriodicCommands(ri);

    /* ============== ACTING HALF ============= */
    /* We don't proceed with the acting half if we are in TILT mode.
     * TILT happens when we find something odd with the time, like a
     * sudden change in the clock. */
    if (sentinel.tilt) {
        /* Still inside the TILT period: do nothing more for this instance. */
        if (mstime()-sentinel.tilt_start_time < SENTINEL_TILT_PERIOD) return;
        sentinel.tilt = 0;
        sentinelEvent(LL_WARNING,"-tilt",NULL,"#tilt mode exited");
    }

    /* Every kind of instance */
    sentinelCheckSubjectivelyDown(ri); /* Update the subjectively-down flag. */

    /* Masters and slaves */
    if (ri->flags & (SRI_MASTER|SRI_SLAVE)) {
        /* Nothing so far. */
    }

    /* Only masters */
    if (ri->flags & SRI_MASTER) {
        sentinelCheckObjectivelyDown(ri); /* Update the objectively-down flag. */
        /* When a failover was just started, ask the other sentinels right
         * away (SENTINEL_ASK_FORCED) instead of waiting for the ask period. */
        if (sentinelStartFailoverIfNeeded(ri))
            sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_ASK_FORCED);
        sentinelFailoverStateMachine(ri); /* Advance the failover state machine. */
        /* Regular poll of the other sentinels' view of this master. */
        sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_NO_FLAGS);
    }
}

主观下线：我个人认为你下线了

客观下线：认为你下线的 Sentinel 数量达到了配置的法定票数（quorum，不一定是半数）

心跳监测

Sentinel 会定时发送消息给主从节点和其他 Sentinel 节点，看它们还活着不：

/* Send periodic PING, INFO, and PUBLISH to the Hello channel to
 * the specified master or slave instance.
 *
 * Nothing is sent when the link is disconnected or when too many
 * commands are already pending on the link. */
void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
    mstime_t now = mstime();
    mstime_t info_period, ping_period;
    int retval;

    /* Return ASAP in the case the instance is not properly connected. */
    if (ri->link->disconnected) return;

    /* For INFO, PING, PUBLISH that are not critical commands to send we
     * also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
     * want to use a lot of memory just because a link is not working
     * properly (note that anyway there is a redundant protection about this,
     * that is, the link will be disconnected and reconnected if a long
     * timeout condition is detected). */
    if (ri->link->pending_commands >=
        SENTINEL_MAX_PENDING_COMMANDS * ri->link->refcount) return;

    /* If this is a slave of a master in O_DOWN condition we start sending
     * it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
     * period. In this state we want to closely monitor slaves in case they
     * are turned into masters by another Sentinel, or by the sysadmin.
     * Similarly we monitor the INFO output more often if the slave reports
     * to be disconnected from the master, so that we can have a fresh
     * disconnection time figure. */
    if ((ri->flags & SRI_SLAVE) &&
        ((ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)) ||
         (ri->master_link_down_time != 0)))
    {
        info_period = 1000;
    } else {
        info_period = SENTINEL_INFO_PERIOD;
    }

    /* The ping period is the configured 'down-after-milliseconds' time,
     * capped at SENTINEL_PING_PERIOD. */
    ping_period = ri->down_after_period;
    if (ping_period > SENTINEL_PING_PERIOD) ping_period = SENTINEL_PING_PERIOD;

    /* Send INFO to masters and slaves, not sentinels. */
    if ((ri->flags & SRI_SENTINEL) == 0 &&
        (ri->info_refresh == 0 ||
         (now - ri->info_refresh) > info_period))
    {
        retval = redisAsyncCommand(ri->link->cc,
            sentinelInfoReplyCallback, ri, "%s",
            sentinelInstanceMapCommand(ri,"INFO"));
        if (retval == C_OK) ri->link->pending_commands++;
    }

    /* Send PING to all the three kinds of instances. */
    if ((now - ri->link->last_pong_time) > ping_period &&
        (now - ri->link->last_ping_time) > ping_period/2) {
        sentinelSendPing(ri);
    }

    /* PUBLISH hello messages to all the three kinds of instances. */
    if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
        sentinelSendHello(ri);
    }
}

判断下线及投票表决

首先，这种事情需要我自己先说服我自己，他已经挂了，所以：

/* ===================== SENTINEL availability checks ======================= */

/* Is this instance down from our point of view?
 *
 * Closes links that look stale so they get reconnected, then sets or
 * clears the SRI_S_DOWN flag on 'ri' and emits the matching
 * "+sdown" / "-sdown" event on a state change. */
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
    mstime_t elapsed = 0;

    /* Time elapsed since the pending ping was sent or, with no pending
     * ping and a disconnected link, since the link was last available. */
    if (ri->link->act_ping_time)
        elapsed = mstime() - ri->link->act_ping_time;
    else if (ri->link->disconnected)
        elapsed = mstime() - ri->link->last_avail_time;

    /* Check if we are in need for a reconnection of one of the
     * links, because we are detecting low activity.
     *
     * 1) Check if the command link seems connected, was connected not less
     *    than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
     *    pending ping for more than half the timeout. */
    if (ri->link->cc &&
        (mstime() - ri->link->cc_conn_time) >
        SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
        ri->link->act_ping_time != 0 && /* There is a pending ping... */
        /* The pending ping is delayed, and we did not receive
         * error replies as well. */
        (mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
        (mstime() - ri->link->last_pong_time) > (ri->down_after_period/2))
    {
        instanceLinkCloseConnection(ri->link,ri->link->cc);
    }

    /* 2) Check if the pubsub link seems connected, was connected not less
     *    than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have no
     *    activity in the Pub/Sub channel for more than
     *    SENTINEL_PUBLISH_PERIOD * 3. */
    if (ri->link->pc &&
        (mstime() - ri->link->pc_conn_time) >
        SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
        (mstime() - ri->link->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
    {
        instanceLinkCloseConnection(ri->link,ri->link->pc);
    }

    /* Update the SDOWN flag. We believe the instance is SDOWN if:
     *
     * 1) It is not replying.
     * 2) We believe it is a master, it reports to be a slave for enough time
     *    to meet the down_after_period, plus enough time to get two times
     *    INFO report from the instance. */
    if (elapsed > ri->down_after_period ||
        (ri->flags & SRI_MASTER &&
         ri->role_reported == SRI_SLAVE &&
         mstime() - ri->role_reported_time >
         (ri->down_after_period+SENTINEL_INFO_PERIOD*2)))
    {
        /* Is subjectively down */
        if ((ri->flags & SRI_S_DOWN) == 0) {
            sentinelEvent(LL_WARNING,"+sdown",ri,"%@");
            ri->s_down_since_time = mstime();
            ri->flags |= SRI_S_DOWN;
        }
    } else {
        /* Is subjectively up */
        if (ri->flags & SRI_S_DOWN) {
            sentinelEvent(LL_WARNING,"-sdown",ri,"%@");
            ri->flags &= ~(SRI_S_DOWN|SRI_SCRIPT_KILL_SENT);
        }
    }
}

我说服了自己之后，为了避免决策失误，我便开始问询身边同频的朋友的意见：

/* Send "SENTINEL is-master-down-by-addr" to every other sentinel known for
 * 'master', asking for its view of the master state.
 *
 * When a failover is in progress (failover_state > NONE) our own myid is
 * sent as the last argument instead of "*"; presumably this is how the
 * vote for leader is requested -- the reply handling
 * (sentinelReceiveIsMasterDownReply) is not part of this excerpt.
 *
 * 'flags' may contain SENTINEL_ASK_FORCED to bypass the SENTINEL_ASK_PERIOD
 * rate limit. */
void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
    dictIterator *di;
    dictEntry *de;

    di = dictGetIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
        char port[32];
        int retval;

        /* If the master state from other sentinel is too old, we clear it. */
        if (elapsed > SENTINEL_ASK_PERIOD*5) {
            ri->flags &= ~SRI_MASTER_DOWN;
            sdsfree(ri->leader);
            ri->leader = NULL;
        }

        /* Only ask if master is down to other sentinels if:
         *
         * 1) We believe it is down, or there is a failover in progress.
         * 2) Sentinel is connected.
         * 3) We did not receive the info within SENTINEL_ASK_PERIOD ms. */
        if ((master->flags & SRI_S_DOWN) == 0) continue;
        if (ri->link->disconnected) continue;
        if (!(flags & SENTINEL_ASK_FORCED) &&
            mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
            continue;

        /* Ask */
        ll2string(port,sizeof(port),master->addr->port);
        retval = redisAsyncCommand(ri->link->cc,
                    sentinelReceiveIsMasterDownReply, ri,
                    "%s is-master-down-by-addr %s %s %llu %s",
                    sentinelInstanceMapCommand(ri,"SENTINEL"),
                    master->addr->ip, port,
                    /* Cast so the argument matches the %llu specifier. */
                    (unsigned long long) sentinel.current_epoch,
                    (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
                    sentinel.myid : "*");
        if (retval == C_OK) ri->link->pending_commands++;
    }
    dictReleaseIterator(di);
}

投票选举 leader哨兵

现在认定他挂了，我们一群监视的要推举一个主事儿的来料理他的后事，由于是我先发现他不对劲儿的，也是我先获取了他最终挂掉的信息，所以我抢先发起了料理后事的请求，其他哨兵只能先给我投票，如果我落选了，他们才有机会发起选举：

1、拉票

/* Send "SENTINEL is-master-down-by-addr" to every other sentinel known for
 * 'master', asking for its view of the master state.
 *
 * When a failover is in progress (failover_state > NONE) our own myid is
 * sent as the last argument instead of "*"; presumably this is how the
 * vote for leader is requested -- the reply handling
 * (sentinelReceiveIsMasterDownReply) is not part of this excerpt.
 *
 * 'flags' may contain SENTINEL_ASK_FORCED to bypass the SENTINEL_ASK_PERIOD
 * rate limit. */
void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
    dictIterator *di;
    dictEntry *de;

    di = dictGetIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
        char port[32];
        int retval;

        /* If the master state from other sentinel is too old, we clear it. */
        if (elapsed > SENTINEL_ASK_PERIOD*5) {
            ri->flags &= ~SRI_MASTER_DOWN;
            sdsfree(ri->leader);
            ri->leader = NULL;
        }

        /* Only ask if master is down to other sentinels if:
         *
         * 1) We believe it is down, or there is a failover in progress.
         * 2) Sentinel is connected.
         * 3) We did not receive the info within SENTINEL_ASK_PERIOD ms. */
        if ((master->flags & SRI_S_DOWN) == 0) continue;
        if (ri->link->disconnected) continue;
        if (!(flags & SENTINEL_ASK_FORCED) &&
            mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
            continue;

        /* Ask */
        ll2string(port,sizeof(port),master->addr->port);
        retval = redisAsyncCommand(ri->link->cc,
                    sentinelReceiveIsMasterDownReply, ri,
                    "%s is-master-down-by-addr %s %s %llu %s",
                    sentinelInstanceMapCommand(ri,"SENTINEL"),
                    master->addr->ip, port,
                    /* Cast so the argument matches the %llu specifier. */
                    (unsigned long long) sentinel.current_epoch,
                    (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
                    sentinel.myid : "*");
        if (retval == C_OK) ri->link->pending_commands++;
    }
    dictReleaseIterator(di);
}

2、投票

/* Vote for the sentinel with 'req_runid' or return the old vote if already
 * voted for the specified 'req_epoch' or one greater.
 *
 * If a vote is not available returns NULL, otherwise return the Sentinel
 * runid and populate the leader_epoch with the epoch of the vote.
 * The returned string is a fresh sds copy created with sdsnew(). */
char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch) {
    /* Adopt the requester's epoch if it is newer than ours. */
    if (req_epoch > sentinel.current_epoch) {
        sentinel.current_epoch = req_epoch;
        sentinelFlushConfig();
        sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
            (unsigned long long) sentinel.current_epoch);
    }

    /* Grant the vote only if we have not voted yet for this epoch and the
     * request is not for an epoch older than our current one. */
    if (master->leader_epoch < req_epoch && sentinel.current_epoch <= req_epoch)
    {
        sdsfree(master->leader);
        master->leader = sdsnew(req_runid);
        master->leader_epoch = sentinel.current_epoch;
        sentinelFlushConfig();
        sentinelEvent(LL_WARNING,"+vote-for-leader",master,"%s %llu",
            master->leader, (unsigned long long) master->leader_epoch);
        /* If we did not voted for ourselves, set the master failover start
         * time to now, in order to force a delay before we can start a
         * failover for the same master. */
        if (strcasecmp(master->leader,sentinel.myid))
            master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
    }

    *leader_epoch = master->leader_epoch;
    return master->leader ? sdsnew(master->leader) : NULL;
}

3、确定干活

/* This function checks if there are the conditions to start the failover,
 * that is:
 *
 * 1) Master must be in ODOWN condition.
 * 2) No failover already in progress.
 * 3) No failover already attempted recently.
 *
 * We still don't know if we'll win the election so it is possible that we
 * start the failover but that we'll not be able to act.
 *
 * Return non-zero if a failover was started. */
int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
    /* We can't failover if the master is not in O_DOWN state. */
    if (!(master->flags & SRI_O_DOWN)) return 0;

    /* Failover already in progress? */
    if (master->flags & SRI_FAILOVER_IN_PROGRESS) return 0;

    /* Last failover attempt started too little time ago?
     * failover_start_time acts like a lock: a new failover may start only
     * once more than failover_timeout*2 has passed since it. */
    if (mstime() - master->failover_start_time <
        master->failover_timeout*2)
    {
        /* Log the delay only once per failover_start_time value. */
        if (master->failover_delay_logged != master->failover_start_time) {
            time_t clock = (master->failover_start_time +
                            master->failover_timeout*2) / 1000;
            char ctimebuf[26];

            ctime_r(&clock,ctimebuf);
            ctimebuf[24] = '\0'; /* Remove newline. */
            master->failover_delay_logged = master->failover_start_time;
            serverLog(LL_WARNING,
                "Next failover delay: I will not start a failover before %s",
                ctimebuf);
        }
        return 0;
    }

    sentinelStartFailover(master);
    return 1;
}

4、主持大局

/* Setup the master state to start a failover.
 *
 * Moves 'master' to the WAIT_START failover state, flags the failover as
 * in progress, bumps the global current epoch and records it as the
 * failover epoch. 'master' must have the SRI_MASTER flag set. */
void sentinelStartFailover(sentinelRedisInstance *master) {
    serverAssert(master->flags & SRI_MASTER);

    master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
    master->flags |= SRI_FAILOVER_IN_PROGRESS;
    master->failover_epoch = ++sentinel.current_epoch;
    sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
        (unsigned long long) sentinel.current_epoch);
    sentinelEvent(LL_WARNING,"+try-failover",master,"%@");
    /* The start time gets a random offset below SENTINEL_MAX_DESYNC. */
    master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
    master->failover_state_change_time = mstime();
}

故障转移状态机

现在由我来主持大局，完成这个故障转移工作。

那我是不是得有个指导，或者说执行步骤啊？不然我怎么开展工作呢？

void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {

serverAssert(ri->flags & SRI_MASTER);

if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS)) return;

switch(ri->failover_state) {

case SENTINEL_FAILOVER_STATE_WAIT_START:

sentinelFailoverWaitStart(ri); //统计投票结果

break;

case SENTINEL_FAILOVER_STATE_SELECT_SLAVE:

sentinelFailoverSelectSlave(ri);//选择从节点

break;

case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE:

sentinelFailoverSendSlaveOfNoOne(ri);//取消该节点之前的主从关系，晋升成为主节点