【redis源码分析】Redis Sentinel 是如何实际解决分布式共识问题的

1、它不再执行任何操作,如故障转移

2、当其他Sentinel节点询问它对于某个主节点主观下线的判定结果时,它将返回节点未下线的判定结果

3、如果TILT模式下Sentinel机制可以正常运行30秒,则该节点退出TILT模式


故障转移主逻辑


/* Run the scheduled Sentinel operations on every instance stored in the
 * 'instances' dictionary.
 *
 * Each entry is passed to sentinelHandleRedisInstance(). When the entry is a
 * master (SRI_MASTER), the function also recurses into that master's own
 * dictionaries of slaves and sentinels. A master whose failover state is
 * SENTINEL_FAILOVER_STATE_UPDATE_CONFIG is remembered and handed to
 * sentinelFailoverSwitchToPromotedSlave() once the iteration is over. */
void sentinelHandleDictOfRedisInstances(dict *instances) {
    sentinelRedisInstance *promoted_master = NULL;
    dictIterator *iter = dictGetIterator(instances);
    dictEntry *entry;

    for (entry = dictNext(iter); entry != NULL; entry = dictNext(iter)) {
        sentinelRedisInstance *inst = dictGetVal(entry);

        /* Main per-instance logic. */
        sentinelHandleRedisInstance(inst);

        /* Only masters trigger the recursion below. */
        if (!(inst->flags & SRI_MASTER)) continue;

        sentinelHandleDictOfRedisInstances(inst->slaves);
        sentinelHandleDictOfRedisInstances(inst->sentinels);
        if (inst->failover_state == SENTINEL_FAILOVER_STATE_UPDATE_CONFIG)
            promoted_master = inst;
    }

    /* Switch to the promoted slave only after the loop has finished; the
     * iterator is released afterwards, in the same order as before. */
    if (promoted_master != NULL) {
        sentinelFailoverSwitchToPromotedSlave(promoted_master);
    }
    dictReleaseIterator(iter);
}


/* ======================== SENTINEL timer handler ==========================
 * This is the "main" of our Sentinel, being sentinel completely non blocking
 * in design. The function is called every second.
 * -------------------------------------------------------------------------- */

/* Perform scheduled operations for the specified Redis instance.
 *
 * The work is split in two halves:
 *
 * - Monitoring half (every kind of instance): reconnect the instance link
 *   if needed and send the periodic commands.
 * - Acting half: update the subjectively-down state of the instance and,
 *   for masters only, check the objectively-down state, start a failover if
 *   needed, advance the failover state machine and ask the other Sentinels
 *   for their view of the master.
 *
 * While sentinel.tilt is set, the acting half is skipped until
 * SENTINEL_TILT_PERIOD has elapsed since sentinel.tilt_start_time; after
 * that the flag is cleared and a "-tilt" event is emitted. */
void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
    /* ========== MONITORING HALF ============ */
    /* Every kind of instance */
    sentinelReconnectInstance(ri); /* Establish the network connection. */
    sentinelSendPeriodicCommands(ri);

    /* ============== ACTING HALF ============= */
    /* We don't proceed with the acting half if we are in TILT mode.
     * TILT happens when we find something odd with the time, like a
     * sudden change in the clock. */
    if (sentinel.tilt) {
        if (mstime()-sentinel.tilt_start_time < SENTINEL_TILT_PERIOD) return;
        sentinel.tilt = 0;
        sentinelEvent(LL_WARNING,"-tilt",NULL,"#tilt mode exited");
    }

    /* Every kind of instance */
    sentinelCheckSubjectivelyDown(ri); /* Update the subjectively-down flag. */

    /* Masters and slaves */
    if (ri->flags & (SRI_MASTER|SRI_SLAVE)) {
        /* Nothing so far. */
    }

    /* Only masters */
    if (ri->flags & SRI_MASTER) {
        /* Update the objectively-down flag. */
        sentinelCheckObjectivelyDown(ri);
        /* If a failover was just started, query the other Sentinels
         * immediately (forced), without waiting for the ask period. */
        if (sentinelStartFailoverIfNeeded(ri))
            sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_ASK_FORCED);
        /* Advance the failover state machine for this master. */
        sentinelFailoverStateMachine(ri);
        /* Ask the other Sentinels for their view of this master. */
        sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_NO_FLAGS);
    }
}

主观下线:我个人认为你下线了

客观下线:认为你下线的 Sentinel 数量达到了配置的法定人数(quorum),不一定要超过半数


心跳监测


Sentinel 会定时发送消息给主从节点和其他 Sentinel 节点,看它们还活着不:

/* Send the periodic commands to the specified instance:
 *
 * - INFO, only to instances that are not Sentinels.
 * - PING, to every kind of instance.
 * - A hello message (PUBLISH to the Hello channel), to every kind of
 *   instance.
 *
 * Nothing is sent when the link is disconnected or when the number of
 * pending commands on the link has reached its limit. */
void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
    mstime_t now = mstime();
    mstime_t info_period, ping_period;
    int retval;

    /* Return ASAP if the instance is not properly connected. */
    if (ri->link->disconnected) return;

    /* For INFO, PING, PUBLISH that are not critical commands to send we
     * also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
     * want to use a lot of memory just because a link is not working
     * properly (note that anyway there is a redundant protection about this,
     * that is, the link will be disconnected and reconnected if a long
     * timeout condition is detected. */
    if (ri->link->pending_commands >=
        SENTINEL_MAX_PENDING_COMMANDS * ri->link->refcount) return;

    /* If this is a slave of a master in O_DOWN condition we start sending
     * it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
     * period. In this state we want to closely monitor slaves in case they
     * are turned into masters by another Sentinel, or by the sysadmin.
     * Similarly we monitor the INFO output more often if the slave reports
     * to be disconnected from the master, so that we can have a fresh
     * disconnection time figure. */
    if ((ri->flags & SRI_SLAVE) &&
        ((ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)) ||
         (ri->master_link_down_time != 0)))
    {
        info_period = 1000;
    } else {
        info_period = SENTINEL_INFO_PERIOD;
    }

    /* The ping period is the instance's down_after_period, capped at
     * SENTINEL_PING_PERIOD. */
    ping_period = ri->down_after_period;
    if (ping_period > SENTINEL_PING_PERIOD) ping_period = SENTINEL_PING_PERIOD;

    /* Send INFO to masters and slaves, not sentinels. */
    if ((ri->flags & SRI_SENTINEL) == 0 &&
        (ri->info_refresh == 0 ||
         (now - ri->info_refresh) > info_period))
    {
        retval = redisAsyncCommand(ri->link->cc,
            sentinelInfoReplyCallback, ri, "%s",
            sentinelInstanceMapCommand(ri,"INFO"));
        if (retval == C_OK) ri->link->pending_commands++;
    }

    /* Send PING to all the three kinds of instances. */
    if ((now - ri->link->last_pong_time) > ping_period &&
        (now - ri->link->last_ping_time) > ping_period/2) {
        sentinelSendPing(ri);
    }

    /* PUBLISH hello messages to all the three kinds of instances. */
    if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
        sentinelSendHello(ri);
    }
}


判断下线 及 投票表决


首先,这种事情需要我自己先说服我自己,他已经挂了,所以:

/* ===================== SENTINEL availability checks ======================= */

/* Is this instance down from our point of view?
 *
 * The function does three things:
 *
 * 1) Closes the command link when a ping has been pending for too long.
 * 2) Closes the Pub/Sub link when it has shown no activity for too long.
 * 3) Sets or clears the SRI_S_DOWN flag of the instance, emitting a
 *    "+sdown" or "-sdown" event when the state changes. */
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
    mstime_t elapsed = 0;

    /* Measure how long the instance has been silent: from act_ping_time
     * when a ping is pending, otherwise from last_avail_time when the link
     * is disconnected. In any other case 'elapsed' stays 0. */
    if (ri->link->act_ping_time)
        elapsed = mstime() - ri->link->act_ping_time;
    else if (ri->link->disconnected)
        elapsed = mstime() - ri->link->last_avail_time;

    /* Check if we are in need for a reconnection of one of the
     * links, because we are detecting low activity.
     *
     * 1) Check if the command link seems connected, was connected not less
     *    than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
     *    pending ping for more than half the timeout. */
    if (ri->link->cc &&
        (mstime() - ri->link->cc_conn_time) >
        SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
        ri->link->act_ping_time != 0 && /* There is a pending ping... */
        /* The pending ping is delayed, and we did not receive
         * error replies as well. */
        (mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
        (mstime() - ri->link->last_pong_time) > (ri->down_after_period/2))
    {
        instanceLinkCloseConnection(ri->link,ri->link->cc);
    }

    /* 2) Check if the pubsub link seems connected, was connected not less
     *    than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have no
     *    activity in the Pub/Sub channel for more than
     *    SENTINEL_PUBLISH_PERIOD * 3.
     */
    if (ri->link->pc &&
        (mstime() - ri->link->pc_conn_time) >
         SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
        (mstime() - ri->link->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
    {
        instanceLinkCloseConnection(ri->link,ri->link->pc);
    }

    /* Update the SDOWN flag. We believe the instance is SDOWN if:
     *
     * 1) It is not replying.
     * 2) We believe it is a master, it reports to be a slave for enough time
     *    to meet the down_after_period, plus enough time to get two times
     *    INFO report from the instance. */
    if (elapsed > ri->down_after_period ||
        ((ri->flags & SRI_MASTER) &&
         ri->role_reported == SRI_SLAVE &&
         mstime() - ri->role_reported_time >
          (ri->down_after_period+SENTINEL_INFO_PERIOD*2)))
    {
        /* Is subjectively down */
        if ((ri->flags & SRI_S_DOWN) == 0) {
            sentinelEvent(LL_WARNING,"+sdown",ri,"%@");
            ri->s_down_since_time = mstime();
            ri->flags |= SRI_S_DOWN;
        }
    } else {
        /* Is subjectively up */
        if (ri->flags & SRI_S_DOWN) {
            sentinelEvent(LL_WARNING,"-sdown",ri,"%@");
            ri->flags &= ~(SRI_S_DOWN|SRI_SCRIPT_KILL_SENT);
        }
    }
}


我说服了自己之后,为了避免决策失误,我便开始问询身边同频的朋友的意见:

/* Ask every other Sentinel monitoring 'master' whether it also considers the
 * master down, by sending it "SENTINEL is-master-down-by-addr".
 *
 * The last argument of the command is our own id (sentinel.myid) when a
 * failover is in progress for this master, or "*" otherwise. According to
 * the original author's notes, sending our id is how votes are requested;
 * the reply is handled by sentinelReceiveIsMasterDownReply(), which is not
 * shown here.
 *
 * 'flags' may contain SENTINEL_ASK_FORCED to send the request even if a
 * reply was received less than SENTINEL_ASK_PERIOD ago. */
void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
    dictIterator *di;
    dictEntry *de;

    di = dictGetIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
        char port[32];
        int retval;

        /* If the master state from other sentinel is too old, we clear it. */
        if (elapsed > SENTINEL_ASK_PERIOD*5) {
            ri->flags &= ~SRI_MASTER_DOWN;
            sdsfree(ri->leader);
            ri->leader = NULL;
        }

        /* Only ask if master is down to other sentinels if:
         *
         * 1) We believe it is subjectively down.
         * 2) Sentinel is connected.
         * 3) We did not receive the info within SENTINEL_ASK_PERIOD ms,
         *    unless the request is forced. */
        if ((master->flags & SRI_S_DOWN) == 0) continue;
        if (ri->link->disconnected) continue;
        if (!(flags & SENTINEL_ASK_FORCED) &&
            mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
            continue;

        /* Ask */
        ll2string(port,sizeof(port),master->addr->port);
        retval = redisAsyncCommand(ri->link->cc,
                    sentinelReceiveIsMasterDownReply, ri,
                    "%s is-master-down-by-addr %s %s %llu %s",
                    sentinelInstanceMapCommand(ri,"SENTINEL"),
                    master->addr->ip, port,
                    sentinel.current_epoch,
                    (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
                    sentinel.myid : "*");
        if (retval == C_OK) ri->link->pending_commands++;
    }
    dictReleaseIterator(di);
}


投票选举 leader哨兵


现在认定他挂了,我们一群监视的要推举一个主事儿的来料理他的后事,由于是我先发现他不对劲儿的,也是我先获取了他最终挂掉的信息,所以我抢先发起了料理后事的请求,其他哨兵只能先给我投票,如果我落选了,他们才有机会发起选举:

1、拉票

/* Ask every other Sentinel monitoring 'master' whether it also considers the
 * master down, by sending it "SENTINEL is-master-down-by-addr".
 *
 * The last argument of the command is our own id (sentinel.myid) when a
 * failover is in progress for this master, or "*" otherwise. According to
 * the original author's notes, sending our id is how votes are requested;
 * the reply is handled by sentinelReceiveIsMasterDownReply(), which is not
 * shown here.
 *
 * 'flags' may contain SENTINEL_ASK_FORCED to send the request even if a
 * reply was received less than SENTINEL_ASK_PERIOD ago. */
void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
    dictIterator *di;
    dictEntry *de;

    di = dictGetIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
        char port[32];
        int retval;

        /* If the master state from other sentinel is too old, we clear it. */
        if (elapsed > SENTINEL_ASK_PERIOD*5) {
            ri->flags &= ~SRI_MASTER_DOWN;
            sdsfree(ri->leader);
            ri->leader = NULL;
        }

        /* Only ask if master is down to other sentinels if:
         *
         * 1) We believe it is subjectively down.
         * 2) Sentinel is connected.
         * 3) We did not receive the info within SENTINEL_ASK_PERIOD ms,
         *    unless the request is forced. */
        if ((master->flags & SRI_S_DOWN) == 0) continue;
        if (ri->link->disconnected) continue;
        if (!(flags & SENTINEL_ASK_FORCED) &&
            mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
            continue;

        /* Ask */
        ll2string(port,sizeof(port),master->addr->port);
        retval = redisAsyncCommand(ri->link->cc,
                    sentinelReceiveIsMasterDownReply, ri,
                    "%s is-master-down-by-addr %s %s %llu %s",
                    sentinelInstanceMapCommand(ri,"SENTINEL"),
                    master->addr->ip, port,
                    sentinel.current_epoch,
                    (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
                    sentinel.myid : "*");
        if (retval == C_OK) ri->link->pending_commands++;
    }
    dictReleaseIterator(di);
}

2、投票

/* Vote for the sentinel with 'req_runid' or return the old vote if already
 * voted for the specified 'req_epoch' or one greater.
 *
 * If 'req_epoch' is greater than sentinel.current_epoch, the current epoch
 * is first advanced to it and a "+new-epoch" event is emitted.
 *
 * If a vote is not available returns NULL, otherwise return the Sentinel
 * runid and populate the leader_epoch with the epoch of the vote. The
 * returned string is a fresh copy created with sdsnew(). */
char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch) {
    if (req_epoch > sentinel.current_epoch) {
        sentinel.current_epoch = req_epoch;
        sentinelFlushConfig();
        sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
            (unsigned long long) sentinel.current_epoch);
    }

    /* A new vote is cast only if we have not voted yet for an epoch as
     * recent as 'req_epoch' and our current epoch is not ahead of it. */
    if (master->leader_epoch < req_epoch && sentinel.current_epoch <= req_epoch)
    {
        sdsfree(master->leader);
        master->leader = sdsnew(req_runid);
        master->leader_epoch = sentinel.current_epoch;
        sentinelFlushConfig();
        sentinelEvent(LL_WARNING,"+vote-for-leader",master,"%s %llu",
            master->leader, (unsigned long long) master->leader_epoch);
        /* If we did not voted for ourselves, set the master failover start
         * time to now, in order to force a delay before we can start a
         * failover for the same master. */
        if (strcasecmp(master->leader,sentinel.myid))
            master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
    }

    *leader_epoch = master->leader_epoch;
    return master->leader ? sdsnew(master->leader) : NULL;
}

3、确定干活

/* This function checks if there are the conditions to start the failover,
 * that is:
 *
 * 1) Master must be in ODOWN condition.
 * 2) No failover already in progress.
 * 3) No failover already attempted recently.
 *
 * We still don't know if we'll win the election so it is possible that we
 * start the failover but that we'll not be able to act.
 *
 * Return non-zero if a failover was started. */
int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
    /* We can't failover if the master is not in O_DOWN state. */
    if (!(master->flags & SRI_O_DOWN)) return 0;

    /* Failover already in progress? */
    if (master->flags & SRI_FAILOVER_IN_PROGRESS) return 0;

    /* Last failover attempt started too little time ago?
     * failover_start_time acts like a lock: a new failover is refused until
     * twice failover_timeout has elapsed since that time. */
    if (mstime() - master->failover_start_time <
        master->failover_timeout*2)
    {
        /* Log the delay only once per failover_start_time value. */
        if (master->failover_delay_logged != master->failover_start_time) {
            time_t clock = (master->failover_start_time +
                            master->failover_timeout*2) / 1000;
            char ctimebuf[26];

            ctime_r(&clock,ctimebuf);
            ctimebuf[24] = '\0'; /* Remove newline. */
            master->failover_delay_logged = master->failover_start_time;
            serverLog(LL_WARNING,
                "Next failover delay: I will not start a failover before %s",
                ctimebuf);
        }
        return 0;
    }

    sentinelStartFailover(master);
    return 1;
}

4、主持大局

/* Setup the master state to start a failover.
 *
 * The master must have the SRI_MASTER flag set (asserted). The function
 * moves the failover state to SENTINEL_FAILOVER_STATE_WAIT_START, sets
 * SRI_FAILOVER_IN_PROGRESS, increments sentinel.current_epoch and stores
 * the new value as the failover epoch, emits the "+new-epoch" and
 * "+try-failover" events, and records the failover start time (with a
 * random offset below SENTINEL_MAX_DESYNC) and the state change time. */
void sentinelStartFailover(sentinelRedisInstance *master) {
    serverAssert(master->flags & SRI_MASTER);

    master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
    master->flags |= SRI_FAILOVER_IN_PROGRESS;
    master->failover_epoch = ++sentinel.current_epoch;
    sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
        (unsigned long long) sentinel.current_epoch);
    sentinelEvent(LL_WARNING,"+try-failover",master,"%@");
    master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
    master->failover_state_change_time = mstime();
}


故障转移状态机


现在由我来主持大局,完成这个故障转移工作。

那我是不是得有个指导,或者说执行步骤啊?不然我怎么开展工作呢?

void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {

serverAssert(ri->flags & SRI_MASTER);

if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS)) return;

switch(ri->failover_state) {

case SENTINEL_FAILOVER_STATE_WAIT_START:

sentinelFailoverWaitStart(ri); //统计投票结果

break;

case SENTINEL_FAILOVER_STATE_SELECT_SLAVE:

sentinelFailoverSelectSlave(ri);//选择从节点

break;

case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE:

sentinelFailoverSendSlaveOfNoOne(ri);//取消该节点之前的主从关系,晋升成为主节点

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值