redis之辛勤的哨兵(五)你一个人说了不算

前几部分讲解了哨兵监控master,当master出现异常时将自动切换:从replica中选择一个最优的replica升级为新的master,然后通知其他replica从新的master进行数据同步。
然而如果只有一个哨兵进行监控,当网络波动或者哨兵本身出现问题时,非常容易出现误判,导致master被错误切换。所以本文主要讲解哨兵集群:多个哨兵共同监控master,只有足够多的哨兵都认为master下线,才确定master下线。

一、如何发现其他哨兵

请添加图片描述
每个哨兵配置中,只配置了需要监控的master的地址,并没有配置其他哨兵,如何发现呢?这里发布订阅就派上用场了。

1.1 订阅hello通道

在哨兵和master建立命令连接时,同时也建立了一个专门用于发布订阅的连接,并且订阅了__sentinel__:hello通道。

sentinelTimer() ->
	sentinelHandleDictOfRedisInstances() ->
		sentinelHandleRedisInstance() ->
			sentinelReconnectInstance()
#define SENTINEL_HELLO_CHANNEL "__sentinel__:hello"
void sentinelReconnectInstance(sentinelRedisInstance *ri) {
...
/* Pub / Sub */
    if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && link->pc == NULL) {
        link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
       ...
  
            sentinelSetClientName(ri,link->pc,"pubsub");
            /* Now we subscribe to the Sentinels "Hello" channel. */
            retval = redisAsyncCommand(link->pc,
                sentinelReceiveHelloMessages, ri, "%s %s",
                sentinelInstanceMapCommand(ri,"SUBSCRIBE"),
                SENTINEL_HELLO_CHANNEL);
            ...
        }
    }

1.2 不间断发送哨兵自身消息

哨兵定期的将自身信息以及监控的master信息通过__sentinel__:hello通道发出去,所有订阅了此通道的哨兵都会收到信息,这样哨兵就知道了其他的哨兵。可以看到是2s发送一次当前哨兵信息。

sentinelTimer() ->
	sentinelHandleDictOfRedisInstances() ->
		sentinelHandleRedisInstance() ->
			sentinelSendPeriodicCommands()
#define SENTINEL_PUBLISH_PERIOD 2000
void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
...
 /* PUBLISH hello messages to all the three kinds of instances. */
    if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
        sentinelSendHello(ri);
    }
}
int sentinelSendHello(sentinelRedisInstance *ri) {
    char ip[NET_IP_STR_LEN];
    char payload[NET_IP_STR_LEN+1024];
 	...
 	
    /* Format and send the Hello message. */
    snprintf(payload,sizeof(payload),
        "%s,%d,%s,%llu," /* Info about this sentinel. */
        "%s,%s,%d,%llu", /* Info about current master. */
        announce_ip, announce_port, sentinel.myid,
        (unsigned long long) sentinel.current_epoch,
        /* --- */
        master->name,announceSentinelAddr(master_addr),master_addr->port,
        (unsigned long long) master->config_epoch);
    retval = redisAsyncCommand(ri->link->cc,
        sentinelPublishReplyCallback, ri, "%s %s %s",
        sentinelInstanceMapCommand(ri,"PUBLISH"),
        SENTINEL_HELLO_CHANNEL,payload);
    ...
    
    return C_OK;
}

1.3 哨兵接收处理hello消息

在订阅__sentinel__:hello通道时,注册了回调函数sentinelReceiveHelloMessages进行处理其他哨兵发布的hello消息。

/* Pub/Sub callback attached to the Hello channel subscription. Every
 * message published on the channel lands here; valid ones coming from other
 * Sentinels are forwarded to sentinelProcessHelloMessage(), which is how
 * Sentinels watching the same master get to know each other. */
void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata) {
    sentinelRedisInstance *instance = privdata;
    redisReply *msg = reply;
    redisReply *payload;
    int well_formed;
    UNUSED(c);

    if (msg == NULL || instance == NULL) return;

    /* Any traffic on the Pub/Sub link counts as activity. Our own hello
     * messages are delivered back to us as well, so a stale timestamp is a
     * hint that the link is broken even when it still looks connected. */
    instance->link->pc_last_activity = mstime();

    /* Only a three-element ["message", <channel>, <payload>] array of
     * strings is accepted; validating here keeps the code below simple. */
    well_formed = msg->type == REDIS_REPLY_ARRAY &&
                  msg->elements == 3 &&
                  msg->element[0]->type == REDIS_REPLY_STRING &&
                  msg->element[1]->type == REDIS_REPLY_STRING &&
                  msg->element[2]->type == REDIS_REPLY_STRING &&
                  strcmp(msg->element[0]->str,"message") == 0;
    if (!well_formed) return;

    /* Skip messages that carry our own run ID: nothing to learn from them. */
    payload = msg->element[2];
    if (strstr(payload->str,sentinel.myid) == NULL)
        sentinelProcessHelloMessage(payload->str, payload->len);
}

从消息中获取哨兵的IP,port,runid,epoch,master的name,IP,port,epoch,并建立哨兵对象

/* Parse one Hello channel payload and update the local view accordingly.
 *
 * 'hello' is the comma separated payload and 'hello_len' its length in
 * bytes. A payload that does not split into exactly 8 tokens, or that names
 * a master this Sentinel does not know, is ignored.
 *
 * For an accepted message the function:
 * 1) Registers the sending Sentinel under the master if it is not known yet
 *    with this address and run ID.
 * 2) Raises sentinel.current_epoch if the sender advertises a greater one.
 * 3) Adopts the advertised master address when the sender's config epoch
 *    for the master is greater than ours and the address differs.
 * 4) Refreshes last_hello_time of the sending Sentinel.
 *
 * The token array is always released before returning. */
void sentinelProcessHelloMessage(char *hello, int hello_len) {
    /* Format is composed of 8 tokens:
     * 0=ip,1=port,2=runid,3=current_epoch,4=master_name,
     * 5=master_ip,6=master_port,7=master_config_epoch. */
    int numtokens, port, removed, master_port;
    uint64_t current_epoch, master_config_epoch;
    char **token = sdssplitlen(hello, hello_len, ",", 1, &numtokens);
    sentinelRedisInstance *si, *master;

    if (numtokens == 8) {
        /* Obtain a reference to the master this hello message is about */
        master = sentinelGetMasterByName(token[4]);
        if (!master) goto cleanup; /* Unknown master, skip the message. */

        /* First, try to see if we already have this sentinel. */
        port = atoi(token[1]);
        master_port = atoi(token[6]);
        si = getSentinelRedisInstanceByAddrAndRunID(
                        master->sentinels,token[0],port,token[2]);
        current_epoch = strtoull(token[3],NULL,10);
        master_config_epoch = strtoull(token[7],NULL,10);

        if (!si) {
            /* If not, remove all the sentinels that have the same runid
             * because there was an address change, and add the same Sentinel
             * with the new address back. */
            removed = removeMatchingSentinelFromMaster(master,token[2]);
            if (removed) {
                sentinelEvent(LL_NOTICE,"+sentinel-address-switch",master,
                    "%@ ip %s port %d for %s", token[0],port,token[2]);
            } else {
                /* Check if there is another Sentinel with the same address this
                 * new one is reporting. What we do if this happens is to set its
                 * port to 0, to signal the address is invalid. We'll update it
                 * later if we get an HELLO message. */
                sentinelRedisInstance *other =
                    getSentinelRedisInstanceByAddrAndRunID(
                        master->sentinels, token[0],port,NULL);
                if (other) {
                    sentinelEvent(LL_NOTICE,"+sentinel-invalid-addr",other,"%@");
                    other->addr->port = 0; /* It means: invalid address. */
                    sentinelUpdateSentinelAddressInAllMasters(other);
                }
            }

            /* Add the new sentinel. */
            si = createSentinelRedisInstance(token[2],SRI_SENTINEL,
                            token[0],port,master->quorum,master);

            if (si) {
                /* "+sentinel" is only emitted for a genuinely new Sentinel;
                 * an address change was already reported above. */
                if (!removed) sentinelEvent(LL_NOTICE,"+sentinel",si,"%@");
                /* The runid is NULL after a new instance creation and
                 * for Sentinels we don't have a later chance to fill it,
                 * so do it now. */
                si->runid = sdsnew(token[2]);
                sentinelTryConnectionSharing(si);
                if (removed) sentinelUpdateSentinelAddressInAllMasters(si);
                sentinelFlushConfig();
            }
        }

        /* Update local current_epoch if received current_epoch is greater.*/
        if (current_epoch > sentinel.current_epoch) {
            sentinel.current_epoch = current_epoch;
            sentinelFlushConfig();
            sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
                (unsigned long long) sentinel.current_epoch);
        }

        /* Update master info if received configuration is newer. */
        if (si && master->config_epoch < master_config_epoch) {
            master->config_epoch = master_config_epoch;
            /* The address is switched only when the advertised port or host
             * differs from the one we currently hold for the master. */
            if (master_port != master->addr->port ||
                !sentinelAddrEqualsHostname(master->addr, token[5]))
            {
                sentinelAddr *old_addr;

                sentinelEvent(LL_WARNING,"+config-update-from",si,"%@");
                sentinelEvent(LL_WARNING,"+switch-master",
                    master,"%s %s %d %s %d",
                    master->name,
                    announceSentinelAddr(master->addr), master->addr->port,
                    token[5], master_port);

                /* Keep a copy of the previous address: it is passed to the
                 * client reconfiguration script after the master is reset. */
                old_addr = dupSentinelAddr(master->addr);
                sentinelResetMasterAndChangeAddress(master, token[5], master_port);
                sentinelCallClientReconfScript(master,
                    SENTINEL_OBSERVER,"start",
                    old_addr,master->addr);
                releaseSentinelAddr(old_addr);
            }
        }

        /* Update the state of the Sentinel. */
        if (si) si->last_hello_time = mstime();
    }

cleanup:
    sdsfreesplitres(token,numtokens);
}

1.4 为啥要不间断的发送hello消息呢?

  1. 对于发布订阅来说,消息是不存储的,消息发布的时候,如果此时你没有订阅,你是收不到消息的,即使你后续订阅了,也收不到那条消息,相当于消息丢失,所以需要发送多次
  2. 对于哨兵来说随时都可能加入新的哨兵,所以需要不时的发送消息,快速的将哨兵加入到集群中来,完成拓扑关系

请添加图片描述
上图过程中,sentinel1发布消息时,sentinel3还没有订阅,所以需要后续再发布信息,经过一段时间后,每个哨兵都包含了其他所有哨兵的信息。
并且和其他哨兵也建立了连接。
请添加图片描述

二、如何确定master下线

下线状态分为主观下线和客观下线。
主观下线:某个哨兵认为监控的某个master异常
客观下线:不少于quorum个哨兵(包含当前哨兵自己)都认为监控的某个master异常

只有客观下线的master才被认为异常,开始进行故障切换。

2.1 判断下线

sentinelTimer() ->
	sentinelHandleDictOfRedisInstances() ->
		sentinelHandleRedisInstance()

2.1.1 主观下线

void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
...
    sentinelCheckSubjectivelyDown(ri);
...
}
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
...
 /* Update the SDOWN flag. We believe the instance is SDOWN if:
     *
     * 1) It is not replying.
     * 2) We believe it is a master, it reports to be a slave for enough time
     *    to meet the down_after_period, plus enough time to get two times
     *    INFO report from the instance. */
    if (elapsed > ri->down_after_period ||
        (ri->flags & SRI_MASTER &&
         ri->role_reported == SRI_SLAVE &&
         mstime() - ri->role_reported_time >
          (ri->down_after_period+SENTINEL_INFO_PERIOD*2)))
    {
        /* Is subjectively down */
        if ((ri->flags & SRI_S_DOWN) == 0) {
            sentinelEvent(LL_WARNING,"+sdown",ri,"%@");
            ri->s_down_since_time = mstime();
            ri->flags |= SRI_S_DOWN;
        }
    }
...
}

2.1.2 客观下线

void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
    ...
	
	 /* Only masters */
    if (ri->flags & SRI_MASTER) {
    	//判断客观下线
        sentinelCheckObjectivelyDown(ri);
        ...
    }

遍历监控当前master的所有哨兵,看他们监控的状态, 当大于等于quorum个哨兵都认为master下线,则达成统一,一致认为下线。而quorum是配置的,
sentinel monitor <master-name> <ip> <redis-port> <quorum>,只有大于等于quorum个哨兵认为master下线才确定master下线。

/* Decide whether 'master' is objectively down (O_DOWN) and keep the
 * SRI_O_DOWN flag in sync with that decision.
 *
 * The master is objectively down when this Sentinel sees it subjectively
 * down and the number of agreeing Sentinels (ourselves plus every known
 * Sentinel flagged SRI_MASTER_DOWN) reaches master->quorum. An event is
 * emitted only when the flag actually changes. */
void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
    unsigned int agreeing = 0;
    int reached = 0;

    if (master->flags & SRI_S_DOWN) {
        dictIterator *it = dictGetIterator(master->sentinels);
        dictEntry *entry;

        /* Our own opinion is the first vote, then add the other Sentinels
         * that reported the master as down. */
        agreeing = 1;
        for (entry = dictNext(it); entry != NULL; entry = dictNext(it)) {
            sentinelRedisInstance *peer = dictGetVal(entry);

            if (peer->flags & SRI_MASTER_DOWN) agreeing++;
        }
        dictReleaseIterator(it);
        reached = agreeing >= master->quorum;
    }

    /* Apply the outcome, touching the flag only on a state transition. */
    if (reached && (master->flags & SRI_O_DOWN) == 0) {
        sentinelEvent(LL_WARNING,"+odown",master,"%@ #quorum %d/%d",
            agreeing, master->quorum);
        master->flags |= SRI_O_DOWN;
        master->o_down_since_time = mstime();
    } else if (!reached && (master->flags & SRI_O_DOWN)) {
        sentinelEvent(LL_WARNING,"-odown",master,"%@");
        master->flags &= ~SRI_O_DOWN;
    }
}

2.2 询问其他哨兵意见

在2.1中判断客观下线时使用了其他哨兵对于master的监控状态,那这些状态是什么时候获取的呢?
哨兵主动发送请求给其他哨兵

2.2.1 发送询问请求

sentinelTimer() ->
	sentinelHandleDictOfRedisInstances() ->
		sentinelHandleRedisInstance() ->
			sentinelAskMasterStateToOtherSentinels()

遍历监控当前master的所有哨兵,然后逐一发送is-master-down-by-addr命令。

#define SENTINEL_ASK_FORCED (1<<0)
void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
    dictIterator *di;
    dictEntry *de;

    di = dictGetIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
        char port[32];
        int retval;

      ...

        /* Ask */
        ll2string(port,sizeof(port),master->addr->port);
        retval = redisAsyncCommand(ri->link->cc,
                    sentinelReceiveIsMasterDownReply, ri,
                    "%s is-master-down-by-addr %s %s %llu %s",
                    sentinelInstanceMapCommand(ri,"SENTINEL"),
                    announceSentinelAddr(master->addr), port,
                    sentinel.current_epoch,
                    (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
                    sentinel.myid : "*");
      		...
    }
    dictReleaseIterator(di);
}

2.2.2 处理响应

根据注册的响应处理函数,从响应中获取状态。返回0则表示哨兵认为master在线,返回1则表示哨兵认为master下线。

void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
    sentinelRedisInstance *ri = privdata;
    instanceLink *link = c->data;
    redisReply *r;

    if (!reply || !link) return;
    link->pending_commands--;
    r = reply;

    /* Ignore every error or unexpected reply.
     * Note that if the command returns an error for any reason we'll
     * end clearing the SRI_MASTER_DOWN flag for timeout anyway. */
    if (r->type == REDIS_REPLY_ARRAY && r->elements == 3 &&
        r->element[0]->type == REDIS_REPLY_INTEGER &&
        r->element[1]->type == REDIS_REPLY_STRING &&
        r->element[2]->type == REDIS_REPLY_INTEGER)
    {
        ri->last_master_down_reply_time = mstime();
        if (r->element[0]->integer == 1) {
            ri->flags |= SRI_MASTER_DOWN;
        } else {
            ri->flags &= ~SRI_MASTER_DOWN;
        }
       ...
    }
}

2.2.3 其他哨兵处理询问请求

对于其他哨兵收到请求的处理

struct redisCommand sentinelcmds[] = {
   ...
    {"sentinel",sentinelCommand,-2,"admin",0,NULL,0,0,0,0,0},
    ...
};
void sentinelCommand(client *c) {
    ...
    else if (!strcasecmp(c->argv[1]->ptr,"is-master-down-by-addr")) {
        /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>
         *
         * Arguments:
         *
         * ip and port are the ip and port of the master we want to be
         * checked by Sentinel. Note that the command will not check by
         * name but just by master, in theory different Sentinels may monitor
         * different masters with the same name.
         *
         * current-epoch is needed in order to understand if we are allowed
         * to vote for a failover leader or not. Each Sentinel can vote just
         * one time per epoch.
         *
         * runid is "*" if we are not seeking for a vote from the Sentinel
         * in order to elect the failover leader. Otherwise it is set to the
         * runid we want the Sentinel to vote if it did not already voted.
         */
        sentinelRedisInstance *ri;
        long long req_epoch;
        uint64_t leader_epoch = 0;
        char *leader = NULL;
        long port;
        int isdown = 0;

        if (c->argc != 6) goto numargserr;
        if (getLongFromObjectOrReply(c,c->argv[3],&port,NULL) != C_OK ||
            getLongLongFromObjectOrReply(c,c->argv[4],&req_epoch,NULL)
                                                              != C_OK)
            return;
        ri = getSentinelRedisInstanceByAddrAndRunID(sentinel.masters,
            c->argv[2]->ptr,port,NULL);

        /* It exists? Is actually a master? Is subjectively down? It's down.
         * Note: if we are in tilt mode we always reply with "0". */
        if (!sentinel.tilt && ri && (ri->flags & SRI_S_DOWN) &&
                                    (ri->flags & SRI_MASTER))
            isdown = 1;
            
     	...
     	
        /* Reply with a three-elements multi-bulk reply:
         * down state, leader, vote epoch. */
        addReplyArrayLen(c,3);
        addReply(c, isdown ? shared.cone : shared.czero);
        addReplyBulkCString(c, leader ? leader : "*");
        addReplyLongLong(c, (long long)leader_epoch);
        if (leader) sdsfree(leader);
    } 

请添加图片描述

三、谁来主持故障切换

一个master有多个哨兵在监控,当master下线时,那由哪个哨兵来进行切换呢?如果都来切换一切都乱套了,所以必须有一个老大来处理,那谁来当老大呢?都不服谁,那就来个投票吧,少数服从多数。

3.1 投票选举

当故障后,依然使用的是sentinel is-master-down-by-addr命令进行投票,和状态获取不同的是runid字段传递的是发起投票的哨兵自己的runid(sentinel.myid),不是*

3.1.1 哨兵投票请求

检查是否需要故障切换

sentinelStartFailoverIfNeeded(sentinelRedisInstance *master)
{
...
	sentinelStartFailover(master);
...
}

需要故障切换,设置状态机为SENTINEL_FAILOVER_STATE_WAIT_START,并且epoch自增。这个epoch很重要,类似于版本号(选举轮次):每个哨兵在同一个epoch内只能投一次票,后续epoch更大的配置才会被认可。

void sentinelStartFailover(sentinelRedisInstance *master) {
   ...
    master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
    master->flags |= SRI_FAILOVER_IN_PROGRESS;
    master->failover_epoch = ++sentinel.current_epoch;
    ...
}

当开始故障切换时,使用sentinelAskMasterStateToOtherSentinels就是在拉票:此时runid传输的不再是*,而是当前哨兵自己的runid(sentinel.myid,唯一标识符),表示开始投票啦,我投我自己一票,看你们投给谁?

void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
...
 /* Ask */
        ll2string(port,sizeof(port),master->addr->port);
        retval = redisAsyncCommand(ri->link->cc,
                    sentinelReceiveIsMasterDownReply, ri,
                    "%s is-master-down-by-addr %s %s %llu %s",
                    sentinelInstanceMapCommand(ri,"SENTINEL"),
                    announceSentinelAddr(master->addr), port,
                    sentinel.current_epoch,
                    (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
                    sentinel.myid : "*");

3.1.2 其他哨兵处理投票请求

对于其他哨兵处理is-master-down-by-addr请求时,会判断runid是不是*,如果不是*,则会进行leader的投票。

void sentinelCommand(client *c) {
...
	} else if (!strcasecmp(c->argv[1]->ptr,"is-master-down-by-addr")) {
	...
	 /* Vote for the master (or fetch the previous vote) if the request
         * includes a runid, otherwise the sender is not seeking for a vote. */
        if (ri && ri->flags & SRI_MASTER && strcasecmp(c->argv[5]->ptr,"*")) {
            leader = sentinelVoteLeader(ri,(uint64_t)req_epoch,
                                            c->argv[5]->ptr,
                                            &leader_epoch);
        }
	...
	}
...
}

根据epoch值判断:只有请求中的epoch大于该master上已记录的leader_epoch,并且不小于本哨兵的current_epoch时,才会把票投给请求方(同一个epoch内先到先得);否则维持之前的投票结果。最后将被投票的哨兵的runid返回。

/* Cast (or recall) this Sentinel's vote for the failover leader of 'master'.
 *
 * 'req_epoch' and 'req_runid' are the epoch and candidate run ID of the
 * request. The epoch of the vote currently held for the master is stored in
 * '*leader_epoch'. The return value is a newly allocated copy of the run ID
 * voted for, or NULL when no vote has been recorded for this master. */
char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch) {
    int vote_allowed;

    /* A request from a newer epoch moves our own epoch forward first. */
    if (sentinel.current_epoch < req_epoch) {
        sentinel.current_epoch = req_epoch;
        sentinelFlushConfig();
        sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
            (unsigned long long) sentinel.current_epoch);
    }

    /* A new vote is granted only if we did not vote yet for this master in
     * an epoch as recent as the requested one, and the request is not older
     * than our current epoch. */
    vote_allowed = master->leader_epoch < req_epoch &&
                   sentinel.current_epoch <= req_epoch;
    if (vote_allowed) {
        sdsfree(master->leader);
        master->leader = sdsnew(req_runid);
        master->leader_epoch = sentinel.current_epoch;
        sentinelFlushConfig();
        sentinelEvent(LL_WARNING,"+vote-for-leader",master,"%s %llu",
            master->leader, (unsigned long long) master->leader_epoch);
        /* Having voted for somebody else, push our own failover start time
         * forward so that we wait before trying a failover of the same
         * master ourselves. */
        if (strcasecmp(master->leader,sentinel.myid) != 0)
            master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
    }

    *leader_epoch = master->leader_epoch;
    if (master->leader == NULL) return NULL;
    return sdsnew(master->leader);
}

3.1.3 哨兵处理其他哨兵响应

响应中的runid就是对方投票给的哨兵标识。

void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
...
 if (strcmp(r->element[1]->str,"*")) {
            /* If the runid in the reply is not "*" the Sentinel actually
             * replied with a vote. */
            sdsfree(ri->leader);
            if ((long long)ri->leader_epoch != r->element[2]->integer)
                serverLog(LL_WARNING,
                    "%s voted for %s %llu", ri->name,
                    r->element[1]->str,
                    (unsigned long long) r->element[2]->integer);
            ri->leader = sdsnew(r->element[1]->str);
            ri->leader_epoch = r->element[2]->integer;
        }
...
}

3.1.4 判断谁是哨兵leader

切换开始,进入故障切换的状态机中

sentinelHandleRedisInstance() ->
	sentinelFailoverStateMachine()
/* Run one step of the failover state machine for the master 'ri'.
 *
 * Nothing happens unless a failover is in progress for this master. The
 * handler to run is chosen from the state the master is in on entry, so at
 * most one handler is executed per call. */
void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
    int state;

    serverAssert(ri->flags & SRI_MASTER);

    if ((ri->flags & SRI_FAILOVER_IN_PROGRESS) == 0) return;

    /* Snapshot the state: a handler may advance it, but only the handler
     * matching the state seen here must run in this call. */
    state = ri->failover_state;
    if (state == SENTINEL_FAILOVER_STATE_WAIT_START) {
        sentinelFailoverWaitStart(ri);
    } else if (state == SENTINEL_FAILOVER_STATE_SELECT_SLAVE) {
        sentinelFailoverSelectSlave(ri);
    } else if (state == SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE) {
        sentinelFailoverSendSlaveOfNoOne(ri);
    } else if (state == SENTINEL_FAILOVER_STATE_WAIT_PROMOTION) {
        sentinelFailoverWaitPromotion(ri);
    } else if (state == SENTINEL_FAILOVER_STATE_RECONF_SLAVES) {
        sentinelFailoverReconfNextSlave(ri);
    }
}

现在故障切换的状态进入到SENTINEL_FAILOVER_STATE_WAIT_START,所以进入到sentinelFailoverWaitStart函数,而sentinelFailoverWaitStart最重要的一件事就是去选举哨兵的leader。

void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
    char *leader;
    int isleader;

    /* Check if we are the leader for the failover epoch. */
    leader = sentinelGetLeader(ri, ri->failover_epoch);
    isleader = leader && strcasecmp(leader,sentinel.myid) == 0;
    sdsfree(leader);
	
	...
	//哨兵leader获取成功,进入下一个状态
    ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE;
    ri->failover_state_change_time = mstime();
    ...
}
3.1.4.1 计算各个哨兵的票数

使用字典hash进行统计,key为runid,值为票数

...
	counters = dictCreate(&leaderVotesDictType,NULL);
...
   voters = dictSize(master->sentinels)+1; /* All the other sentinels and me.*/

    /* Count other sentinels votes */
    di = dictGetIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        if (ri->leader != NULL && ri->leader_epoch == sentinel.current_epoch)
            sentinelLeaderIncr(counters,ri->leader);
    }
    dictReleaseIterator(di);
...
/* Add one vote for 'runid' in the 'counters' dictionary and return the
 * resulting number of votes. A run ID seen for the first time is inserted
 * with a count of 1. */
int sentinelLeaderIncr(dict *counters, char *runid) {
    dictEntry *hit, *fresh;
    uint64_t votes;

    /* dictAddRaw() either creates the entry or reports the existing one. */
    fresh = dictAddRaw(counters,runid,&hit);
    if (hit == NULL) {
        serverAssert(fresh != NULL);
        votes = 1;
        dictSetUnsignedIntegerVal(fresh,votes);
    } else {
        votes = dictGetUnsignedIntegerVal(hit)+1;
        dictSetUnsignedIntegerVal(hit,votes);
    }
    return votes;
}
3.1.4.2 选择最大票数的哨兵候选人

遍历counters计数,筛选票数最大的对象,虽然是最大值,但是不一定满足条件。

char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
   ...
    dictIterator *di;
    dictEntry *de;
    unsigned int voters = 0, voters_quorum;
    char *winner = NULL;
    uint64_t max_votes = 0;
	...
	
    di = dictGetIterator(counters);
    while((de = dictNext(di)) != NULL) {
        uint64_t votes = dictGetUnsignedIntegerVal(de);

        if (votes > max_votes) {
            max_votes = votes;
            winner = dictGetKey(de);
        }
    }
    dictReleaseIterator(di);

    ...
}
3.1.4.3 当前哨兵投票

如果当前哨兵还没有投票,则进行投票。

    /* Count this Sentinel vote:
     * if this Sentinel did not voted yet, either vote for the most
     * common voted sentinel, or for itself if no vote exists at all. */
    if (winner)
        myvote = sentinelVoteLeader(master,epoch,winner,&leader_epoch);
    else
        myvote = sentinelVoteLeader(master,epoch,sentinel.myid,&leader_epoch);

    if (myvote && leader_epoch == epoch) {
        uint64_t votes = sentinelLeaderIncr(counters,myvote);

        if (votes > max_votes) {
            max_votes = votes;
            winner = myvote;
        }
    }
3.1.4.4 判断票数是否满足要求

可以看出投票数必须大于哨兵个数的一半
投票数必须大于等于quorum
这两个条件都满足时,才确定了leader。

/* Check what's the winner. For the winner to win, it needs two conditions:
     * 1) Absolute majority between voters (50% + 1).
     * 2) And anyway at least master->quorum votes. */
  voters_quorum = voters/2+1;
    if (winner && (max_votes < voters_quorum || max_votes < master->quorum))
        winner = NULL;

3.2 投票失败怎么办?

当投票失败,超时后,将终止切换。然后下一轮重新开始投票。
为了防止同时发起投票导致一直都选举不出leader,每个哨兵都会随机修改调度时间,增加选举leader成功率。哨兵发现异常时,如果自己没有投票,则投票给自己,每一轮投票中只能投一票。如果多个哨兵同时发起投票请求,他们都投自己一票,则票数不能超过一半,投票失败。

超时终止

sentinelFailoverWaitStart() ->
	sentinelAbortFailover()

改变定时器频率

void sentinelTimer(void) {
...
/* We continuously change the frequency of the Redis "timer interrupt"
     * in order to desynchronize every Sentinel from every other.
     * This non-determinism avoids that Sentinels started at the same time
     * exactly continue to stay synchronized asking to be voted at the
     * same time again and again (resulting in nobody likely winning the
     * election because of split brain voting). */
    server.hz = CONFIG_DEFAULT_HZ + rand() % CONFIG_DEFAULT_HZ;

}

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值