redis之辛勤的哨兵(四)你“老了”,该我上位啦

一、选谁上位呢?

1.1 你没资格

哨兵遍历当前被判断为下线状态的master下的所有replicate,逐一的判断是否有资格被提升为新的master,组成一个候选人队列。

满足如下的任意一个条件的都没有资格成为候选人。

  • 被哨兵认为是下线状态的
  • 和哨兵连接断开的
  • 上一次响应哨兵的时间超过了SENTINEL_PING_PERIOD*5,即超过5秒没有响应的
  • replicate的优先级配置为0的,可通过replica-priority 100 进行配置,默认为100
  • 上一次收到INFO命令响应的时间距今超过了有效期:master处于主观下线(SDOWN)状态时有效期为SENTINEL_PING_PERIOD*5,否则为SENTINEL_INFO_PERIOD*3
  • replicate和master的断开时间已经超过了master->down_after_period * 10 + master被判断为主观下线到现在的时间间隔。其中down_after_period通过配置项 sentinel down-after-milliseconds <name> <milliseconds> 获得,默认30s。
sentinelTimer() ->
	sentinelHandleDictOfRedisInstances() ->
		sentinelHandleRedisInstance() ->
		    sentinelFailoverStateMachine() ->
		        sentinelFailoverSelectSlave() ->
		            sentinelSelectSlave()
sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
    ...
        
	mstime_t max_master_down_time = 0;

    if (master->flags & SRI_S_DOWN)
        max_master_down_time += mstime() - master->s_down_since_time;
    max_master_down_time += master->down_after_period * 10;  
    
    di = dictGetIterator(master->slaves);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *slave = dictGetVal(de);
        mstime_t info_validity_time;

        if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
        if (slave->link->disconnected) continue;
        if (mstime() - slave->link->last_avail_time > SENTINEL_PING_PERIOD*5) continue;
        if (slave->slave_priority == 0) continue;

        /* If the master is in SDOWN state we get INFO for slaves every second.
         * Otherwise we get it with the usual period so we need to account for
         * a larger delay. */
        if (master->flags & SRI_S_DOWN)
            info_validity_time = SENTINEL_PING_PERIOD*5;
        else
            info_validity_time = SENTINEL_INFO_PERIOD*3;
        if (mstime() - slave->info_refresh > info_validity_time) continue;
        if (slave->master_link_down_time > max_master_down_time) continue;
        instance[instances++] = slave;
    }
    dictReleaseIterator(di);
  
    ...
}

1.2 候选人们来个排序

将有资格提升为master的候选者们通过自定义排序算法进行排序。

sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
    ...
	if (instances) {
        qsort(instance,instances,sizeof(sentinelRedisInstance*),
            compareSlavesForPromotion);
        selected = instance[0];
    }
...
}    

排序规则:

  • 优先级, 值越小越往前排
  • 同步偏移量,偏移值越大的往前排
  • 比较runid(不区分大小写的字典序),值小的往前排;runid为NULL的排在最后
int compareSlavesForPromotion(const void *a, const void *b) {
    sentinelRedisInstance **sa = (sentinelRedisInstance **)a,
                          **sb = (sentinelRedisInstance **)b;
    char *sa_runid, *sb_runid;

    if ((*sa)->slave_priority != (*sb)->slave_priority)
        return (*sa)->slave_priority - (*sb)->slave_priority;

    /* If priority is the same, select the slave with greater replication
     * offset (processed more data from the master). */
    if ((*sa)->slave_repl_offset > (*sb)->slave_repl_offset) {
        return -1; /* a < b */
    } else if ((*sa)->slave_repl_offset < (*sb)->slave_repl_offset) {
        return 1; /* a > b */
    }

    /* If the replication offset is the same select the slave with that has
     * the lexicographically smaller runid. Note that we try to handle runid
     * == NULL as there are old Redis versions that don't publish runid in
     * INFO. A NULL runid is considered bigger than any other runid. */
    sa_runid = (*sa)->runid;
    sb_runid = (*sb)->runid;
    if (sa_runid == NULL && sb_runid == NULL) return 0;
    else if (sa_runid == NULL) return 1;  /* a > b */
    else if (sb_runid == NULL) return -1; /* a < b */
    return strcasecmp(sa_runid, sb_runid);
}

排序完成后,选择了队头的那个候选人,然后其他人就没有然后了。

二、登基上位

太子选出来了,就开始进行升级,走上红地毯开始登基准备。

2.1 哨兵通知replica登基

哨兵通知被选择的replica,让其从replicate状态切换为master。

sentinelTimer()->
	sentinelHandleDictOfRedisInstances()->
		sentinelHandleRedisInstance() ->
			sentinelFailoverStateMachine() ->
				sentinelFailoverSendSlaveOfNoOne()
void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
    ...
    retval = sentinelSendSlaveOf(ri->promoted_slave,NULL);
    ...
}
/* Queue a MULTI/EXEC transaction on the instance's command link that
 * re-points the instance at a new master, or turns it into a master.
 *
 * ri   - the instance to reconfigure; commands go out on ri->link->cc.
 * addr - address of the master to replicate from. When NULL, the command
 *        sent is SLAVEOF NO ONE, which turns the instance into a master.
 *
 * The transaction contains, in order:
 *   1) SLAVEOF <host> <port>   (or SLAVEOF NO ONE)
 *   2) CONFIG REWRITE
 *   3) CLIENT KILL TYPE normal, then CLIENT KILL TYPE pubsub, so that
 *      connected clients are disconnected and have to reconnect.
 *
 * Every command is registered with sentinelDiscardReplyCallback: replies
 * are not inspected here, the effects are observed in later INFO output.
 * ri->link->pending_commands is incremented once per successfully queued
 * command.
 *
 * Returns C_OK if every command was queued. If any redisAsyncCommand()
 * call returns C_ERR the function returns that value immediately, without
 * queueing the remaining commands. */
int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr) {
    const char *kill_types[2] = { "normal", "pubsub" };
    char portstr[32];
    const char *host;
    int rc;
    int i;

    /* Build the two SLAVEOF arguments. */
    if (addr == NULL) {
        host = "NO";
        memcpy(portstr,"ONE",4); /* 3 characters plus the terminator */
    } else {
        host = announceSentinelAddr(addr);
        ll2string(portstr,sizeof(portstr),addr->port);
    }

    /* Open the transaction. */
    rc = redisAsyncCommand(ri->link->cc,
            sentinelDiscardReplyCallback, ri, "%s",
            sentinelInstanceMapCommand(ri,"MULTI"));
    if (rc == C_ERR) return rc;
    ri->link->pending_commands += 1;

    /* 1) Switch replication target. */
    rc = redisAsyncCommand(ri->link->cc,
            sentinelDiscardReplyCallback, ri, "%s %s %s",
            sentinelInstanceMapCommand(ri,"SLAVEOF"),
            host, portstr);
    if (rc == C_ERR) return rc;
    ri->link->pending_commands += 1;

    /* 2) Persist the new configuration. */
    rc = redisAsyncCommand(ri->link->cc,
            sentinelDiscardReplyCallback, ri, "%s REWRITE",
            sentinelInstanceMapCommand(ri,"CONFIG"));
    if (rc == C_ERR) return rc;
    ri->link->pending_commands += 1;

    /* 3) Drop client connections. CLIENT KILL TYPE <type> exists only
     * from Redis 2.8.12 on; since CLIENT is a variadic command an older
     * instance does not treat it as a syntax error, so only that single
     * command fails and the transaction as a whole still runs. */
    for (i = 0; i < 2; i++) {
        rc = redisAsyncCommand(ri->link->cc,
                sentinelDiscardReplyCallback, ri, "%s KILL TYPE %s",
                sentinelInstanceMapCommand(ri,"CLIENT"),
                kill_types[i]);
        if (rc == C_ERR) return rc;
        ri->link->pending_commands += 1;
    }

    /* Execute everything queued since MULTI. */
    rc = redisAsyncCommand(ri->link->cc,
            sentinelDiscardReplyCallback, ri, "%s",
            sentinelInstanceMapCommand(ri,"EXEC"));
    if (rc == C_ERR) return rc;
    ri->link->pending_commands += 1;

    return C_OK;
}

因为升级过程需要几条命令,所以为了能原子地完成这一系列操作,使用了MULTI命令:在EXEC命令之前,传输的命令都只是被缓存起来,并没有执行,收到EXEC命令后才开始全部执行。整个提升步骤如下:

  • SLAVEOF NO ONE ,解脱replicate的身份,变为了master,一朝变凤凰
  • CONFIG REWRITE,将配置持久化到配置文件,防止睡一觉后睁眼一切都是梦
  • CLIENT KILL TYPE normal/pubsub,杀掉原来的client连接,兔死狗烹鸟尽弓藏

2.2 哨兵发布诏书昭告天下

哨兵依次给其他的replicate发送新的master地址,让它们从新的master同步数据。

sentinelFailoverStateMachine() ->
    sentinelFailoverReconfNextSlave()
/* Send SLAVE OF <new master address> to all the remaining slaves that
 * still don't appear to have the configuration updated. */
void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
    dictIterator *di;
    dictEntry *de;
    int in_progress = 0;

   ...

    di = dictGetIterator(master->slaves);
    while(in_progress < master->parallel_syncs &&
          (de = dictNext(di)) != NULL)
    {
        sentinelRedisInstance *slave = dictGetVal(de);
        int retval;

        /* Skip the promoted slave, and already configured slaves. */
        if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE)) continue;

        ...

        /* Nothing to do for instances that are disconnected or already
         * in RECONF_SENT state. */
        if (slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)) continue;
        if (slave->link->disconnected) continue;

        /* Send SLAVEOF <new master>. */
        retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr);
        if (retval == C_OK) {
            slave->flags |= SRI_RECONF_SENT;
            slave->slave_reconf_sent_time = mstime();
            sentinelEvent(LL_NOTICE,"+slave-reconf-sent",slave,"%@");
            in_progress++;
        }
    }
    dictReleaseIterator(di);

    /* Check if all the slaves are reconfigured and handle timeout. */
    sentinelFailoverDetectEnd(master);
}

  • 遍历下线master下的replicate
  • 跳过已经登基(被提升为master)以及已经完成重新配置的replicate
  • 跳过已经发送诏书的replicate
  • 跳过已经和哨兵断开连接的replicate
  • 发送诏书给replicate,让replicate从新的master进行同步数据

三、哨兵承认你的地位

切换成功后,即所有的replicate都从新的master开始同步数据,哨兵将更新本地配置,然后将新的配置写入配置文件中进行持久化。

3.1 哨兵更新配置

sentinelTimer() ->
	sentinelHandleDictOfRedisInstances() ->
		sentinelFailoverSwitchToPromotedSlave() ->
   			sentinelResetMasterAndChangeAddress()
3.1.1 重置master地址

将升级的replicate的地址替换原来那个下线的master地址。

    /* Reset and switch address. */
    sentinelResetMaster(master,SENTINEL_RESET_NO_SENTINELS);
    oldaddr = master->addr;
    master->addr = newaddr;
    master->o_down_since_time = 0;
    master->s_down_since_time = 0;
3.1.2 重建文武百官

将下线的master的replicate获取到,然后重新创建到新的master下面。

...
    /* There can be only 0 or 1 slave that has the newaddr.
     * and It can add old master 1 more slave. 
     * so It allocates dictSize(master->slaves) + 1          */
    slaves = zmalloc(sizeof(sentinelAddr*)*(dictSize(master->slaves) + 1));
    
    /* Don't include the one having the address we are switching to. */
    di = dictGetIterator(master->slaves);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *slave = dictGetVal(de);

        if (sentinelAddrIsEqual(slave->addr,newaddr)) continue;
        slaves[numslaves++] = dupSentinelAddr(slave->addr);
    }
    dictReleaseIterator(di);

    /* If we are switching to a different address, include the old address
     * as a slave as well, so that we'll be able to sense / reconfigure
     * the old master. */
    if (!sentinelAddrIsEqual(newaddr,master->addr)) {
        slaves[numslaves++] = dupSentinelAddr(master->addr);
    }
...
    
    
/* Add slaves back. */
    for (j = 0; j < numslaves; j++) {
        sentinelRedisInstance *slave;

        slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->hostname,
                    slaves[j]->port, master->quorum, master);
        releaseSentinelAddr(slaves[j]);
        if (slave) sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
    }

3.2 哨兵重写配置

哨兵将更新后的配置重新写入配置文件,防止重启后新的master丢失。

int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *hostname, int port) {
    ...
    sentinelFlushConfig();
    return C_OK;
}
/* Rewrite the configuration file and fsync() it.
 *
 * Steps:
 *   1) Call rewriteConfig() on server.configfile while server.hz is
 *      temporarily set to CONFIG_DEFAULT_HZ; the previous hz value is
 *      restored right after the rewrite.
 *      NOTE(review): presumably this keeps the runtime hz value out of the
 *      rewritten file; confirm against rewriteConfig().
 *   2) Re-open the file read-only, fsync() it and close it.
 *
 * Takes no arguments and returns nothing. Any failure is reported with a
 * LL_WARNING log line carrying strerror(errno); the caller is not told. */
void sentinelFlushConfig(void) {
    int fd = -1;
    int saved_hz = server.hz;
    int rewrite_status;
    int close_status;

    server.hz = CONFIG_DEFAULT_HZ;
    rewrite_status = rewriteConfig(server.configfile, 0);
    server.hz = saved_hz;

    if (rewrite_status == -1) goto werr;
    if ((fd = open(server.configfile,O_RDONLY)) == -1) goto werr;
    if (fsync(fd) == -1) goto werr;

    /* close() reports failure with -1 (EOF is a stdio constant and is not
     * its documented error value). The descriptor is forgotten before the
     * result is checked: after a failed close() it must not be closed
     * again, because the number may already have been released and reused
     * for an unrelated descriptor. The assignment leaves errno intact for
     * the log line below. */
    close_status = close(fd);
    fd = -1;
    if (close_status == -1) goto werr;
    return;

werr:
    serverLog(LL_WARNING,"WARNING: Sentinel was not able to save the new configuration on disk!!!: %s", strerror(errno));
    /* Only reached with a live descriptor when fsync() failed. */
    if (fd != -1) close(fd);
}

到此,新的皇帝正式坐稳了。后续哨兵按照原有的监控方式继续监控新的master以及replicate。

请添加图片描述
请添加图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值