redis之辛勤的哨兵(一)诞生日

一、为什么需要哨兵?

请添加图片描述
当使用主从模式进行高可用的部署时,如上图所示。
这样部署会带来一个问题:master和replica的状态都需要由client自己维护。client要判断master是否出现异常;当master出现异常时,要选取一个replica升级为master;还要通知其他的replica从新的master同步数据,等等。这样client就会非常臃肿,工作量大,开发效率低,不易维护。因此redis提供了哨兵机制,把这些工作从client中解放出来。

二、什么是哨兵?

哨兵是一个特殊状态下的redis服务实例,主要有四大功能点:

  • 监控
    不断的检查master和replica的状态是否正常。
  • 通知
    当某个被监控的redis实例出现异常时,哨兵通知系统管理员或者其他程序。
  • 故障转移
    当某个被监控的master异常时,哨兵将开始进行故障转移,提升某个replica成为新的master,并且通知其他replica向新的master连接进行数据同步。
  • 服务发现
    哨兵为client提供master的地址。client询问哨兵master的地址,哨兵返回给client,client根据获取到的地址进行读写操作;当master切换后,将返回新的master地址给client。

三、我是谁的哨兵?

通过配置,告诉哨兵需要监控哪个master。

3.1 如何配置

配置监控的master的IP为127.0.0.1,端口是6379,最后的2是quorum(其值必须大于等于1),这样哨兵就知道监控哪个master实例了。

sentinel monitor mymaster 127.0.0.1 6379 2

3.2 如何启动哨兵

可以通过如下两种方式启动哨兵

$ redis-sentinel /path/to/sentinel.conf

$ redis-server /path/to/sentinel.conf --sentinel

通过计算二进制的MD5,能确定这几个文件都是同一个,所以不同的功能是根据参数进行判断的。
在这里插入图片描述
从Makefile文件中也能看出,redis-sentinel只是redis-server的一份拷贝

# redis-server
$(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a $(FINAL_LIBS)

# redis-sentinel
$(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
	$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME)

3.3 哨兵初始化过程

因为哨兵只是特殊的一个redis实例,所以初始化的开头部分和redis实例初始化过程一样。
但是哨兵不需要使用数据,所以不会进行rdb或者aof的加载,并且只支持少数几个命令。

3.3.1 获取哨兵模式

int main(int argc, char **argv) {
    // Regular server initialization steps (elided in this excerpt)
    ...
    // Decide whether this process has to run in Sentinel mode
     server.sentinel_mode = checkForSentinelMode(argc,argv);
    ...
}

从代码也能看出,哨兵模式的判断方式和我们刚开始说的一致。

/* Decide whether the process must start as a Sentinel.
 *
 * Yields 1 when the program name (argv[0]) contains "redis-sentinel", or when
 * any of the following arguments is exactly "--sentinel"; yields 0 otherwise. */
int checkForSentinelMode(int argc, char **argv) {
    /* Started through an executable whose name contains "redis-sentinel". */
    if (strstr(argv[0],"redis-sentinel")) return 1;

    /* Otherwise look for the explicit switch among the remaining arguments. */
    int idx = 1;
    while (idx < argc) {
        if (strcmp(argv[idx],"--sentinel") == 0) return 1;
        idx++;
    }
    return 0;
}

3.3.2 初始化哨兵

这里主要调用了两个函数

 	/* Sentinel mode only: override the server defaults, then set up the Sentinel state. */
 	if (server.sentinel_mode) {
        initSentinelConfig();
        initSentinel();
    }
  • 调整哨兵的默认监听端口为26379
#define REDIS_SENTINEL_PORT 26379

/* Overrides the server settings that differ when running as a Sentinel:
 * protected mode is switched off and the listening port becomes
 * REDIS_SENTINEL_PORT. */
void initSentinelConfig(void) {
    /* Sentinel must be exposed. */
    server.protected_mode = 0;
    server.port = REDIS_SENTINEL_PORT;
}
  • 构建哨兵的命令表以及全局变量初始化
/* Prepares the process to act as a Sentinel: the regular command table is
 * replaced by the entries of sentinelcmds, and the global `sentinel` state
 * is set to its initial values. */
void initSentinel(void) {
    unsigned int idx;
    unsigned int numcmds = sizeof(sentinelcmds)/sizeof(sentinelcmds[0]);

    /* Drop every regular Redis command; only the Sentinel command set is
     * registered below. */
    dictEmpty(server.commands,NULL);
    dictEmpty(server.orig_commands,NULL);
    ACLClearCommandID();

    for (idx = 0; idx < numcmds; idx++) {
        struct redisCommand *entry = &sentinelcmds[idx];
        int rc;

        /* Assign the ID used for ACL. */
        entry->id = ACLGetCommandID(entry->name);

        /* The same entry is registered under its name in both tables. */
        rc = dictAdd(server.commands, sdsnew(entry->name), entry);
        serverAssert(rc == DICT_OK);
        rc = dictAdd(server.orig_commands, sdsnew(entry->name), entry);
        serverAssert(rc == DICT_OK);

        /* Convert the textual flags description into the actual set of
         * flags; an unparsable description is fatal. */
        if (populateCommandTableParseFlags(entry,entry->sflags) == C_ERR)
            serverPanic("Unsupported command flag");
    }

    /* Initial values of the global Sentinel state. */
    sentinel.current_epoch = 0;
    sentinel.masters = dictCreate(&instancesDictType,NULL);

    sentinel.tilt = 0;
    sentinel.tilt_start_time = 0;
    sentinel.previous_time = mstime();

    sentinel.running_scripts = 0;
    sentinel.scripts_queue = listCreate();

    sentinel.announce_ip = NULL;
    sentinel.announce_port = 0;

    sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
    sentinel.deny_scripts_reconfig = SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG;

    sentinel.sentinel_auth_pass = NULL;
    sentinel.sentinel_auth_user = NULL;

    sentinel.resolve_hostnames = SENTINEL_DEFAULT_RESOLVE_HOSTNAMES;
    sentinel.announce_hostnames = SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES;

    memset(sentinel.myid,0,sizeof(sentinel.myid));

    /* No queued sentinel configuration yet. */
    server.sentinel_config = NULL;
}

可以看出哨兵只支持如下15个命令

/* Command table used in Sentinel mode: initSentinel() empties
 * server.commands and server.orig_commands and then registers exactly these
 * entries in both. Entries are initialized positionally; initSentinel() reads
 * the `name` and `sflags` members of each entry (presumably the first and the
 * fourth value of a row; confirm against the struct redisCommand
 * declaration). */
struct redisCommand sentinelcmds[] = {
    {"ping",pingCommand,1,"fast @connection",0,NULL,0,0,0,0,0},
    {"sentinel",sentinelCommand,-2,"admin",0,NULL,0,0,0,0,0},
    {"subscribe",subscribeCommand,-2,"pub-sub",0,NULL,0,0,0,0,0},
    {"unsubscribe",unsubscribeCommand,-1,"pub-sub",0,NULL,0,0,0,0,0},
    {"psubscribe",psubscribeCommand,-2,"pub-sub",0,NULL,0,0,0,0,0},
    {"punsubscribe",punsubscribeCommand,-1,"pub-sub",0,NULL,0,0,0,0,0},
    {"publish",sentinelPublishCommand,3,"pub-sub fast",0,NULL,0,0,0,0,0},
    {"info",sentinelInfoCommand,-1,"random @dangerous",0,NULL,0,0,0,0,0},
    {"role",sentinelRoleCommand,1,"fast read-only @dangerous",0,NULL,0,0,0,0,0},
    {"client",clientCommand,-2,"admin random @connection",0,NULL,0,0,0,0,0},
    {"shutdown",shutdownCommand,-1,"admin",0,NULL,0,0,0,0,0},
    {"auth",authCommand,-2,"no-auth fast @connection",0,NULL,0,0,0,0,0},
    {"hello",helloCommand,-1,"no-auth fast @connection",0,NULL,0,0,0,0,0},
    /* NOTE(review): the two rows below carry one more trailing 0 than the
     * rows above; confirm against struct redisCommand that this is
     * intentional. */
    {"acl",aclCommand,-2,"admin",0,NULL,0,0,0,0,0,0},
    {"command",commandCommand,-1, "random @connection", 0,NULL,0,0,0,0,0,0}
};

3.3.3 加载哨兵配置

/* Sentinel mode only: apply the sentinel configuration entries queued while
 * the configuration was parsed. */
if (server.sentinel_mode) loadSentinelConfigFromQueue();

从三个链表中加载配置进行处理。

/* This function is used for loading the sentinel configuration from
 * pre_monitor_cfg, monitor_cfg and post_monitor_cfg list.
 *
 * The three lists are walked in that fixed order and every queued entry is
 * handed to sentinelHandleConfiguration(). Nothing happens when no sentinel
 * configuration was queued (server.sentinel_config == NULL). Once all three
 * lists have been processed the queue is released with freeSentinelConfig().
 *
 * NOTE(review): the handling of a non-NULL `err` is elided ("...") in this
 * excerpt; `linenum` and `line` are presumably used there. Confirm against
 * the full source. */
void loadSentinelConfigFromQueue(void) {
    const char *err = NULL;
    listIter li;
    listNode *ln;
    int linenum = 0;
    sds line = NULL;

    /* if there is no sentinel_config entry, we can return immediately */
    if (server.sentinel_config == NULL) return;

    /* loading from pre monitor config queue first to avoid dependency issues */
    listRewind(server.sentinel_config->pre_monitor_cfg,&li);
    while((ln = listNext(&li))) {
        struct sentinelLoadQueueEntry *entry = ln->value;
        err = sentinelHandleConfiguration(entry->argv,entry->argc);
        ...
    }

    /* loading from monitor config queue */
    listRewind(server.sentinel_config->monitor_cfg,&li);
    while((ln = listNext(&li))) {
        struct sentinelLoadQueueEntry *entry = ln->value;
        err = sentinelHandleConfiguration(entry->argv,entry->argc);
        ...
    }

    /* loading from the post monitor config queue */
    listRewind(server.sentinel_config->post_monitor_cfg,&li);
    while((ln = listNext(&li))) {
        struct sentinelLoadQueueEntry *entry = ln->value;
        err = sentinelHandleConfiguration(entry->argv,entry->argc);
       ...
    }

    /* free sentinel_config when config loading is finished */
    freeSentinelConfig();
    return;
	
	...
}

这三个链表在哪里赋值的呢?
从配置中加载出来的,然后加入到链表中。

main() ->
	loadServerConfig(server.configfile, config_from_stdin, options) ->
		loadServerConfigFromString()
			...
			for (i = 0; i < totlines; i++) {
				...
				else if (!strcasecmp(argv[0],"sentinel")) {
            		...
		           queueSentinelConfig(argv+1,argc-1,linenum,lines[i]);
		        } 
			}
			...

将每行解析成argc,argv形式,然后加入到链表中。

/* Stores one parsed sentinel configuration line for later processing.
 *
 * argv/argc: the tokens of the line; argv[0] is the option name.
 * linenum/line: line number and text of the line, recorded in the entry.
 *
 * The entry holds its own duplicates of `line` and of every token. It is
 * appended to one of the three lists of server.sentinel_config:
 *   - monitor_cfg       when the option is "monitor";
 *   - pre_monitor_cfg   when searchPreMonitorCfgName() accepts the option;
 *   - post_monitor_cfg  in every other case. */
void queueSentinelConfig(sds *argv, int argc, int linenum, sds line) {
    struct sentinelLoadQueueEntry *item;
    int pos = 0;

    /* The queue holder is created on the first call. */
    if (server.sentinel_config == NULL) initializeSentinelConfig();

    item = zmalloc(sizeof(*item));
    item->argv = zmalloc(sizeof(char*)*argc);
    item->line = sdsdup(line);
    item->linenum = linenum;
    item->argc = argc;
    while (pos < argc) {
        item->argv[pos] = sdsdup(argv[pos]);
        pos++;
    }

    /* Keep "monitor" lines, the options that have to be applied before
     * them, and everything else in separate lists, so that dependencies
     * between options can be honoured when the lists are processed. */
    if (strcasecmp(argv[0],"monitor") == 0) {
        listAddNodeTail(server.sentinel_config->monitor_cfg,item);
    } else if (searchPreMonitorCfgName(argv[0]) != 0) {
        listAddNodeTail(server.sentinel_config->pre_monitor_cfg,item);
    } else {
        listAddNodeTail(server.sentinel_config->post_monitor_cfg,item);
    }
}

根据不同的配置放入不同的链表中。
monitor配置放入monitor_cfg链表,而对于如下的配置,放入到pre_monitor_cfg链表中,其他的配置则放入到post_monitor_cfg链表中。

/* Names of the sentinel options that are queued on the pre-monitor list,
 * i.e. processed before any "monitor" line.
 *
 * The table is defined ahead of searchPreMonitorCfgName() because that
 * function applies sizeof() to the array, which requires the complete
 * definition to be visible at that point. */
const char *preMonitorCfgName[] = {
    "announce-ip",
    "announce-port",
    "deny-scripts-reconfig",
    "sentinel-user",
    "sentinel-pass",
    "current-epoch",
    "myid",
    "resolve-hostnames",
    "announce-hostnames"
};

/* Returns 1 when `name` matches, ignoring case, one of the entries of
 * preMonitorCfgName; returns 0 otherwise. */
int searchPreMonitorCfgName(const char *name) {
    const unsigned int count =
        sizeof(preMonitorCfgName)/sizeof(preMonitorCfgName[0]);

    for (unsigned int i = 0; i < count; i++) {
        if (!strcasecmp(preMonitorCfgName[i],name)) return 1;
    }
    return 0;
}

请添加图片描述

为啥需要三个链表?
因为各个配置之间有前后依赖关系,为了简化处理过程,将配置按照依赖关系分别加入到三个链表中,后续处理时只需要按照三个链表的先后顺序依次处理。这种方式在我们自己的设计中也可以借鉴。

  /* loading from pre monitor config queue first to avoid dependency issues */
    listRewind(server.sentinel_config->pre_monitor_cfg,&li);
    while((ln = listNext(&li))) {
       ...
    }

    /* loading from monitor config queue */
    listRewind(server.sentinel_config->monitor_cfg,&li);
    while((ln = listNext(&li))) {
        ...
    }

    /* loading from the post monitor config queue */
    listRewind(server.sentinel_config->post_monitor_cfg,&li);
    while((ln = listNext(&li))) {
       ...
    }

配置的主要解析函数

/* Applies one sentinel configuration directive.
 *
 * argv[0] is the directive name and argc the total number of tokens (name
 * included). A directive is only accepted with the token count checked in
 * its branch; anything else ends in the final "Unrecognized" error.
 * Directives whose first argument is a master <name> look the master up with
 * sentinelGetMasterByName() and are rejected when it is not found.
 *
 * Returns NULL on success, or an error message string explaining why the
 * directive was rejected. */
const char *sentinelHandleConfiguration(char **argv, int argc) {

    sentinelRedisInstance *ri;

    if (!strcasecmp(argv[0],"monitor") && argc == 5) {
        /* monitor <name> <host> <port> <quorum> */
        int quorum = atoi(argv[4]);

        if (quorum <= 0) return "Quorum must be 1 or greater.";
        if (createSentinelRedisInstance(argv[1],SRI_MASTER,argv[2],
                                        atoi(argv[3]),quorum,NULL) == NULL)
        {
            return sentinelCheckCreateInstanceErrors(SRI_MASTER);
        }
    } else if (!strcasecmp(argv[0],"down-after-milliseconds") && argc == 3) {
        /* down-after-milliseconds <name> <milliseconds> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        ri->down_after_period = atoi(argv[2]);
        if (ri->down_after_period <= 0)
            return "negative or zero time parameter.";
        sentinelPropagateDownAfterPeriod(ri);
    } else if (!strcasecmp(argv[0],"failover-timeout") && argc == 3) {
        /* failover-timeout <name> <milliseconds> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        ri->failover_timeout = atoi(argv[2]);
        if (ri->failover_timeout <= 0)
            return "negative or zero time parameter.";
    } else if (!strcasecmp(argv[0],"parallel-syncs") && argc == 3) {
        /* parallel-syncs <name> <numreplicas> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        ri->parallel_syncs = atoi(argv[2]);
    } else if (!strcasecmp(argv[0],"notification-script") && argc == 3) {
        /* notification-script <name> <path> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        if (access(argv[2],X_OK) == -1)
            return "Notification script seems non existing or non executable.";
        ri->notification_script = sdsnew(argv[2]);
    } else if (!strcasecmp(argv[0],"client-reconfig-script") && argc == 3) {
        /* client-reconfig-script <name> <path> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        if (access(argv[2],X_OK) == -1)
            return "Client reconfiguration script seems non existing or "
                   "non executable.";
        ri->client_reconfig_script = sdsnew(argv[2]);
    } else if (!strcasecmp(argv[0],"auth-pass") && argc == 3) {
        /* auth-pass <name> <password> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        ri->auth_pass = sdsnew(argv[2]);
    } else if (!strcasecmp(argv[0],"auth-user") && argc == 3) {
        /* auth-user <name> <username> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        ri->auth_user = sdsnew(argv[2]);
    } else if (!strcasecmp(argv[0],"current-epoch") && argc == 2) {
        /* current-epoch <epoch> */
        unsigned long long current_epoch = strtoull(argv[1],NULL,10);
        /* The stored epoch is only ever moved forward. */
        if (current_epoch > sentinel.current_epoch)
            sentinel.current_epoch = current_epoch;
    } else if (!strcasecmp(argv[0],"myid") && argc == 2) {
        /* myid <sentinel-id>: must be exactly CONFIG_RUN_ID_SIZE characters. */
        if (strlen(argv[1]) != CONFIG_RUN_ID_SIZE)
            return "Malformed Sentinel id in myid option.";
        memcpy(sentinel.myid,argv[1],CONFIG_RUN_ID_SIZE);
    } else if (!strcasecmp(argv[0],"config-epoch") && argc == 3) {
        /* config-epoch <name> <epoch> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        ri->config_epoch = strtoull(argv[2],NULL,10);
        /* The following update of current_epoch is not really useful as
         * now the current epoch is persisted on the config file, but
         * we leave this check here for redundancy. */
        if (ri->config_epoch > sentinel.current_epoch)
            sentinel.current_epoch = ri->config_epoch;
    } else if (!strcasecmp(argv[0],"leader-epoch") && argc == 3) {
        /* leader-epoch <name> <epoch> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        ri->leader_epoch = strtoull(argv[2],NULL,10);
    } else if ((!strcasecmp(argv[0],"known-slave") ||
                !strcasecmp(argv[0],"known-replica")) && argc == 4)
    {
        sentinelRedisInstance *slave;

        /* known-replica <name> <ip> <port> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,argv[2],
                    atoi(argv[3]), ri->quorum, ri)) == NULL)
        {
            return sentinelCheckCreateInstanceErrors(SRI_SLAVE);
        }
    } else if (!strcasecmp(argv[0],"known-sentinel") &&
               (argc == 4 || argc == 5)) {
        sentinelRedisInstance *si;

        /* The 4-token form is accepted but has no effect: only the form
         * that includes the runid creates an instance. */
        if (argc == 5) { /* Ignore the old form without runid. */
            /* known-sentinel <name> <ip> <port> [runid] */
            ri = sentinelGetMasterByName(argv[1]);
            if (!ri) return "No such master with specified name.";
            if ((si = createSentinelRedisInstance(argv[4],SRI_SENTINEL,argv[2],
                        atoi(argv[3]), ri->quorum, ri)) == NULL)
            {
                return sentinelCheckCreateInstanceErrors(SRI_SENTINEL);
            }
            si->runid = sdsnew(argv[4]);
            sentinelTryConnectionSharing(si);
        }
    } else if (!strcasecmp(argv[0],"rename-command") && argc == 4) {
        /* rename-command <name> <command> <renamed-command> */
        ri = sentinelGetMasterByName(argv[1]);
        if (!ri) return "No such master with specified name.";
        sds oldcmd = sdsnew(argv[2]);
        sds newcmd = sdsnew(argv[3]);
        /* The strings are released here only when the insertion fails. */
        if (dictAdd(ri->renamed_commands,oldcmd,newcmd) != DICT_OK) {
            sdsfree(oldcmd);
            sdsfree(newcmd);
            return "Same command renamed multiple times with rename-command.";
        }
    } else if (!strcasecmp(argv[0],"announce-ip") && argc == 2) {
        /* announce-ip <ip-address> */
        if (strlen(argv[1]))
            sentinel.announce_ip = sdsnew(argv[1]);
    } else if (!strcasecmp(argv[0],"announce-port") && argc == 2) {
        /* announce-port <port> */
        sentinel.announce_port = atoi(argv[1]);
    } else if (!strcasecmp(argv[0],"deny-scripts-reconfig") && argc == 2) {
        /* deny-scripts-reconfig <yes|no> */
        if ((sentinel.deny_scripts_reconfig = yesnotoi(argv[1])) == -1) {
            return "Please specify yes or no for the "
                   "deny-scripts-reconfig options.";
        }
    } else if (!strcasecmp(argv[0],"sentinel-user") && argc == 2) {
        /* sentinel-user <user-name> */
        if (strlen(argv[1]))
            sentinel.sentinel_auth_user = sdsnew(argv[1]);
    } else if (!strcasecmp(argv[0],"sentinel-pass") && argc == 2) {
        /* sentinel-pass <password> */
        if (strlen(argv[1]))
            sentinel.sentinel_auth_pass = sdsnew(argv[1]);
    } else if (!strcasecmp(argv[0],"resolve-hostnames") && argc == 2) {
        /* resolve-hostnames <yes|no> */
        if ((sentinel.resolve_hostnames = yesnotoi(argv[1])) == -1) {
            return "Please specify yes or no for the resolve-hostnames option.";
        }
    } else if (!strcasecmp(argv[0],"announce-hostnames") && argc == 2) {
        /* announce-hostnames <yes|no> */
        if ((sentinel.announce_hostnames = yesnotoi(argv[1])) == -1) {
            return "Please specify yes or no for the announce-hostnames option.";
        }
    } else {
        /* Unknown directive name, or known name with an unexpected number
         * of tokens. */
        return "Unrecognized sentinel configuration statement.";
    }
    return NULL;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值