1.1简介
哨兵(Sentinel)是Redis的高可用方案:由一个或者多个Sentinel实例组成的Sentinel系统可以监视任意多个主服务器,以及这些主服务器属下的所有从服务器,并在被监视的主服务器下线时,自动将它的某个从服务器升级为新的主服务器。
举个例子,假设有1个主服务器server1,和多个从服务器server2.....servern
1)哨兵监视所有服务器
2)主服务器server1下线后,从它的从服务器中选出一个升级为新的主服务器,并向其余的从服务器发送slaveof命令
3)其他服务器附属于新的服务器
4)继续监视已下线的server1,当它重新上线时,把它设置为新主服务器的从服务器(它不会重新成为主服务器,当前的主服务器也不会被降级)
1.2启动sentinel
redis-sentinel /path/to/your/sentinel.conf
或者命令
redis-server /path/to/your/sentinel.conf --sentinel
启动后进行如下的操作,
1)初始化服务器
在这里,sentinel本质上是一个运行在特殊模式下的redis服务器,它主要使用的功能有:slaveof等复制相关的命令、时间事件处理器、文件事件处理器,以及发布和订阅功能(publish和subscribe)
2)将redis专用的代码替换为sentinel的代码
在作为sentinel时候只具有这些功能;
/* Command table used when the server runs in Sentinel mode; per the
 * surrounding notes these are the only commands available in that mode.
 *
 * Every row is a positional initializer of struct redisCommand. That
 * struct's definition is not shown here, so the column meanings are an
 * assumption to confirm against it: presumably
 * {name, handler, arity, flags string, ...remaining fields 0/NULL}.
 *
 * Compared with the same names in redisCommandTable, "publish", "info" and
 * "role" are bound to sentinel-specific handlers (sentinelPublishCommand,
 * sentinelInfoCommand, sentinelRoleCommand). */
struct redisCommand sentinelcmds[] = {
{"ping",pingCommand,1,"",0,NULL,0,0,0,0,0},
{"sentinel",sentinelCommand,-2,"",0,NULL,0,0,0,0,0},
{"subscribe",subscribeCommand,-2,"",0,NULL,0,0,0,0,0},
{"unsubscribe",unsubscribeCommand,-1,"",0,NULL,0,0,0,0,0},
{"psubscribe",psubscribeCommand,-2,"",0,NULL,0,0,0,0,0},
{"punsubscribe",punsubscribeCommand,-1,"",0,NULL,0,0,0,0,0},
{"publish",sentinelPublishCommand,3,"",0,NULL,0,0,0,0,0},
{"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0},
{"role",sentinelRoleCommand,1,"l",0,NULL,0,0,0,0,0},
{"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0},
{"shutdown",shutdownCommand,-1,"",0,NULL,0,0,0,0,0},
{"auth",authCommand,2,"sltF",0,NULL,0,0,0,0,0}
};
而作为redis服务器时候却有这么多功能
/* Full command table used when running as a regular Redis server; shown in
 * these notes for contrast with the much shorter sentinelcmds table.
 *
 * Every row is a positional initializer of struct redisCommand, whose
 * definition is not visible here. Presumably the columns are
 * {name, handler, arity, flags string, flags bitmask, getkeys callback,
 *  first key, last key, key step, microseconds, calls} -- TODO confirm
 * against the struct definition before relying on this reading. */
struct redisCommand redisCommandTable[] = {
{"module",moduleCommand,-2,"as",0,NULL,0,0,0,0,0},
{"get",getCommand,2,"rF",0,NULL,1,1,1,0,0},
{"set",setCommand,-3,"wm",0,NULL,1,1,1,0,0},
{"setnx",setnxCommand,3,"wmF",0,NULL,1,1,1,0,0},
{"setex",setexCommand,4,"wm",0,NULL,1,1,1,0,0},
{"psetex",psetexCommand,4,"wm",0,NULL,1,1,1,0,0},
{"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
{"strlen",strlenCommand,2,"rF",0,NULL,1,1,1,0,0},
{"del",delCommand,-2,"w",0,NULL,1,-1,1,0,0},
{"unlink",unlinkCommand,-2,"wF",0,NULL,1,-1,1,0,0},
{"exists",existsCommand,-2,"rF",0,NULL,1,-1,1,0,0},
{"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
{"getbit",getbitCommand,3,"rF",0,NULL,1,1,1,0,0},
{"bitfield",bitfieldCommand,-2,"wm",0,NULL,1,1,1,0,0},
{"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
{"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
{"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
{"incr",incrCommand,2,"wmF",0,NULL,1,1,1,0,0},
{"decr",decrCommand,2,"wmF",0,NULL,1,1,1,0,0},
{"mget",mgetCommand,-2,"rF",0,NULL,1,-1,1,0,0},
{"rpush",rpushCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"lpush",lpushCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"rpushx",rpushxCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"lpushx",lpushxCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
{"rpop",rpopCommand,2,"wF",0,NULL,1,1,1,0,0},
{"lpop",lpopCommand,2,"wF",0,NULL,1,1,1,0,0},
{"brpop",brpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
{"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
{"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
{"llen",llenCommand,2,"rF",0,NULL,1,1,1,0,0},
{"lindex",lindexCommand,3,"r",0,NULL,1,1,1,0,0},
{"lset",lsetCommand,4,"wm",0,NULL,1,1,1,0,0},
{"lrange",lrangeCommand,4,"r",0,NULL,1,1,1,0,0},
{"ltrim",ltrimCommand,4,"w",0,NULL,1,1,1,0,0},
{"lrem",lremCommand,4,"w",0,NULL,1,1,1,0,0},
{"rpoplpush",rpoplpushCommand,3,"wm",0,NULL,1,2,1,0,0},
{"sadd",saddCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"srem",sremCommand,-3,"wF",0,NULL,1,1,1,0,0},
{"smove",smoveCommand,4,"wF",0,NULL,1,2,1,0,0},
{"sismember",sismemberCommand,3,"rF",0,NULL,1,1,1,0,0},
{"scard",scardCommand,2,"rF",0,NULL,1,1,1,0,0},
{"spop",spopCommand,-2,"wRF",0,NULL,1,1,1,0,0},
{"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
{"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
{"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sunionstore",sunionstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
{"sdiff",sdiffCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sdiffstore",sdiffstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
{"smembers",sinterCommand,2,"rS",0,NULL,1,1,1,0,0},
{"sscan",sscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
{"zadd",zaddCommand,-4,"wmF",0,NULL,1,1,1,0,0},
{"zincrby",zincrbyCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"zrem",zremCommand,-3,"wF",0,NULL,1,1,1,0,0},
{"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0},
{"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0},
{"zremrangebylex",zremrangebylexCommand,4,"w",0,NULL,1,1,1,0,0},
{"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
{"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
{"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
{"zrangebyscore",zrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
{"zrevrangebyscore",zrevrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
{"zrangebylex",zrangebylexCommand,-4,"r",0,NULL,1,1,1,0,0},
{"zrevrangebylex",zrevrangebylexCommand,-4,"r",0,NULL,1,1,1,0,0},
{"zcount",zcountCommand,4,"rF",0,NULL,1,1,1,0,0},
{"zlexcount",zlexcountCommand,4,"rF",0,NULL,1,1,1,0,0},
{"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
{"zcard",zcardCommand,2,"rF",0,NULL,1,1,1,0,0},
{"zscore",zscoreCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zrank",zrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zrevrank",zrevrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zscan",zscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
{"zpopmin",zpopminCommand,-2,"wF",0,NULL,1,1,1,0,0},
{"zpopmax",zpopmaxCommand,-2,"wF",0,NULL,1,1,1,0,0},
{"bzpopmin",bzpopminCommand,-3,"wsF",0,NULL,1,-2,1,0,0},
{"bzpopmax",bzpopmaxCommand,-3,"wsF",0,NULL,1,-2,1,0,0},
{"hset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0},
{"hsetnx",hsetnxCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hget",hgetCommand,3,"rF",0,NULL,1,1,1,0,0},
{"hmset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0},
{"hmget",hmgetCommand,-3,"rF",0,NULL,1,1,1,0,0},
{"hincrby",hincrbyCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hincrbyfloat",hincrbyfloatCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hdel",hdelCommand,-3,"wF",0,NULL,1,1,1,0,0},
{"hlen",hlenCommand,2,"rF",0,NULL,1,1,1,0,0},
{"hstrlen",hstrlenCommand,3,"rF",0,NULL,1,1,1,0,0},
{"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
{"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
{"hgetall",hgetallCommand,2,"rR",0,NULL,1,1,1,0,0},
{"hexists",hexistsCommand,3,"rF",0,NULL,1,1,1,0,0},
{"hscan",hscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
{"incrby",incrbyCommand,3,"wmF",0,NULL,1,1,1,0,0},
{"decrby",decrbyCommand,3,"wmF",0,NULL,1,1,1,0,0},
{"incrbyfloat",incrbyfloatCommand,3,"wmF",0,NULL,1,1,1,0,0},
{"getset",getsetCommand,3,"wm",0,NULL,1,1,1,0,0},
{"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
{"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
{"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
{"select",selectCommand,2,"lF",0,NULL,0,0,0,0,0},
{"swapdb",swapdbCommand,3,"wF",0,NULL,0,0,0,0,0},
{"move",moveCommand,3,"wF",0,NULL,1,1,1,0,0},
{"rename",renameCommand,3,"w",0,NULL,1,2,1,0,0},
{"renamenx",renamenxCommand,3,"wF",0,NULL,1,2,1,0,0},
{"expire",expireCommand,3,"wF",0,NULL,1,1,1,0,0},
{"expireat",expireatCommand,3,"wF",0,NULL,1,1,1,0,0},
{"pexpire",pexpireCommand,3,"wF",0,NULL,1,1,1,0,0},
{"pexpireat",pexpireatCommand,3,"wF",0,NULL,1,1,1,0,0},
{"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
{"scan",scanCommand,-2,"rR",0,NULL,0,0,0,0,0},
{"dbsize",dbsizeCommand,1,"rF",0,NULL,0,0,0,0,0},
{"auth",authCommand,2,"sltF",0,NULL,0,0,0,0,0},
{"ping",pingCommand,-1,"tF",0,NULL,0,0,0,0,0},
{"echo",echoCommand,2,"F",0,NULL,0,0,0,0,0},
{"save",saveCommand,1,"as",0,NULL,0,0,0,0,0},
{"bgsave",bgsaveCommand,-1,"as",0,NULL,0,0,0,0,0},
{"bgrewriteaof",bgrewriteaofCommand,1,"as",0,NULL,0,0,0,0,0},
{"shutdown",shutdownCommand,-1,"aslt",0,NULL,0,0,0,0,0},
{"lastsave",lastsaveCommand,1,"RF",0,NULL,0,0,0,0,0},
{"type",typeCommand,2,"rF",0,NULL,1,1,1,0,0},
{"multi",multiCommand,1,"sF",0,NULL,0,0,0,0,0},
{"exec",execCommand,1,"sM",0,NULL,0,0,0,0,0},
{"discard",discardCommand,1,"sF",0,NULL,0,0,0,0,0},
{"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
{"psync",syncCommand,3,"ars",0,NULL,0,0,0,0,0},
{"replconf",replconfCommand,-1,"aslt",0,NULL,0,0,0,0,0},
{"flushdb",flushdbCommand,-1,"w",0,NULL,0,0,0,0,0},
{"flushall",flushallCommand,-1,"w",0,NULL,0,0,0,0,0},
{"sort",sortCommand,-2,"wm",0,sortGetKeys,1,1,1,0,0},
{"info",infoCommand,-1,"ltR",0,NULL,0,0,0,0,0},
{"monitor",monitorCommand,1,"as",0,NULL,0,0,0,0,0},
{"ttl",ttlCommand,2,"rFR",0,NULL,1,1,1,0,0},
{"touch",touchCommand,-2,"rF",0,NULL,1,1,1,0,0},
{"pttl",pttlCommand,2,"rFR",0,NULL,1,1,1,0,0},
{"persist",persistCommand,2,"wF",0,NULL,1,1,1,0,0},
{"slaveof",replicaofCommand,3,"ast",0,NULL,0,0,0,0,0},
{"replicaof",replicaofCommand,3,"ast",0,NULL,0,0,0,0,0},
{"role",roleCommand,1,"lst",0,NULL,0,0,0,0,0},
{"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
{"config",configCommand,-2,"last",0,NULL,0,0,0,0,0},
{"subscribe",subscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0},
{"unsubscribe",unsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0},
{"psubscribe",psubscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0},
{"punsubscribe",punsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0},
{"publish",publishCommand,3,"pltF",0,NULL,0,0,0,0,0},
{"pubsub",pubsubCommand,-2,"pltR",0,NULL,0,0,0,0,0},
{"watch",watchCommand,-2,"sF",0,NULL,1,-1,1,0,0},
{"unwatch",unwatchCommand,1,"sF",0,NULL,0,0,0,0,0},
{"cluster",clusterCommand,-2,"a",0,NULL,0,0,0,0,0},
{"restore",restoreCommand,-4,"wm",0,NULL,1,1,1,0,0},
{"restore-asking",restoreCommand,-4,"wmk",0,NULL,1,1,1,0,0},
{"migrate",migrateCommand,-6,"wR",0,migrateGetKeys,0,0,0,0,0},
{"asking",askingCommand,1,"F",0,NULL,0,0,0,0,0},
{"readonly",readonlyCommand,1,"F",0,NULL,0,0,0,0,0},
{"readwrite",readwriteCommand,1,"F",0,NULL,0,0,0,0,0},
{"dump",dumpCommand,2,"rR",0,NULL,1,1,1,0,0},
{"object",objectCommand,-2,"rR",0,NULL,2,2,1,0,0},
{"memory",memoryCommand,-2,"rR",0,NULL,0,0,0,0,0},
{"client",clientCommand,-2,"as",0,NULL,0,0,0,0,0},
{"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
{"evalsha",evalShaCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
{"slowlog",slowlogCommand,-2,"aR",0,NULL,0,0,0,0,0},
{"script",scriptCommand,-2,"s",0,NULL,0,0,0,0,0},
{"time",timeCommand,1,"RF",0,NULL,0,0,0,0,0},
{"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0},
{"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0},
{"bitpos",bitposCommand,-3,"r",0,NULL,1,1,1,0,0},
{"wait",waitCommand,3,"s",0,NULL,0,0,0,0,0},
{"command",commandCommand,0,"ltR",0,NULL,0,0,0,0,0},
{"geoadd",geoaddCommand,-5,"wm",0,NULL,1,1,1,0,0},
{"georadius",georadiusCommand,-6,"w",0,georadiusGetKeys,1,1,1,0,0},
{"georadius_ro",georadiusroCommand,-6,"r",0,georadiusGetKeys,1,1,1,0,0},
{"georadiusbymember",georadiusbymemberCommand,-5,"w",0,georadiusGetKeys,1,1,1,0,0},
{"georadiusbymember_ro",georadiusbymemberroCommand,-5,"r",0,georadiusGetKeys,1,1,1,0,0},
{"geohash",geohashCommand,-2,"r",0,NULL,1,1,1,0,0},
{"geopos",geoposCommand,-2,"r",0,NULL,1,1,1,0,0},
{"geodist",geodistCommand,-4,"r",0,NULL,1,1,1,0,0},
{"pfselftest",pfselftestCommand,1,"a",0,NULL,0,0,0,0,0},
{"pfadd",pfaddCommand,-2,"wmF",0,NULL,1,1,1,0,0},
{"pfcount",pfcountCommand,-2,"r",0,NULL,1,-1,1,0,0},
{"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0},
{"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0},
{"xadd",xaddCommand,-5,"wmFR",0,NULL,1,1,1,0,0},
{"xrange",xrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
{"xrevrange",xrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
{"xlen",xlenCommand,2,"rF",0,NULL,1,1,1,0,0},
{"xread",xreadCommand,-4,"rs",0,xreadGetKeys,1,1,1,0,0},
{"xreadgroup",xreadCommand,-7,"ws",0,xreadGetKeys,1,1,1,0,0},
{"xgroup",xgroupCommand,-2,"wm",0,NULL,2,2,1,0,0},
{"xsetid",xsetidCommand,3,"wmF",0,NULL,1,1,1,0,0},
{"xack",xackCommand,-4,"wF",0,NULL,1,1,1,0,0},
{"xpending",xpendingCommand,-3,"rR",0,NULL,1,1,1,0,0},
{"xclaim",xclaimCommand,-6,"wRF",0,NULL,1,1,1,0,0},
{"xinfo",xinfoCommand,-2,"rR",0,NULL,2,2,1,0,0},
{"xdel",xdelCommand,-3,"wF",0,NULL,1,1,1,0,0},
{"xtrim",xtrimCommand,-2,"wFR",0,NULL,1,1,1,0,0},
{"post",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0},
{"host:",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0},
{"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0},
{"lolwut",lolwutCommand,-1,"r",0,NULL,0,0,0,0,0}
};
3)初始化sentinel的状态
/* Main state of this Sentinel. The declaration also defines the single
 * object of this type, named `sentinel`, at its closing brace. */
struct sentinelState {
char myid[CONFIG_RUN_ID_SIZE+1]; /* This sentinel ID. */
uint64_t current_epoch; /* Current epoch. */
dict *masters; /* Dictionary of master sentinelRedisInstances.
Key is the instance name, value is the
sentinelRedisInstance structure pointer. */
int tilt; /* Are we in TILT mode? */
int running_scripts; /* Number of scripts in execution right now. */
mstime_t tilt_start_time; /* When TILT started. */
mstime_t previous_time; /* Last time we ran the time handler. */
list *scripts_queue; /* Queue of user scripts to execute. */
char *announce_ip; /* IP addr that is gossiped to other sentinels if
not NULL. */
int announce_port; /* Port that is gossiped to other sentinels if
non zero. */
unsigned long simfailure_flags; /* Failures simulation. */
int deny_scripts_reconfig; /* Allow SENTINEL SET ... to change script
paths at runtime?
NOTE(review): the field name says "deny" while
this comment says "Allow" -- confirm the
polarity at the point of use. */
} sentinel;
typedef struct sentinelRedisInstance {
int flags; /* See SRI_... defines */
char *name; /* Master name from the point of view of this sentinel. */
char *runid; /* Run ID of this instance, or unique ID if is a Sentinel.*/
uint64_t config_epoch; /* Configuration epoch. */
sentinelAddr *addr; /* Master host. */
instanceLink *link; /* Link to the instance, may be shared for Sentinels. */
mstime_t last_pub_time; /* Last time we sent hello via Pub/Sub. */
mstime_t last_hello_time; /* Only used if SRI_SENTINEL is set. Last time
we received a hello from this Sentinel
via Pub/Sub. */
mstime_t last_master_down_reply_time; /* Time of last reply to
SENTINEL is-master-down command. */
mstime_t s_down_since_time; /* Subjectively down since time. */
mstime_t o_down_since_time; /* Objectively down since time. */
mstime_t down_after_period; /* Consider it down after that period. */
mstime_t info_refresh; /* Time at which we received INFO output from it. */
dict *renamed_commands; /* Commands renamed in this instance:
Sentinel will use the alternative commands
mapped on this table to send things like
SLAVEOF, CONFING, INFO, ... */
/* Role and the first time we observed it.
* This is useful in order to delay replacing what the instance reports
* with our own configuration. We need to always wait some time in order
* to give a chance to the leader to report the new configuration before
* we do silly things. */
int role_reported;
mstime_t role_reported_time;
mstime_t slave_conf_change_time; /* Last time slave master addr changed. */
/* Master specific. */
dict *sentinels; /* Other sentinels monitoring the same master. */
dict *slaves; /* Slaves for this master instance. */
unsigned int quorum;/* Number of sentinels that need to agree on failure. */
int parallel_syncs; /* How many slaves to reconfigure at same time. */
char *auth_pass; /* Password to use for AUTH against master & slaves. */
4)根据配置文件,初始化sentinel的监视列表
/* Slave specific. */
mstime_t master_link_down_time; /* Slave replication link down time. */
int slave_priority; /* Slave priority according to its INFO output. */
mstime_t slave_reconf_sent_time; /* Time at which we sent SLAVE OF <new> */
struct sentinelRedisInstance *master; /* Master instance if it's slave. */
char *slave_master_host; /* Master host as reported by INFO */
int slave_master_port; /* Master port as reported by INFO */
int slave_master_link_status; /* Master link status as reported by INFO */
unsigned long long slave_repl_offset; /* Slave replication offset. */
/* Failover */
char *leader; /* If this is a master instance, this is the runid of
the Sentinel that should perform the failover. If
this is a Sentinel, this is the runid of the Sentinel
that this Sentinel voted as leader. */
uint64_t leader_epoch; /* Epoch of the 'leader' field. */
uint64_t failover_epoch; /* Epoch of the currently started failover. */
int failover_state; /* See SENTINEL_FAILOVER_STATE_* defines. */
mstime_t failover_state_change_time;
mstime_t failover_start_time; /* Last failover attempt start time. */
mstime_t failover_timeout; /* Max time to refresh failover state. */
mstime_t failover_delay_logged; /* For what failover_start_time value we
logged the failover delay. */
struct sentinelRedisInstance *promoted_slave; /* Promoted slave instance. */
/* Scripts executed to notify admin or reconfigure clients: when they
* are set to NULL no script is executed. */
char *notification_script;
char *client_reconfig_script;
sds info; /* cached INFO output */
} sentinelRedisInstance;
sentinelRedisInstance代表被sentinel监视的一个服务器实例 ,sentinelRedisInstance.addr代表实例的ip和端口
5)创建向主服务器的网络链接
sentinel会与每个被监视的主服务器开启两个连接:一个是命令连接,用于向服务器发送命令并接收回复;另一个是订阅连接,专门用于订阅服务器的__sentinel__:hello频道。那么为什么要开启两个连接呢?因为在当前的发布和订阅功能里,服务器不会保存已经发送过的消息,就好比一条鱼在热水里说完"我好热",说完它就忘了。如果服务器发送消息时,接收消息的客户端不在线或者断线,那么它就永远也收不到这条消息了,所以为了不丢失hello频道的消息,就需要用一个专门的连接一直保持订阅;
获取服务器的信息
每10s向服务器发送一个INFO,类似于心跳,然后sentinel在这个INFO的回复里解析出服务器的信息,
回复中包含的信息有:主服务器本身的run_id和role,以及它属下所有从服务器的信息(ip、端口等参数);
对于多个sentinel监视同一台主服务器的情况,每个sentinel都会向这台服务器的hello频道发送消息,所有订阅了该频道的sentinel(包括发送者自己)都会收到这条消息;接收方根据消息中的run_id来判断:如果和自己的run_id相同就丢弃这条消息,否则就根据消息的内容更新对应的sentinel和主服务器的信息。
__sentinel__:hello
默认情况下:
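sentinel每2s会通过命令连接向被监视服务器的hello频道发送一条消息,格式为 PUBLISH __sentinel__:hello "<s_ip>,<s_port>,<s_runid>,<s_epoch>,<m_name>,<m_ip>,<m_port>,<m_epoch>"。
订阅了该频道的sentinel会收到这样的消息,其中s_ip s_port等带s开头的都是sentinel的属性,其他的m_开头的是主服务器的属性,sentinel如果监控的是从服务器,那么m_开头的属性是其复制主服务器的属性;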
会根据上述收到的hello频道的消息更新每个sentinels
sentinel相互之间也会连接(只建立命令连接,不建立订阅连接),并且会两两相连。默认每1s会向所有建立了命令连接的实例(主服务器、从服务器、其他sentinel)发送ping命令,有效回复是+PONG、-LOADING、-MASTERDOWN三种之一。在sentinel.conf中有down-after-milliseconds选项,用来设置实例连续多少毫秒没有返回有效回复就被判定为主观下线;若判定为下线,就会在该实例的flags中打开SRI_S_DOWN标识,表示它已经主观下线。
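使用 SENTINEL is-master-down-by-addr <ip> <port> <current_epoch> <runid> 命令
向其他sentinel询问,以客观判断主服务器是否下线。所谓的客观判断,是汇总所有sentinel的判断结果:当认为主服务器已下线的sentinel数量达到配置的quorum时,才判定主服务器客观下线。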
接下来进行选举sentinel头领,然后再进行主服务器的故障转移
选sentinel的规则如下(实际上用到了raft算法):
1)每个sentinel都有可能成为带头大哥,基于先到先得策略,谁最先检测出主服务器下线,并且发送客观检测,得到主服务器下线就会成为临时带头大哥,此时配置纪元+1。
2)选举出临时的大哥后,他所在的局部的sentinel都将成为临时大哥的小弟。直到大多数sentinel都成为临时大哥的小弟后,大哥升级成为真大哥,这里说下大多数是指超过sentinel的数一半。
3)然后由真大哥选举新的主服务器,过程是从已下线主服务器的从服务器中,先排除掉下线、断线或者长时间没有回复的,再选取一个状态最好的:优先级最高的优先,优先级相同时选复制偏移量最大的,偏移量也相同时选run_id最小的
这章主要介绍了哨兵模式,从命令启动哨兵到哨兵的结构,再到哨兵和主服务器和从服务器以及其他哨兵的通信过程,再到订阅和发送命令,再到其他节点的下线检测,再到sentinel大哥的选举,再到主服务的故障转移的过程。