哨兵在master发生故障时,能自动从replica中选举新的master,而我们配置哨兵时,只配置了master,那哨兵是如何知道master有哪些replica呢?
一、哨兵主动向master询问
哨兵在向master发送PING命令监控master状态的同时,也会周期性地发送INFO命令。
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) ->
void sentinelTimer(void) ->
void sentinelHandleDictOfRedisInstances(sentinel.masters); ->
void sentinelHandleRedisInstance(ri); ->
void sentinelSendPeriodicCommands(ri);
可以看出,哨兵默认每10秒向master发送一次INFO命令(SENTINEL_INFO_PERIOD = 10000毫秒)。
/* Note: times are in milliseconds. */
#define SENTINEL_INFO_PERIOD 10000
void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
mstime_t now = mstime();
mstime_t info_period;
int retval;
...
/* If this is a slave of a master in O_DOWN condition we start sending
* it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
* period. In this state we want to closely monitor slaves in case they
* are turned into masters by another Sentinel, or by the sysadmin.
*
* Similarly we monitor the INFO output more often if the slave reports
* to be disconnected from the master, so that we can have a fresh
* disconnection time figure. */
if ((ri->flags & SRI_SLAVE) &&
((ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)) ||
(ri->master_link_down_time != 0)))
{
info_period = 1000;
} else {
info_period = SENTINEL_INFO_PERIOD;
}
/* We ping instances every time the last received pong is older than
* the configured 'down-after-milliseconds' time, but every second
* anyway if 'down-after-milliseconds' is greater than 1 second. */
ping_period = ri->down_after_period;
if (ping_period > SENTINEL_PING_PERIOD) ping_period = SENTINEL_PING_PERIOD;
/* Send INFO to masters and slaves, not sentinels. */
if ((ri->flags & SRI_SENTINEL) == 0 &&
(ri->info_refresh == 0 ||
(now - ri->info_refresh) > info_period))
{
retval = redisAsyncCommand(ri->link->cc,
sentinelInfoReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"INFO"));
if (retval == C_OK) ri->link->pending_commands++;
}
...
}
二、master回复哨兵
对于master在收到INFO命令时的处理函数如下
struct redisCommand redisCommandTable[] = {
...
{"info",infoCommand,-1,
"ok-loading ok-stale random @dangerous",
0,NULL,0,0,0,0,0,0},
...
};
通过命令处理函数可以看出,INFO命令可以指定参数,即查看哪部分信息,而哨兵发送的是不带参数的INFO命令,所以使用default部分进行查询,并且INFO命令最多只能有一个参数。
void infoCommand(client *c) {
char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
if (c->argc > 2) {
addReplyErrorObject(c,shared.syntaxerr);
return;
}
sds info = genRedisInfoString(section);
addReplyVerbatim(c,info,sdslen(info),"txt");
sdsfree(info);
}
其中关键的函数为genRedisInfoString,根据不同的选择进行获取不同下信息。
/* Create the string returned by the INFO command. This is decoupled
* by the INFO command itself as we need to report the same information
* on memory corruption problems. */
sds genRedisInfoString(const char *section) {
sds info = sdsempty();
time_t uptime = server.unixtime-server.stat_starttime;
int j;
int allsections = 0, defsections = 0, everything = 0, modules = 0;
int sections = 0;
if (section == NULL) section = "default";
allsections = strcasecmp(section,"all") == 0;
defsections = strcasecmp(section,"default") == 0;
everything = strcasecmp(section,"everything") == 0;
modules = strcasecmp(section,"modules") == 0;
if (everything) allsections = 1;
....
return info;
}
2.1 服务器信息
/* Server */
if (allsections || defsections || !strcasecmp(section,"server")) {
static int call_uname = 1;
static struct utsname name;
char *mode;
char *supervised;
if (server.cluster_enabled) mode = "cluster";
else if (server.sentinel_mode) mode = "sentinel";
else mode = "standalone";
if (server.supervised) {
if (server.supervised_mode == SUPERVISED_UPSTART) supervised = "upstart";
else if (server.supervised_mode == SUPERVISED_SYSTEMD) supervised = "systemd";
else supervised = "unknown";
} else {
supervised = "no";
}
if (sections++) info = sdscat(info,"\r\n");
if (call_uname) {
/* Uname can be slow and is always the same output. Cache it. */
uname(&name);
call_uname = 0;
}
unsigned int lruclock;
atomicGet(server.lruclock,lruclock);
info = sdscatfmt(info,
"# Server\r\n"
"redis_version:%s\r\n"
"redis_git_sha1:%s\r\n"
"redis_git_dirty:%i\r\n"
"redis_build_id:%s\r\n"
"redis_mode:%s\r\n"
"os:%s %s %s\r\n"
"arch_bits:%i\r\n"
"multiplexing_api:%s\r\n"
"atomicvar_api:%s\r\n"
"gcc_version:%i.%i.%i\r\n"
"process_id:%I\r\n"
"process_supervised:%s\r\n"
"run_id:%s\r\n"
"tcp_port:%i\r\n"
"server_time_usec:%I\r\n"
"uptime_in_seconds:%I\r\n"
"uptime_in_days:%I\r\n"
"hz:%i\r\n"
"configured_hz:%i\r\n"
"lru_clock:%u\r\n"
"executable:%s\r\n"
"config_file:%s\r\n"
"io_threads_active:%i\r\n",
REDIS_VERSION,
redisGitSHA1(),
strtol(redisGitDirty(),NULL,10) > 0,
redisBuildIdString(),
mode,
name.sysname, name.release, name.machine,
server.arch_bits,
aeGetApiName(),
REDIS_ATOMIC_API,
#ifdef __GNUC__
__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
#else
0,0,0,
#endif
(int64_t) getpid(),
supervised,
server.runid,
server.port ? server.port : server.tls_port,
(int64_t)server.ustime,
(int64_t)uptime,
(int64_t)(uptime/(3600*24)),
server.hz,
server.config_hz,
lruclock,
server.executable ? server.executable : "",
server.configfile ? server.configfile : "",
server.io_threads_active);
}
os信息是不会变的,所以这里使用static变量进行缓存,后续就无需重复获取。我们自己设计程序时也可以借鉴这种思路:如果某个函数被频繁调用,其中部分信息不变且获取耗时,就可以将其缓存起来,以空间换时间,提升速度。
2.2 客户端信息
/* Clients */
if (allsections || defsections || !strcasecmp(section,"clients")) {
size_t maxin, maxout;
getExpansiveClientsInfo(&maxin,&maxout);
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Clients\r\n"
"connected_clients:%lu\r\n"
"cluster_connections:%lu\r\n"
"maxclients:%u\r\n"
"client_recent_max_input_buffer:%zu\r\n"
"client_recent_max_output_buffer:%zu\r\n"
"blocked_clients:%d\r\n"
"tracking_clients:%d\r\n"
"clients_in_timeout_table:%llu\r\n",
listLength(server.clients)-listLength(server.slaves),
getClusterConnectionsCount(),
server.maxclients,
maxin, maxout,
server.blocked_clients,
server.tracking_clients,
(unsigned long long) raxSize(server.clients_timeout_table));
}
对于client_recent_max_input_buffer和client_recent_max_output_buffer,并不是所有的client中最大的输入输出buffer,而是采样了8个client进行的统计。对于redis成千上万的client,如果都去遍历一次来获取最大的buffer大小,很耗时,而且这种统计信息不需要非常精确的值,所以使用采样的思想非常好。
2.3 内存信息
/* Memory */
if (allsections || defsections || !strcasecmp(section,"memory")) {
char hmem[64];
char peak_hmem[64];
char total_system_hmem[64];
char used_memory_lua_hmem[64];
char used_memory_scripts_hmem[64];
char used_memory_rss_hmem[64];
char maxmemory_hmem[64];
size_t zmalloc_used = zmalloc_used_memory();
size_t total_system_mem = server.system_memory_size;
const char *evict_policy = evictPolicyToString();
long long memory_lua = server.lua ? (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024 : 0;
struct redisMemOverhead *mh = getMemoryOverheadData();
/* Peak memory is updated from time to time by serverCron() so it
* may happen that the instantaneous value is slightly bigger than
* the peak value. This may confuse users, so we update the peak
* if found smaller than the current memory usage. */
if (zmalloc_used > server.stat_peak_memory)
server.stat_peak_memory = zmalloc_used;
bytesToHuman(hmem,zmalloc_used);
bytesToHuman(peak_hmem,server.stat_peak_memory);
bytesToHuman(total_system_hmem,total_system_mem);
bytesToHuman(used_memory_lua_hmem,memory_lua);
bytesToHuman(used_memory_scripts_hmem,mh->lua_caches);
bytesToHuman(used_memory_rss_hmem,server.cron_malloc_stats.process_rss);
bytesToHuman(maxmemory_hmem,server.maxmemory);
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Memory\r\n"
"used_memory:%zu\r\n"
"used_memory_human:%s\r\n"
"used_memory_rss:%zu\r\n"
"used_memory_rss_human:%s\r\n"
"used_memory_peak:%zu\r\n"
"used_memory_peak_human:%s\r\n"
"used_memory_peak_perc:%.2f%%\r\n"
"used_memory_overhead:%zu\r\n"
"used_memory_startup:%zu\r\n"
"used_memory_dataset:%zu\r\n"
"used_memory_dataset_perc:%.2f%%\r\n"
"allocator_allocated:%zu\r\n"
"allocator_active:%zu\r\n"
"allocator_resident:%zu\r\n"
"total_system_memory:%lu\r\n"
"total_system_memory_human:%s\r\n"
"used_memory_lua:%lld\r\n"
"used_memory_lua_human:%s\r\n"
"used_memory_scripts:%lld\r\n"
"used_memory_scripts_human:%s\r\n"
"number_of_cached_scripts:%lu\r\n"
"maxmemory:%lld\r\n"
"maxmemory_human:%s\r\n"
"maxmemory_policy:%s\r\n"
"allocator_frag_ratio:%.2f\r\n"
"allocator_frag_bytes:%zu\r\n"
"allocator_rss_ratio:%.2f\r\n"
"allocator_rss_bytes:%zd\r\n"
"rss_overhead_ratio:%.2f\r\n"
"rss_overhead_bytes:%zd\r\n"
"mem_fragmentation_ratio:%.2f\r\n"
"mem_fragmentation_bytes:%zd\r\n"
"mem_not_counted_for_evict:%zu\r\n"
"mem_replication_backlog:%zu\r\n"
"mem_clients_slaves:%zu\r\n"
"mem_clients_normal:%zu\r\n"
"mem_aof_buffer:%zu\r\n"
"mem_allocator:%s\r\n"
"active_defrag_running:%d\r\n"
"lazyfree_pending_objects:%zu\r\n"
"lazyfreed_objects:%zu\r\n",
zmalloc_used,
hmem,
server.cron_malloc_stats.process_rss,
used_memory_rss_hmem,
server.stat_peak_memory,
peak_hmem,
mh->peak_perc,
mh->overhead_total,
mh->startup_allocated,
mh->dataset,
mh->dataset_perc,
server.cron_malloc_stats.allocator_allocated,
server.cron_malloc_stats.allocator_active,
server.cron_malloc_stats.allocator_resident,
(unsigned long)total_system_mem,
total_system_hmem,
memory_lua,
used_memory_lua_hmem,
(long long) mh->lua_caches,
used_memory_scripts_hmem,
dictSize(server.lua_scripts),
server.maxmemory,
maxmemory_hmem,
evict_policy,
mh->allocator_frag,
mh->allocator_frag_bytes,
mh->allocator_rss,
mh->allocator_rss_bytes,
mh->rss_extra,
mh->rss_extra_bytes,
mh->total_frag, /* This is the total RSS overhead, including
fragmentation, but not just it. This field
(and the next one) is named like that just
for backward compatibility. */
mh->total_frag_bytes,
freeMemoryGetNotCountedMemory(),
mh->repl_backlog,
mh->clients_slaves,
mh->clients_normal,
mh->aof_buffer,
ZMALLOC_LIB,
server.active_defrag_running,
lazyfreeGetPendingObjectsCount(),
lazyfreeGetFreedObjectsCount()
);
freeMemoryOverheadData(mh);
}
2.4 持久化信息
/* Persistence */
if (allsections || defsections || !strcasecmp(section,"persistence")) {
if (sections++) info = sdscat(info,"\r\n");
double fork_perc = 0;
if (server.stat_module_progress) {
fork_perc = server.stat_module_progress * 100;
} else if (server.stat_current_save_keys_total) {
fork_perc = ((double)server.stat_current_save_keys_processed / server.stat_current_save_keys_total) * 100;
}
int aof_bio_fsync_status;
atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);
info = sdscatprintf(info,
"# Persistence\r\n"
"loading:%d\r\n"
"current_cow_size:%zu\r\n"
"current_cow_size_age:%lu\r\n"
"current_fork_perc:%.2f\r\n"
"current_save_keys_processed:%zu\r\n"
"current_save_keys_total:%zu\r\n"
"rdb_changes_since_last_save:%lld\r\n"
"rdb_bgsave_in_progress:%d\r\n"
"rdb_last_save_time:%jd\r\n"
"rdb_last_bgsave_status:%s\r\n"
"rdb_last_bgsave_time_sec:%jd\r\n"
"rdb_current_bgsave_time_sec:%jd\r\n"
"rdb_last_cow_size:%zu\r\n"
"aof_enabled:%d\r\n"
"aof_rewrite_in_progress:%d\r\n"
"aof_rewrite_scheduled:%d\r\n"
"aof_last_rewrite_time_sec:%jd\r\n"
"aof_current_rewrite_time_sec:%jd\r\n"
"aof_last_bgrewrite_status:%s\r\n"
"aof_last_write_status:%s\r\n"
"aof_last_cow_size:%zu\r\n"
"module_fork_in_progress:%d\r\n"
"module_fork_last_cow_size:%zu\r\n",
(int)server.loading,
server.stat_current_cow_bytes,
server.stat_current_cow_updated ? (unsigned long) elapsedMs(server.stat_current_cow_updated) / 1000 : 0,
fork_perc,
server.stat_current_save_keys_processed,
server.stat_current_save_keys_total,
server.dirty,
server.child_type == CHILD_TYPE_RDB,
(intmax_t)server.lastsave,
(server.lastbgsave_status == C_OK) ? "ok" : "err",
(intmax_t)server.rdb_save_time_last,
(intmax_t)((server.child_type != CHILD_TYPE_RDB) ?
-1 : time(NULL)-server.rdb_save_time_start),
server.stat_rdb_cow_bytes,
server.aof_state != AOF_OFF,
server.child_type == CHILD_TYPE_AOF,
server.aof_rewrite_scheduled,
(intmax_t)server.aof_rewrite_time_last,
(intmax_t)((server.child_type != CHILD_TYPE_AOF) ?
-1 : time(NULL)-server.aof_rewrite_time_start),
(server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err",
(server.aof_last_write_status == C_OK &&
aof_bio_fsync_status == C_OK) ? "ok" : "err",
server.stat_aof_cow_bytes,
server.child_type == CHILD_TYPE_MODULE,
server.stat_module_cow_bytes);
if (server.aof_enabled) {
info = sdscatprintf(info,
"aof_current_size:%lld\r\n"
"aof_base_size:%lld\r\n"
"aof_pending_rewrite:%d\r\n"
"aof_buffer_length:%zu\r\n"
"aof_rewrite_buffer_length:%lu\r\n"
"aof_pending_bio_fsync:%llu\r\n"
"aof_delayed_fsync:%lu\r\n",
(long long) server.aof_current_size,
(long long) server.aof_rewrite_base_size,
server.aof_rewrite_scheduled,
sdslen(server.aof_buf),
aofRewriteBufferSize(),
bioPendingJobsOfType(BIO_AOF_FSYNC),
server.aof_delayed_fsync);
}
if (server.loading) {
double perc = 0;
time_t eta, elapsed;
off_t remaining_bytes = 1;
if (server.loading_total_bytes) {
perc = ((double)server.loading_loaded_bytes / server.loading_total_bytes) * 100;
remaining_bytes = server.loading_total_bytes - server.loading_loaded_bytes;
} else if(server.loading_rdb_used_mem) {
perc = ((double)server.loading_loaded_bytes / server.loading_rdb_used_mem) * 100;
remaining_bytes = server.loading_rdb_used_mem - server.loading_loaded_bytes;
/* used mem is only a (bad) estimation of the rdb file size, avoid going over 100% */
if (perc > 99.99) perc = 99.99;
if (remaining_bytes < 1) remaining_bytes = 1;
}
elapsed = time(NULL)-server.loading_start_time;
if (elapsed == 0) {
eta = 1; /* A fake 1 second figure if we don't have
enough info */
} else {
eta = (elapsed*remaining_bytes)/(server.loading_loaded_bytes+1);
}
info = sdscatprintf(info,
"loading_start_time:%jd\r\n"
"loading_total_bytes:%llu\r\n"
"loading_rdb_used_mem:%llu\r\n"
"loading_loaded_bytes:%llu\r\n"
"loading_loaded_perc:%.2f\r\n"
"loading_eta_seconds:%jd\r\n",
(intmax_t) server.loading_start_time,
(unsigned long long) server.loading_total_bytes,
(unsigned long long) server.loading_rdb_used_mem,
(unsigned long long) server.loading_loaded_bytes,
perc,
(intmax_t)eta
);
}
}
2.5 统计信息
/* Stats */
if (allsections || defsections || !strcasecmp(section,"stats")) {
long long stat_total_reads_processed, stat_total_writes_processed;
long long stat_net_input_bytes, stat_net_output_bytes;
atomicGet(server.stat_total_reads_processed, stat_total_reads_processed);
atomicGet(server.stat_total_writes_processed, stat_total_writes_processed);
atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Stats\r\n"
"total_connections_received:%lld\r\n"
"total_commands_processed:%lld\r\n"
"instantaneous_ops_per_sec:%lld\r\n"
"total_net_input_bytes:%lld\r\n"
"total_net_output_bytes:%lld\r\n"
"instantaneous_input_kbps:%.2f\r\n"
"instantaneous_output_kbps:%.2f\r\n"
"rejected_connections:%lld\r\n"
"sync_full:%lld\r\n"
"sync_partial_ok:%lld\r\n"
"sync_partial_err:%lld\r\n"
"expired_keys:%lld\r\n"
"expired_stale_perc:%.2f\r\n"
"expired_time_cap_reached_count:%lld\r\n"
"expire_cycle_cpu_milliseconds:%lld\r\n"
"evicted_keys:%lld\r\n"
"keyspace_hits:%lld\r\n"
"keyspace_misses:%lld\r\n"
"pubsub_channels:%ld\r\n"
"pubsub_patterns:%lu\r\n"
"latest_fork_usec:%lld\r\n"
"total_forks:%lld\r\n"
"migrate_cached_sockets:%ld\r\n"
"slave_expires_tracked_keys:%zu\r\n"
"active_defrag_hits:%lld\r\n"
"active_defrag_misses:%lld\r\n"
"active_defrag_key_hits:%lld\r\n"
"active_defrag_key_misses:%lld\r\n"
"tracking_total_keys:%lld\r\n"
"tracking_total_items:%lld\r\n"
"tracking_total_prefixes:%lld\r\n"
"unexpected_error_replies:%lld\r\n"
"total_error_replies:%lld\r\n"
"dump_payload_sanitizations:%lld\r\n"
"total_reads_processed:%lld\r\n"
"total_writes_processed:%lld\r\n"
"io_threaded_reads_processed:%lld\r\n"
"io_threaded_writes_processed:%lld\r\n",
server.stat_numconnections,
server.stat_numcommands,
getInstantaneousMetric(STATS_METRIC_COMMAND),
stat_net_input_bytes,
stat_net_output_bytes,
(float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024,
(float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024,
server.stat_rejected_conn,
server.stat_sync_full,
server.stat_sync_partial_ok,
server.stat_sync_partial_err,
server.stat_expiredkeys,
server.stat_expired_stale_perc*100,
server.stat_expired_time_cap_reached_count,
server.stat_expire_cycle_time_used/1000,
server.stat_evictedkeys,
server.stat_keyspace_hits,
server.stat_keyspace_misses,
dictSize(server.pubsub_channels),
dictSize(server.pubsub_patterns),
server.stat_fork_time,
server.stat_total_forks,
dictSize(server.migrate_cached_sockets),
getSlaveKeyWithExpireCount(),
server.stat_active_defrag_hits,
server.stat_active_defrag_misses,
server.stat_active_defrag_key_hits,
server.stat_active_defrag_key_misses,
(unsigned long long) trackingGetTotalKeys(),
(unsigned long long) trackingGetTotalItems(),
(unsigned long long) trackingGetTotalPrefixes(),
server.stat_unexpected_error_replies,
server.stat_total_error_replies,
server.stat_dump_payload_sanitizations,
stat_total_reads_processed,
stat_total_writes_processed,
server.stat_io_reads_processed,
server.stat_io_writes_processed);
}
2.6 replicate信息
/* Replication */
if (allsections || defsections || !strcasecmp(section,"replication")) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Replication\r\n"
"role:%s\r\n",
server.masterhost == NULL ? "master" : "slave");
if (server.masterhost) {
long long slave_repl_offset = 1;
long long slave_read_repl_offset = 1;
if (server.master) {
slave_repl_offset = server.master->reploff;
slave_read_repl_offset = server.master->read_reploff;
} else if (server.cached_master) {
slave_repl_offset = server.cached_master->reploff;
slave_read_repl_offset = server.cached_master->read_reploff;
}
info = sdscatprintf(info,
"master_host:%s\r\n"
"master_port:%d\r\n"
"master_link_status:%s\r\n"
"master_last_io_seconds_ago:%d\r\n"
"master_sync_in_progress:%d\r\n"
"slave_read_repl_offset:%lld\r\n"
"slave_repl_offset:%lld\r\n"
,server.masterhost,
server.masterport,
(server.repl_state == REPL_STATE_CONNECTED) ?
"up" : "down",
server.master ?
((int)(server.unixtime-server.master->lastinteraction)) : -1,
server.repl_state == REPL_STATE_TRANSFER,
slave_read_repl_offset,
slave_repl_offset
);
if (server.repl_state == REPL_STATE_TRANSFER) {
double perc = 0;
if (server.repl_transfer_size) {
perc = ((double)server.repl_transfer_read / server.repl_transfer_size) * 100;
}
info = sdscatprintf(info,
"master_sync_total_bytes:%lld\r\n"
"master_sync_read_bytes:%lld\r\n"
"master_sync_left_bytes:%lld\r\n"
"master_sync_perc:%.2f\r\n"
"master_sync_last_io_seconds_ago:%d\r\n",
(long long) server.repl_transfer_size,
(long long) server.repl_transfer_read,
(long long) (server.repl_transfer_size - server.repl_transfer_read),
perc,
(int)(server.unixtime-server.repl_transfer_lastio)
);
}
if (server.repl_state != REPL_STATE_CONNECTED) {
info = sdscatprintf(info,
"master_link_down_since_seconds:%jd\r\n",
server.repl_down_since ?
(intmax_t)(server.unixtime-server.repl_down_since) : -1);
}
info = sdscatprintf(info,
"slave_priority:%d\r\n"
"slave_read_only:%d\r\n"
"replica_announced:%d\r\n",
server.slave_priority,
server.repl_slave_ro,
server.replica_announced);
}
info = sdscatprintf(info,
"connected_slaves:%lu\r\n",
listLength(server.slaves));
/* If min-slaves-to-write is active, write the number of slaves
* currently considered 'good'. */
if (server.repl_min_slaves_to_write &&
server.repl_min_slaves_max_lag) {
info = sdscatprintf(info,
"min_slaves_good_slaves:%d\r\n",
server.repl_good_slaves_count);
}
if (listLength(server.slaves)) {
int slaveid = 0;
listNode *ln;
listIter li;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = listNodeValue(ln);
char *state = NULL;
char ip[NET_IP_STR_LEN], *slaveip = slave->slave_addr;
int port;
long lag = 0;
if (!slaveip) {
if (connPeerToString(slave->conn,ip,sizeof(ip),&port) == -1)
continue;
slaveip = ip;
}
switch(slave->replstate) {
case SLAVE_STATE_WAIT_BGSAVE_START:
case SLAVE_STATE_WAIT_BGSAVE_END:
state = "wait_bgsave";
break;
case SLAVE_STATE_SEND_BULK:
state = "send_bulk";
break;
case SLAVE_STATE_ONLINE:
state = "online";
break;
}
if (state == NULL) continue;
if (slave->replstate == SLAVE_STATE_ONLINE)
lag = time(NULL) - slave->repl_ack_time;
info = sdscatprintf(info,
"slave%d:ip=%s,port=%d,state=%s,"
"offset=%lld,lag=%ld\r\n",
slaveid,slaveip,slave->slave_listening_port,state,
slave->repl_ack_off, lag);
slaveid++;
}
}
info = sdscatprintf(info,
"master_failover_state:%s\r\n"
"master_replid:%s\r\n"
"master_replid2:%s\r\n"
"master_repl_offset:%lld\r\n"
"second_repl_offset:%lld\r\n"
"repl_backlog_active:%d\r\n"
"repl_backlog_size:%lld\r\n"
"repl_backlog_first_byte_offset:%lld\r\n"
"repl_backlog_histlen:%lld\r\n",
getFailoverStateString(),
server.replid,
server.replid2,
server.master_repl_offset,
server.second_replid_offset,
server.repl_backlog != NULL,
server.repl_backlog_size,
server.repl_backlog_off,
server.repl_backlog_histlen);
}
可以看出replica信息是从server.slaves链表中获取的,每一个连接到master的replica都会加入到此链表中。
这个内容对于哨兵来说很重要,从中能获取此master下有哪些replica,以及这些replica的同步状态等。
2.7 CPU信息
/* CPU */
if (allsections || defsections || !strcasecmp(section,"cpu")) {
if (sections++) info = sdscat(info,"\r\n");
struct rusage self_ru, c_ru;
getrusage(RUSAGE_SELF, &self_ru);
getrusage(RUSAGE_CHILDREN, &c_ru);
info = sdscatprintf(info,
"# CPU\r\n"
"used_cpu_sys:%ld.%06ld\r\n"
"used_cpu_user:%ld.%06ld\r\n"
"used_cpu_sys_children:%ld.%06ld\r\n"
"used_cpu_user_children:%ld.%06ld\r\n",
(long)self_ru.ru_stime.tv_sec, (long)self_ru.ru_stime.tv_usec,
(long)self_ru.ru_utime.tv_sec, (long)self_ru.ru_utime.tv_usec,
(long)c_ru.ru_stime.tv_sec, (long)c_ru.ru_stime.tv_usec,
(long)c_ru.ru_utime.tv_sec, (long)c_ru.ru_utime.tv_usec);
#ifdef RUSAGE_THREAD
struct rusage m_ru;
getrusage(RUSAGE_THREAD, &m_ru);
info = sdscatprintf(info,
"used_cpu_sys_main_thread:%ld.%06ld\r\n"
"used_cpu_user_main_thread:%ld.%06ld\r\n",
(long)m_ru.ru_stime.tv_sec, (long)m_ru.ru_stime.tv_usec,
(long)m_ru.ru_utime.tv_sec, (long)m_ru.ru_utime.tv_usec);
#endif /* RUSAGE_THREAD */
}
2.8 模块信息
/* Modules */
if (allsections || defsections || !strcasecmp(section,"modules")) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,"# Modules\r\n");
info = genModulesInfoString(info);
}
....
/* Get info from modules.
* if user asked for "everything" or "modules", or a specific section
* that's not found yet. */
if (everything || modules ||
(!allsections && !defsections && sections==0)) {
info = modulesCollectInfo(info,
everything || modules ? NULL: section,
0, /* not a crash report */
sections);
}
2.9 命令统计信息
/* Command statistics */
if (allsections || !strcasecmp(section,"commandstats")) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Commandstats\r\n");
struct redisCommand *c;
dictEntry *de;
dictIterator *di;
di = dictGetSafeIterator(server.commands);
while((de = dictNext(di)) != NULL) {
char *tmpsafe;
c = (struct redisCommand *) dictGetVal(de);
if (!c->calls && !c->failed_calls && !c->rejected_calls)
continue;
info = sdscatprintf(info,
"cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f"
",rejected_calls=%lld,failed_calls=%lld\r\n",
getSafeInfoString(c->name, strlen(c->name), &tmpsafe), c->calls, c->microseconds,
(c->calls == 0) ? 0 : ((float)c->microseconds/c->calls),
c->rejected_calls, c->failed_calls);
if (tmpsafe != NULL) zfree(tmpsafe);
}
dictReleaseIterator(di);
}
2.10 错误统计信息
/* Error statistics */
if (allsections || defsections || !strcasecmp(section,"errorstats")) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscat(info, "# Errorstats\r\n");
raxIterator ri;
raxStart(&ri,server.errors);
raxSeek(&ri,"^",NULL,0);
struct redisError *e;
while(raxNext(&ri)) {
char *tmpsafe;
e = (struct redisError *) ri.data;
info = sdscatprintf(info,
"errorstat_%.*s:count=%lld\r\n",
(int)ri.key_len, getSafeInfoString((char *) ri.key, ri.key_len, &tmpsafe), e->count);
if (tmpsafe != NULL) zfree(tmpsafe);
}
raxStop(&ri);
}
2.11 集群信息
/* Cluster */
if (allsections || defsections || !strcasecmp(section,"cluster")) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Cluster\r\n"
"cluster_enabled:%d\r\n",
server.cluster_enabled);
}
只是获取是否开启集群
2.12 key信息
/* Key space */
if (allsections || defsections || !strcasecmp(section,"keyspace")) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Keyspace\r\n");
for (j = 0; j < server.dbnum; j++) {
long long keys, vkeys;
keys = dictSize(server.db[j].dict);
vkeys = dictSize(server.db[j].expires);
if (keys || vkeys) {
info = sdscatprintf(info,
"db%d:keys=%lld,expires=%lld,avg_ttl=%lld\r\n",
j, keys, vkeys, server.db[j].avg_ttl);
}
}
}
只是获取全局的key的个数
从上面的整个处理看,default会把所有这些数据都响应给哨兵,但哨兵真的需要这么多信息吗?这样网络上传输了很多无用的信息,浪费了网络带宽。
三、哨兵从master响应中提取信息
哨兵根据发送INFO命令时注册的回调处理函数进行处理INFO的响应,从中提取replica信息。
void sentinelInfoReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
sentinelRedisInstance *ri = privdata;
instanceLink *link = c->data;
redisReply *r;
if (!reply || !link) return;
link->pending_commands--;
r = reply;
if (r->type == REDIS_REPLY_STRING)
sentinelRefreshInstanceInfo(ri,r->str);
}
/* Process the INFO output from masters. */
void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
sds *lines;
int numlines, j;
...
/* Process line by line. */
lines = sdssplitlen(info,strlen(info),"\r\n",2,&numlines);
for (j = 0; j < numlines; j++) {
sentinelRedisInstance *slave;
sds l = lines[j];
...
/* old versions: slave0:<ip>,<port>,<state>
* new versions: slave0:ip=127.0.0.1,port=9999,... */
if ((ri->flags & SRI_MASTER) &&
sdslen(l) >= 7 &&
!memcmp(l,"slave",5) && isdigit(l[5]))
{
char *ip, *port, *end;
if (strstr(l,"ip=") == NULL) {
/* Old format. */
ip = strchr(l,':'); if (!ip) continue;
ip++; /* Now ip points to start of ip address. */
port = strchr(ip,','); if (!port) continue;
*port = '\0'; /* nul term for easy access. */
port++; /* Now port points to start of port number. */
end = strchr(port,','); if (!end) continue;
*end = '\0'; /* nul term for easy access. */
} else {
/* New format. */
ip = strstr(l,"ip="); if (!ip) continue;
ip += 3; /* Now ip points to start of ip address. */
port = strstr(l,"port="); if (!port) continue;
port += 5; /* Now port points to start of port number. */
/* Nul term both fields for easy access. */
end = strchr(ip,','); if (end) *end = '\0';
end = strchr(port,','); if (end) *end = '\0';
}
/* Check if we already have this slave into our table,
* otherwise add it. */
if (sentinelRedisInstanceLookupSlave(ri,ip,atoi(port)) == NULL) {
if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,ip,
atoi(port), ri->quorum, ri)) != NULL)
{
sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
sentinelFlushConfig();
}
}
}
...
}
ri->info_refresh = mstime();
sdsfreesplitres(lines,numlines);
...
}
- 将响应根据\r\n拆分成一行一行的
- 逐行解析
- 当解析到slave行时
- 判断此slave(ip+port)在当前master的slaves字典中是否存在
- 不存在,则创建replica实例对象,并且放入到master->slaves字典中
createSentinelRedisInstance(NULL,SRI_SLAVE,ip,
atoi(port), ri->quorum, ri))
sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
sentinelRedisInstance *ri;
...
dict *table = NULL;
...
else if (flags & SRI_SLAVE) table = master->slaves;
...
/* Create the instance object. */
ri = zmalloc(sizeof(*ri));
...
/* Add into the right table. */
dictAdd(table, ri->name, ri);
return ri;
}
四、哨兵主动连接replica
serverCron() ->
sentinelTimer() ->
sentinelHandleDictOfRedisInstances()
void sentinelHandleDictOfRedisInstances(dict *instances) {
dictIterator *di;
dictEntry *de;
...
/* There are a number of things we need to perform against every master. */
di = dictGetIterator(instances);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
sentinelHandleRedisInstance(ri);
if (ri->flags & SRI_MASTER) {
sentinelHandleDictOfRedisInstances(ri->slaves);
...
}
}
...
dictReleaseIterator(di);
}
可以看出,当ri是master时,会递归调用sentinelHandleDictOfRedisInstances,并且参数传递的是ri->slaves,这样就会遍历所有replica,对它们执行和master相同的操作:建立连接、发送PING等周期性命令。

1737

被折叠的 条评论
为什么被折叠?



