在一台物理机上启动6个Redis实例,组成3主3从集群,
端口号依次为:1379 ~ 1384,端口号1379、1380和1384三个为master,端口1379的进程ID为17620。
现将进程17620暂停(发送SIGSTOP信号),观察集群发现故障所需的时长,以及完成主从切换所需的时长。
暂停进程17620(端口1379),然后每秒查看一次集群状态
$ kill -19 17620;for ((i=0;i<10000000;++i)) do date +'[%H:%M:%S]';redis-cli -c -p 1380 cluster nodes;echo "";sleep 1; done
[14:23:51]
f03b1008988acbb0f69d96252decda9adf747be9 192.168.31.98:1384 master - 0 1525847030599 137 connected 1987 10923-16383
c1a9d1d23438241803ec97fbd765737df80f402a 192.168.31.98:1381 slave f03b1008988acbb0f69d96252decda9adf747be9 0 1525847031200 137 connected
4e932f2a3d80de29798660c5ea62e473e63a6630 192.168.31.98:1383 slave f6080015129eada3261925cc1b466f1824263358 0 1525847031100 134 connected
689f7c1ae71ea294c4ad7c5d1b32ae4e78e27915 192.168.31.98:1382 slave fa7bbbf7d48389409ce05d303272078c3a6fd44f 0 1525847030097 132 connected
fa7bbbf7d48389409ce05d303272078c3a6fd44f 192.168.31.98:1379 master - 0 1525847030799 132 connected 0-1986 1988-5457
f6080015129eada3261925cc1b466f1824263358 192.168.31.98:1380 myself,master - 0 0 134 connected 5458-10922
[14:23:52] 第1秒故障还未被发现
f03b1008988acbb0f69d96252decda9adf747be9 192.168.31.98:1384 master - 0 1525847031602 137 connected 1987 10923-16383
c1a9d1d23438241803ec97fbd765737df80f402a 192.168.31.98:1381 slave f03b1008988acbb0f69d96252decda9adf747be9 0 1525847031200 137 connected
4e932f2a3d80de29798660c5ea62e473e63a6630 192.168.31.98:1383 slave f6080015129eada3261925cc1b466f1824263358 0 1525847031100 134 connected
689f7c1ae71ea294c4ad7c5d1b32ae4e78e27915 192.168.31.98:1382 slave fa7bbbf7d48389409ce05d303272078c3a6fd44f 0 1525847031602 132 connected
fa7bbbf7d48389409ce05d303272078c3a6fd44f 192.168.31.98:1379 master - 1525847032302 1525847030799 132 connected 0-1986 1988-5457
f6080015129eada3261925cc1b466f1824263358 192.168.31.98:1380 myself,master - 0 0 134 connected 5458-10922
[14:23:53] 第2秒故障还未被发现
f03b1008988acbb0f69d96252decda9adf747be9 192.168.31.98:1384 master - 0 1525847033103 137 connected 1987 10923-16383
c1a9d1d23438241803ec97fbd765737df80f402a 192.168.31.98:1381 slave f03b1008988acbb0f69d96252decda9adf747be9 0 1525847032703 137 connected
4e932f2a3d80de29798660c5ea62e473e63a6630 192.168.31.98:1383 slave f6080015129eada3261925cc1b466f1824263358 0 1525847032602 134 connected
689f7c1ae71ea294c4ad7c5d1b32ae4e78e27915 192.168.31.98:1382 slave fa7bbbf7d48389409ce05d303272078c3a6fd44f 0 1525847033103 132 connected
fa7bbbf7d48389409ce05d303272078c3a6fd44f 192.168.31.98:1379 master - 1525847032302 1525847030799 132 connected 0-1986 1988-5457
f6080015129eada3261925cc1b466f1824263358 192.168.31.98:1380 myself,master - 0 0 134 connected 5458-10922
[14:23:54] 第3秒故障还未被发现
f03b1008988acbb0f69d96252decda9adf747be9 192.168.31.98:1384 master - 0 1525847033604 137 connected 1987 10923-16383
c1a9d1d23438241803ec97fbd765737df80f402a 192.168.31.98:1381 slave f03b1008988acbb0f69d96252decda9adf747be9 0 1525847034205 137 connected
4e932f2a3d80de29798660c5ea62e473e63a6630 192.168.31.98:1383 slave f6080015129eada3261925cc1b466f1824263358 0 1525847034106 134 connected
689f7c1ae71ea294c4ad7c5d1b32ae4e78e27915 192.168.31.98:1382 slave fa7bbbf7d48389409ce05d303272078c3a6fd44f 0 1525847033103 132 connected
fa7bbbf7d48389409ce05d303272078c3a6fd44f 192.168.31.98:1379 master - 1525847032302 1525847030799 132 connected 0-1986 1988-5457
f6080015129eada3261925cc1b466f1824263358 192.168.31.98:1380 myself,master - 0 0 134 connected 5458-10922
[14:23:55] 第4秒发现故障(1379被标记为fail?,即PFAIL疑似下线状态),但未选举出新的master
f03b1008988acbb0f69d96252decda9adf747be9 192.168.31.98:1384 master - 0 1525847034606 137 connected 1987 10923-16383
c1a9d1d23438241803ec97fbd765737df80f402a 192.168.31.98:1381 slave f03b1008988acbb0f69d96252decda9adf747be9 0 1525847034205 137 connected
4e932f2a3d80de29798660c5ea62e473e63a6630 192.168.31.98:1383 slave f6080015129eada3261925cc1b466f1824263358 0 1525847034106 134 connected
689f7c1ae71ea294c4ad7c5d1b32ae4e78e27915 192.168.31.98:1382 slave fa7bbbf7d48389409ce05d303272078c3a6fd44f 0 1525847034606 132 connected
fa7bbbf7d48389409ce05d303272078c3a6fd44f 192.168.31.98:1379 master,fail? - 1525847032302 1525847030799 132 connected 0-1986 1988-5457
f6080015129eada3261925cc1b466f1824263358 192.168.31.98:1380 myself,master - 0 0 134 connected 5458-10922
[14:23:56] 第5秒,1379已被确认下线(标记由fail?变为fail),但仍未选举出新的master
f03b1008988acbb0f69d96252decda9adf747be9 192.168.31.98:1384 master - 0 1525847036207 137 connected 1987 10923-16383
c1a9d1d23438241803ec97fbd765737df80f402a 192.168.31.98:1381 slave f03b1008988acbb0f69d96252decda9adf747be9 0 1525847035706 137 connected
4e932f2a3d80de29798660c5ea62e473e63a6630 192.168.31.98:1383 slave f6080015129eada3261925cc1b466f1824263358 0 1525847035606 134 connected
689f7c1ae71ea294c4ad7c5d1b32ae4e78e27915 192.168.31.98:1382 slave fa7bbbf7d48389409ce05d303272078c3a6fd44f 0 1525847036206 132 connected
fa7bbbf7d48389409ce05d303272078c3a6fd44f 192.168.31.98:1379 master,fail - 1525847032302 1525847030799 132 connected 0-1986 1988-5457
f6080015129eada3261925cc1b466f1824263358 192.168.31.98:1380 myself,master - 0 0 134 connected 5458-10922
[14:23:57] 第6秒,选举出新的master(1382由slave提升为master,并接管原1379负责的槽位)
f03b1008988acbb0f69d96252decda9adf747be9 192.168.31.98:1384 master - 0 1525847036207 137 connected 1987 10923-16383
c1a9d1d23438241803ec97fbd765737df80f402a 192.168.31.98:1381 slave f03b1008988acbb0f69d96252decda9adf747be9 0 1525847037212 137 connected
4e932f2a3d80de29798660c5ea62e473e63a6630 192.168.31.98:1383 slave f6080015129eada3261925cc1b466f1824263358 0 1525847036606 134 connected
689f7c1ae71ea294c4ad7c5d1b32ae4e78e27915 192.168.31.98:1382 master - 0 1525847036206 138 connected 0-1986 1988-5457
fa7bbbf7d48389409ce05d303272078c3a6fd44f 192.168.31.98:1379 master,fail - 1525847032302 1525847030799 132 connected
f6080015129eada3261925cc1b466f1824263358 192.168.31.98:1380 myself,master - 0 0 134 connected 5458-10922
与时间有关的配置项:
repl-ping-slave-period 1
repl-timeout 10
cluster-node-timeout 3000