## 重建集群
1、查看当前集群节点状态:
[root@centos4 ~]# redis-cli -c -p 7111
127.0.0.1:7111> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:7
cluster_my_epoch:6
cluster_stats_messages_sent:273044
cluster_stats_messages_received:272409
127.0.0.1:7111> cluster nodes
967f4154bc4ced3c0c76a74c280f2779f2f70fb2 10.1.200.118:7114 master - 0 1469706909199 7 connected 0-5460
778b0cd6414d92ad4aec93754445648fc57963ca 10.1.200.120:7115 master - 0 1469706908184 2 connected 5461-10922
7226e609e47616e0d3cff2f80831ac6d811e0027 10.1.200.125:7116 slave 967f4154bc4ced3c0c76a74c280f2779f2f70fb2 0 1469706907169 7 connected
e1ced4f32594b0d2b82b268059e2ce89abebfde5 10.1.200.78:7112 slave 778b0cd6414d92ad4aec93754445648fc57963ca 0 1469706910219 5 connected
c812379709120ab74b20bd76dd2b9fcaf6c62209 10.1.200.77:7111 myself,slave bb79ff3fdf24a99df6ebb93d17f026bb81eb221b 0 0 4 connected
bb79ff3fdf24a99df6ebb93d17f026bb81eb221b 10.1.200.87:7113 master - 0 1469706911244 6 connected 10923-16383
注:可以看到此时
Master
节点为:10.1.200.118:7114
,10.1.200.120:7115
,10.1.200.87:7113
;Slave
节点为:10.1.200.125:7116
,10.1.200.78:7112
,10.1.200.77:7111
2、关闭集群的各节点:
10.1.200.77
服务器:
[root@centos4 ~]# ps -ef|grep redis
root 2442 1 0 Jul27 ? 00:02:31 /usr/local/redis/bin/redis-server *:7111 [cluster]
root 16469 13753 0 19:59 pts/0 00:00:00 grep redis
[root@centos4 ~]# kill 2442
[root@centos4 ~]# ps -ef|grep redis
root 16472 13753 0 19:59 pts/0 00:00:00 grep redis
10.1.200.78
服务器:
[root@centos4 ~]# ps -ef|grep redis
root 2436 1 0 Jul27 ? 00:02:29 /usr/local/redis/bin/redis-server *:7112 [cluster]
root 16462 13745 0 19:59 pts/0 00:00:00 grep redis
[root@centos4 ~]# kill 2436
[root@centos4 ~]# ps -ef|grep redis
root 16470 13745 0 20:00 pts/0 00:00:00 grep redis
10.1.200.87
服务器:
[root@centos4 ~]# ps -ef|grep redis
root 2427 1 0 Jul27 ? 00:02:26 /usr/local/redis/bin/redis-server *:7113 [cluster]
root 16874 14076 0 20:00 pts/0 00:00:00 grep redis
[root@centos4 ~]# kill 2427
[root@centos4 ~]# ps -ef|grep redis
root 16876 14076 0 20:00 pts/0 00:00:00 grep redis
10.1.200.118
服务器:
[root@centos4 ~]# ps -ef|grep redis
root 2425 1 0 Jul27 ? 00:02:28 /usr/local/redis/bin/redis-server *:7114 [cluster]
root 16491 13748 0 20:01 pts/0 00:00:00 grep redis
[root@centos4 ~]# kill 2425
[root@centos4 ~]# ps -ef|grep redis
root 16495 13748 0 20:01 pts/0 00:00:00 grep redis
10.1.200.120
服务器:
[root@centos4 ~]# ps -ef|grep redis
root 2428 1 0 Jul27 ? 00:02:27 /usr/local/redis/bin/redis-server *:7115 [cluster]
root 16530 13785 0 20:01 pts/0 00:00:00 grep redis
[root@centos4 ~]# kill 2428
[root@centos4 ~]# ps -ef|grep redis
root 16533 13785 0 20:02 pts/0 00:00:00 grep redis
10.1.200.125
服务器:
[root@centos4 ~]# ps -ef|grep redis
root 2445 1 0 Jul27 ? 00:02:31 /usr/local/redis/bin/redis-server *:7116 [cluster]
root 16504 13755 0 20:02 pts/0 00:00:00 grep redis
[root@centos4 ~]# kill 2445
[root@centos4 ~]# ps -ef|grep redis
root 16507 13755 0 20:02 pts/0 00:00:00 grep redis
3、删除各节点数据目录下的 nodes.conf
、appendonly.aof
、dump.rdb
:
10.1.200.77
服务器:
[root@centos4 ~]# cd /usr/local/redis/cluster/7111/
[root@centos4 7111]# ls -all
总用量 80
drwxr-xr-x. 2 root root 4096 7月 28 19:59 .
drwxr-xr-x. 3 root root 4096 7月 26 15:59 ..
-rw-r--r--. 1 root root 15880 7月 28 12:52 appendonly.aof
-rw-r--r--. 1 root root 8080 7月 28 19:59 dump.rdb
-rw-r--r--. 1 root root 760 7月 28 09:03 nodes.conf
-rw-r--r--. 1 root root 42387 7月 23 13:20 redis-7111.conf
[root@centos4 7111]# rm -rf appendonly.aof dump.rdb nodes.conf
10.1.200.78
服务器:
[root@centos4 ~]# cd /usr/local/redis/cluster/7112/
[root@centos4 7112]# ls -all
总用量 80
drwxr-xr-x. 2 root root 4096 7月 28 20:00 .
drwxr-xr-x. 3 root root 4096 7月 26 16:02 ..
-rw-r--r--. 1 root root 15330 7月 28 12:52 appendonly.aof
-rw-r--r--. 1 root root 7806 7月 28 20:00 dump.rdb
-rw-r--r--. 1 root root 768 7月 28 19:59 nodes.conf
-rw-r--r--. 1 root root 42387 7月 23 13:21 redis-7112.conf
[root@centos4 7112]# rm -rf appendonly.aof dump.rdb nodes.conf
10.1.200.87
服务器:
[root@centos4 ~]# cd /usr/local/redis/cluster/7113/
[root@centos4 7113]# ls -all
总用量 80
drwxr-xr-x. 2 root root 4096 7月 28 20:00 .
drwxr-xr-x. 3 root root 4096 7月 26 16:05 ..
-rw-r--r--. 1 root root 15880 7月 28 12:52 appendonly.aof
-rw-r--r--. 1 root root 8080 7月 28 20:00 dump.rdb
-rw-r--r--. 1 root root 788 7月 28 20:00 nodes.conf
-rw-r--r--. 1 root root 42387 7月 23 13:21 redis-7113.conf
[root@centos4 7113]# rm -rf appendonly.aof dump.rdb nodes.conf
10.1.200.118
服务器:
[root@centos4 ~]# cd /usr/local/redis/cluster/7114/
[root@centos4 7114]# ls -all
总用量 80
drwxr-xr-x. 2 root root 4096 7月 28 20:01 .
drwxr-xr-x. 3 root root 4096 7月 26 16:11 ..
-rw-r--r--. 1 root root 15703 7月 28 12:52 appendonly.aof
-rw-r--r--. 1 root root 7973 7月 28 20:01 dump.rdb
-rw-r--r--. 1 root root 808 7月 28 20:00 nodes.conf
-rw-r--r--. 1 root root 42387 7月 23 13:22 redis-7114.conf
[root@centos4 7114]# rm -rf appendonly.aof dump.rdb nodes.conf
10.1.200.120
服务器:
[root@centos4 ~]# cd /usr/local/redis/cluster/7115/
[root@centos4 7115]# ls -all
总用量 80
drwxr-xr-x. 2 root root 4096 7月 28 20:02 .
drwxr-xr-x. 3 root root 4096 7月 26 16:14 ..
-rw-r--r--. 1 root root 15330 7月 28 12:52 appendonly.aof
-rw-r--r--. 1 root root 7806 7月 28 20:02 dump.rdb
-rw-r--r--. 1 root root 808 7月 28 20:00 nodes.conf
-rw-r--r--. 1 root root 42387 7月 23 13:22 redis-7115.conf
[root@centos4 7115]# rm -rf appendonly.aof dump.rdb nodes.conf
10.1.200.125
服务器:
[root@centos4 ~]# cd /usr/local/redis/cluster/7116/
[root@centos4 7116]# ls -all
总用量 80
drwxr-xr-x. 2 root root 4096 7月 28 20:02 .
drwxr-xr-x. 3 root root 4096 7月 26 16:17 ..
-rw-r--r--. 1 root root 15703 7月 28 12:52 appendonly.aof
-rw-r--r--. 1 root root 7973 7月 28 20:02 dump.rdb
-rw-r--r--. 1 root root 808 7月 28 20:00 nodes.conf
-rw-r--r--. 1 root root 42387 7月 23 13:23 redis-7116.conf
[root@centos4 7116]# rm -rf appendonly.aof dump.rdb nodes.conf
4、重新启用所有的节点:
10.1.200.77
服务器:
/usr/local/redis/bin/redis-server /usr/local/redis/cluster/7111/redis-7111.conf
10.1.200.78
服务器:
/usr/local/redis/bin/redis-server /usr/local/redis/cluster/7112/redis-7112.conf
10.1.200.87
服务器:
/usr/local/redis/bin/redis-server /usr/local/redis/cluster/7113/redis-7113.conf
10.1.200.118
服务器:
/usr/local/redis/bin/redis-server /usr/local/redis/cluster/7114/redis-7114.conf
10.1.200.120
服务器:
/usr/local/redis/bin/redis-server /usr/local/redis/cluster/7115/redis-7115.conf
10.1.200.125
服务器:
/usr/local/redis/bin/redis-server /usr/local/redis/cluster/7116/redis-7116.conf
5、启动之后用ps
命令查看实例启动情况:
ps -ef | grep redis
10.1.200.77
服务器:
[root@centos4 7111]# ps -ef | grep redis
root 16596 1 0 20:17 ? 00:00:00 /usr/local/redis/bin/redis-server *:7111 [cluster]
root 16603 13753 0 20:17 pts/0 00:00:00 grep redis
10.1.200.78
服务器:
[root@centos4 7112]# ps -ef | grep redis
root 16590 1 1 20:17 ? 00:00:00 /usr/local/redis/bin/redis-server *:7112 [cluster]
root 16595 13745 0 20:17 pts/0 00:00:00 grep redis
10.1.200.87
服务器:
[root@centos4 7113]# ps -ef | grep redis
root 16997 1 1 20:18 ? 00:00:00 /usr/local/redis/bin/redis-server *:7113 [cluster]
root 17001 14076 0 20:18 pts/0 00:00:00 grep redis
10.1.200.118
服务器:
[root@centos4 7114]# ps -ef | grep redis
root 16602 1 0 20:19 ? 00:00:00 /usr/local/redis/bin/redis-server *:7114 [cluster]
root 16606 13748 0 20:19 pts/0 00:00:00 grep redis
10.1.200.120
服务器:
[root@centos4 7115]# ps -ef | grep redis
root 16639 1 1 20:19 ? 00:00:00 /usr/local/redis/bin/redis-server *:7115 [cluster]
root 16643 13785 0 20:19 pts/0 00:00:00 grep redis
10.1.200.125
服务器:
[root@centos4 7116]# ps -ef | grep redis
root 16618 1 2 20:20 ? 00:00:00 /usr/local/redis/bin/redis-server *:7116 [cluster]
root 16622 13755 0 20:20 pts/0 00:00:00 grep redis
6、执行集群创建命令(只需要在其中一个节点上执行一次即可):
[root@centos4 7111]# redis-trib create --replicas 1 10.1.200.118:7114 10.1.200.120:7115 10.1.200.125:7116 10.1.200.77:7111 10.1.200.78:7112 10.1.200.87:7113
>>> Creating cluster
>>> Performing hash slots allocation on 6 nodes...
Using 3 masters:
10.1.200.78:7112
10.1.200.77:7111
10.1.200.118:7114
Adding replica 10.1.200.87:7113 to 10.1.200.78:7112
Adding replica 10.1.200.125:7116 to 10.1.200.77:7111
Adding replica 10.1.200.120:7115 to 10.1.200.118:7114
M: 087b254c35a87272233346a03753a3fe568f8ac7 10.1.200.118:7114
slots:10923-16383 (5461 slots) master
S: 2ffb5712581c9bbfc507a9c8b5c9f22b818e1fc0 10.1.200.120:7115
replicates 087b254c35a87272233346a03753a3fe568f8ac7
S: 7f06a60a8047b8f29808b46c942fd14204a3f475 10.1.200.125:7116
replicates c767fa06f0a2153a74851f584c7c01389a373d1f
M: c767fa06f0a2153a74851f584c7c01389a373d1f 10.1.200.77:7111
slots:5461-10922 (5462 slots) master
M: f47e75a5c55f88254fca9c6158594b0507f97ff3 10.1.200.78:7112
slots:0-5460 (5461 slots) master
S: cb1fbeb9075bf5510b223625c629affe08bd4495 10.1.200.87:7113
replicates f47e75a5c55f88254fca9c6158594b0507f97ff3
Can I set the above configuration? (type 'yes' to accept): yes
>>> Nodes configuration updated
>>> Assign a different config epoch to each node
>>> Sending CLUSTER MEET messages to join the cluster
Waiting for the cluster to join......
>>> Performing Cluster Check (using node 10.1.200.118:7114)
M: 087b254c35a87272233346a03753a3fe568f8ac7 10.1.200.118:7114
slots:10923-16383 (5461 slots) master
M: 2ffb5712581c9bbfc507a9c8b5c9f22b818e1fc0 10.1.200.120:7115
slots: (0 slots) master
replicates 087b254c35a87272233346a03753a3fe568f8ac7
M: 7f06a60a8047b8f29808b46c942fd14204a3f475 10.1.200.125:7116
slots: (0 slots) master
replicates c767fa06f0a2153a74851f584c7c01389a373d1f
M: c767fa06f0a2153a74851f584c7c01389a373d1f 10.1.200.77:7111
slots:5461-10922 (5462 slots) master
M: f47e75a5c55f88254fca9c6158594b0507f97ff3 10.1.200.78:7112
slots:0-5460 (5461 slots) master
M: cb1fbeb9075bf5510b223625c629affe08bd4495 10.1.200.87:7113
slots: (0 slots) master
replicates f47e75a5c55f88254fca9c6158594b0507f97ff3
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
7、集群创建成功之后,查看当前集群各节点的状态:
[root@centos4 7111]# redis-cli -c -p 7111
127.0.0.1:7111> CLUSTER NODES
2ffb5712581c9bbfc507a9c8b5c9f22b818e1fc0 10.1.200.120:7115 slave 087b254c35a87272233346a03753a3fe568f8ac7 0 1469708730499 2 connected
7f06a60a8047b8f29808b46c942fd14204a3f475 10.1.200.125:7116 slave c767fa06f0a2153a74851f584c7c01389a373d1f 0 1469708727437 4 connected
c767fa06f0a2153a74851f584c7c01389a373d1f 10.1.200.77:7111 myself,master - 0 0 4 connected 5461-10922
cb1fbeb9075bf5510b223625c629affe08bd4495 10.1.200.87:7113 slave f47e75a5c55f88254fca9c6158594b0507f97ff3 0 1469708725392 6 connected
f47e75a5c55f88254fca9c6158594b0507f97ff3 10.1.200.78:7112 master - 0 1469708728457 5 connected 0-5460
087b254c35a87272233346a03753a3fe568f8ac7 10.1.200.118:7114 master - 0 1469708729477 1 connected 10923-16383
注:可以看到此时
Master
节点为:10.1.200.77:7111
,10.1.200.78:7112
,10.1.200.118:7114
;Slave
节点为:10.1.200.120:7115
,10.1.200.125:7116
,10.1.200.87:7113
8、重建集群之后集群中的数据被清空:
127.0.0.1:7111> KEYS *
(empty list or set)
## 查看各个节点KEY分布
1、使用demo
应用向集群写入1000
个键值数据。 2、登录各个节点,使用KEYS *
查看各节点的所有KEY
:
10.1.200.77
服务器:
[root@centos4 7111]# redis-cli -c -p 7111
127.0.0.1:7111> KEYS dj*
1) "dj11"
2) "dj7"
3) "dj3"
4) "dj20"
5) "dj19"
6) "dj15"
7) "dj4"
10.1.200.78
服务器:
[root@centos4 7112]# redis-cli -c -p 7112
127.0.0.1:7112> KEYS dj*
1) "dj14"
2) "dj5"
3) "dj18"
4) "dj10"
5) "dj1"
6) "dj9"
7) "dj8"
10.1.200.87
服务器:
[root@centos4 7113]# redis-cli -c -p 7113
127.0.0.1:7113> KEYS dj*
1) "dj14"
2) "dj18"
3) "dj10"
4) "dj9"
5) "dj8"
6) "dj5"
7) "dj1"
10.1.200.118
服务器:
[root@centos4 7114]# redis-cli -c -p 7114
127.0.0.1:7114> KEYS dj*
1) "dj6"
2) "dj2"
3) "dj12"
4) "dj17"
5) "dj13"
6) "dj16"
10.1.200.120
服务器:
[root@centos4 7115]# redis-cli -c -p 7115
127.0.0.1:7115> KEYS dj*
1) "dj6"
2) "dj2"
3) "dj16"
4) "dj13"
5) "dj12"
6) "dj17"
10.1.200.125
服务器:
[root@centos4 7116]# redis-cli -c -p 7116
127.0.0.1:7116> KEYS dj*
1) "dj3"
2) "dj4"
3) "dj15"
4) "dj11"
5) "dj7"
6) "dj19"
7) "dj20"
从KEY的分布可以看出来,
10.1.200.77:7111
和10.1.200.125:7116
;10.1.200.78:7112
和10.1.200.87:7113
;10.1.200.118:7114
和10.1.200.120:7115
的KEY是一样的,它们是Master
和Slave
对。
## 模拟集群节点宕机,实现故障转移
1、运行demo
应用,模拟业务不停连接Redis
集群获取数据的场景。 2、查看Redis
集群当前状态,用于接下来做节点状态变化对比。
[root@centos4 7111]# redis-cli -c -p 7111
127.0.0.1:7111> CLUSTER NODES
2ffb5712581c9bbfc507a9c8b5c9f22b818e1fc0 10.1.200.120:7115 slave 087b254c35a87272233346a03753a3fe568f8ac7 0 1469710790605 2 connected
7f06a60a8047b8f29808b46c942fd14204a3f475 10.1.200.125:7116 slave c767fa06f0a2153a74851f584c7c01389a373d1f 0 1469710792645 4 connected
c767fa06f0a2153a74851f584c7c01389a373d1f 10.1.200.77:7111 myself,master - 0 0 4 connected 5461-10922
cb1fbeb9075bf5510b223625c629affe08bd4495 10.1.200.87:7113 slave f47e75a5c55f88254fca9c6158594b0507f97ff3 0 1469710788973 6 connected
f47e75a5c55f88254fca9c6158594b0507f97ff3 10.1.200.78:7112 master - 0 1469710791625 5 connected 0-5460
087b254c35a87272233346a03753a3fe568f8ac7 10.1.200.118:7114 master - 0 1469710793668 1 connected 10923-16383
3、关闭其中一个master
节点:10.1.200.78:7112
:
[root@centos4 7112]# ps -ef|grep redis
root 16590 1 0 20:17 ? 00:00:03 /usr/local/redis/bin/redis-server *:7112 [cluster]
root 16883 13745 0 21:02 pts/0 00:00:00 grep redis
[root@centos4 7112]# kill 16590
[root@centos4 7112]# ps -ef|grep redis
root 16887 13745 0 21:02 pts/0 00:00:00 grep redis
4、观察该Master
节点和对应的Slave
节点的状态变化。
127.0.0.1:7111> CLUSTER NODES
2ffb5712581c9bbfc507a9c8b5c9f22b818e1fc0 10.1.200.120:7115 slave 087b254c35a87272233346a03753a3fe568f8ac7 0 1469711001934 2 connected
7f06a60a8047b8f29808b46c942fd14204a3f475 10.1.200.125:7116 slave c767fa06f0a2153a74851f584c7c01389a373d1f 0 1469710999889 4 connected
c767fa06f0a2153a74851f584c7c01389a373d1f 10.1.200.77:7111 myself,master - 0 0 4 connected 5461-10922
cb1fbeb9075bf5510b223625c629affe08bd4495 10.1.200.87:7113 master - 0 1469710998864 7 connected 0-5460
f47e75a5c55f88254fca9c6158594b0507f97ff3 10.1.200.78:7112 master,fail - 1469710946085 1469710939743 5 disconnected
087b254c35a87272233346a03753a3fe568f8ac7 10.1.200.118:7114 master - 0 1469711000912 1 connected 10923-16383
1、可以从此时的节点状态中看出来原先的
10.1.200.78:7112
这个Master
节点已经fail
,原来10.1.200.78:7112
节点关联的10.1.200.87:7113
节点从Slave
转变为Master
。 2、节点状态fail?
表示正在判断是否失败。 3、节点状态fail
表示节点失败,对应的Slave
节点提升为Master
。 4、再看demo
程序,依然正常,说明Slave
替换Master
成功,集群正常。
5、恢复fail
节点 10.1.200.78:7112
,再查看集群状态。
[root@centos4 7112]# /usr/local/redis/bin/redis-server /usr/local/redis/cluster/7112/redis-7112.conf
[root@centos4 7112]# ps -ef|grep redis
root 16943 1 0 21:11 ? 00:00:00 /usr/local/redis/bin/redis-server *:7112 [cluster]
root 16949 13745 0 21:11 pts/0 00:00:00 grep redis
此时集群的状态为:
127.0.0.1:7111> CLUSTER NODES
2ffb5712581c9bbfc507a9c8b5c9f22b818e1fc0 10.1.200.120:7115 slave 087b254c35a87272233346a03753a3fe568f8ac7 0 1469711508378 2 connected
7f06a60a8047b8f29808b46c942fd14204a3f475 10.1.200.125:7116 slave c767fa06f0a2153a74851f584c7c01389a373d1f 0 1469711503278 4 connected
c767fa06f0a2153a74851f584c7c01389a373d1f 10.1.200.77:7111 myself,master - 0 0 4 connected 5461-10922
cb1fbeb9075bf5510b223625c629affe08bd4495 10.1.200.87:7113 master - 0 1469711505315 7 connected 0-5460
f47e75a5c55f88254fca9c6158594b0507f97ff3 10.1.200.78:7112 slave cb1fbeb9075bf5510b223625c629affe08bd4495 0 1469711507358 7 connected
087b254c35a87272233346a03753a3fe568f8ac7 10.1.200.118:7114 master - 0 1469711506338 1 connected 10923-16383
此时
10.1.200.78:7112
节点又变成了10.1.200.87:7113
节点的Slave
6、观察集群节点切换过程中,对客户端的影响。 JedisCluster
连接Redis
集群操作时遇到的几个常见异常:
1)重定向次数过多
redis.clients.jedis.exceptions.JedisClusterMaxRedirectionsException: Too many Cluster redirections?
解决方法: 初始化JedisCluster
时,设定JedisCluster
的maxRedirections
//集群各节点集合,超时时间(默认2秒),最多重定向次数(默认5),连接池
JedisCluster jedisCluster = new JedisCluster(jedisClusterNodes, 2000, 100, config);
2)集群不可用
redis.clients.jedis.exceptions.JedisClusterException: CLUSTERDOWN The cluster is down
原因:集群节点状态切换过程中会出现临时闪断,客户端重试操作即可。
3)连接超时
redis.clients.jedis.exceptions.JedisConnectionException: java.net.SocketTimeoutException: Read timed out
解决方法: 初始化JedisCluster
时,设定JedisCluster
的timeout
(默认为2秒);也可以修改源码中的默认时间。
7、总结:
1)优点: 在Master
节点下线后,Slave
节点会自动提升为Master
节点,保证集群持续提供服务; fail
节点恢复后,会自动添加到集群中,变成Slave
节点。
2)缺点: 由于redis
的复制使用异步机制,在自动故障转移的过程中,集群可能会丢失写命令。然而 redis
几乎是同时执行(将命令回复发送给客户端,以及将命令复制到Slave
节点)这两个操作,所以实际中,命令丢失的窗口非常小。