1
2
3
4
5
|
# ssh-keygen
# ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.12
# ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.225
# ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.226
# ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.227
|
1
2
3
4
5
6
7
8
9
10
11
12
|
Rpm包下载地址,需要翻墙:
https://code.google.com/p/mysql-master-ha/downloads/detail?name=mha4mysql-manager-0.53-0.el6.noarch.rpm
https://code.google.com/p/mysql-master-ha/downloads/detail?name=mha4mysql-node-0.52-0.noarch.rpm
管理节点:
# yum -y localinstall \
mha4mysql-node-0.52-0.noarch.rpm \
mha4mysql-manager-0.53-0.el6.noarch.rpm
数据库节点:
# yum -y localinstall mha4mysql-node-0.52-0.noarch.rpm
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
管理节点:
# mkdir -p /usr/local/mha
# mkdir -p /etc/mha
# cat /etc/mha/mha.conf
[server default]
user=root
password=123456
manager_workdir=/usr/local/mha
manager_log=/usr/local/mha/manager.log
remote_workdir=/usr/local/mha
ssh_user=root
repl_user=replication
repl_password=123456
ping_interval=1
secondary_check_script= masterha_secondary_check -s 192.168.1.226 -s 192.168.1.227
master_ip_failover_script=/usr/local/scripts/master_ip_failover
[server1]
hostname=192.168.1.225
ssh_port=22
master_binlog_dir=/mydata
candidate_master=1
[server2]
hostname=192.168.1.226
ssh_port=22
master_binlog_dir=/mydata
candidate_master=1
[server3]
hostname=192.168.1.227
ssh_port=22
master_binlog_dir=/mydata
no_master=1
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
my (
$command, $ssh_user, $orig_master_host, $orig_master_ip,
$orig_master_port, $new_master_host, $new_master_ip, $new_master_port
);
my $vip = '192.168.1.231'; # Virtual IP
my $gateway = '192.168.1.1';#Gateway IP
my $interface = 'eth0';
my $key = "1";
my $ssh_start_vip = "/sbin/ifconfig $interface:$key $vip;/sbin/arping -I $interface -c 3 -s $vip $gateway >/dev/null 2>&1";
my $ssh_stop_vip = "/sbin/ifconfig $interface:$key down";
GetOptions(
'command=s' => \$command,
'ssh_user=s' => \$ssh_user,
'orig_master_host=s' => \$orig_master_host,
'orig_master_ip=s' => \$orig_master_ip,
'orig_master_port=i' => \$orig_master_port,
'new_master_host=s' => \$new_master_host,
'new_master_ip=s' => \$new_master_ip,
'new_master_port=i' => \$new_master_port,
);
exit &main();
sub main {
print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";
if ( $command eq "stop" || $command eq "stopssh" ) {
# $orig_master_host, $orig_master_ip, $orig_master_port are passed.
# If you manage master ip address at global catalog database,
# invalidate orig_master_ip here.
my $exit_code = 1;
eval {
print "Disabling the VIP on old master: $orig_master_host \n";
&stop_vip();
$exit_code = 0;
};
if ($@) {
warn "Got Error: $@\n";
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "start" ) {
# all arguments are passed.
# If you manage master ip address at global catalog database,
# activate new_master_ip here.
# You can also grant write access (create user, set read_only=0, etc) here.
my $exit_code = 10;
eval {
print "Enabling the VIP - $vip on the new master - $new_master_host \n";
&start_vip();
$exit_code = 0;
};
if ($@) {
warn $@;
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "status" ) {
print "Checking the Status of the script.. OK \n";
`ssh $ssh_user\@$orig_master_host \" $ssh_start_vip \"`;
exit 0;
}
else {
&usage();
exit 1;
}
}
# A simple system call that enable the VIP on the new master
sub start_vip() {
`ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
# A simple system call that disable the VIP on the old_master
sub stop_vip() {
`ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}
sub usage {
print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
Tue Oct 28 10:58:35 2014 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Oct 28 10:58:35 2014 - [info] Reading application default configurations from /etc/mha/mha.conf..
Tue Oct 28 10:58:35 2014 - [info] Reading server configurations from /etc/mha/mha.conf..
Tue Oct 28 10:58:35 2014 - [info] MHA::MasterMonitor version 0.53.
Tue Oct 28 10:58:36 2014 - [info] Dead Servers:
Tue Oct 28 10:58:36 2014 - [info] Alive Servers:
Tue Oct 28 10:58:36 2014 - [info] 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 10:58:36 2014 - [info] 192.168.1.226(192.168.1.226:3306)
Tue Oct 28 10:58:36 2014 - [info] 192.168.1.227(192.168.1.227:3306)
Tue Oct 28 10:58:36 2014 - [info] Alive Slaves:
Tue Oct 28 10:58:36 2014 - [info] 192.168.1.226(192.168.1.226:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 10:58:36 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 10:58:36 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 10:58:36 2014 - [info] 192.168.1.227(192.168.1.227:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 10:58:36 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 10:58:36 2014 - [info] Not candidate for the new Master (no_master is set)
Tue Oct 28 10:58:36 2014 - [info] Current Alive Master: 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 10:58:36 2014 - [info] Checking slave configurations..
Tue Oct 28 10:58:36 2014 - [warning] relay_log_purge=0 is not set on slave 192.168.1.227(192.168.1.227:3306).
Tue Oct 28 10:58:36 2014 - [info] Checking replication filtering settings..
Tue Oct 28 10:58:36 2014 - [info] binlog_do_db= , binlog_ignore_db=
Tue Oct 28 10:58:36 2014 - [info] Replication filtering check ok.
Tue Oct 28 10:58:36 2014 - [info] Starting SSH connection tests..
^CTue Oct 28 10:58:41 2014 - [info] Got terminate signal. Exit.
[root@test_redis scripts]# masterha_check_repl --conf=/etc/mha/mha.conf
Tue Oct 28 10:58:51 2014 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Oct 28 10:58:51 2014 - [info] Reading application default configurations from /etc/mha/mha.conf..
Tue Oct 28 10:58:51 2014 - [info] Reading server configurations from /etc/mha/mha.conf..
Tue Oct 28 10:58:51 2014 - [info] MHA::MasterMonitor version 0.53.
Tue Oct 28 10:58:52 2014 - [info] Dead Servers:
Tue Oct 28 10:58:52 2014 - [info] Alive Servers:
Tue Oct 28 10:58:52 2014 - [info] 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 10:58:52 2014 - [info] 192.168.1.226(192.168.1.226:3306)
Tue Oct 28 10:58:52 2014 - [info] 192.168.1.227(192.168.1.227:3306)
Tue Oct 28 10:58:52 2014 - [info] Alive Slaves:
Tue Oct 28 10:58:52 2014 - [info] 192.168.1.226(192.168.1.226:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 10:58:52 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 10:58:52 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 10:58:52 2014 - [info] 192.168.1.227(192.168.1.227:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 10:58:52 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 10:58:52 2014 - [info] Not candidate for the new Master (no_master is set)
Tue Oct 28 10:58:52 2014 - [info] Current Alive Master: 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 10:58:52 2014 - [info] Checking slave configurations..
Tue Oct 28 10:58:52 2014 - [info] Checking replication filtering settings..
Tue Oct 28 10:58:52 2014 - [info] binlog_do_db= , binlog_ignore_db=
Tue Oct 28 10:58:52 2014 - [info] Replication filtering check ok.
Tue Oct 28 10:58:52 2014 - [info] Starting SSH connection tests..
Tue Oct 28 10:58:55 2014 - [info] All SSH connection tests passed successfully.
Tue Oct 28 10:58:55 2014 - [info] Checking MHA Node version..
Tue Oct 28 10:58:55 2014 - [info] Version check ok.
Tue Oct 28 10:58:55 2014 - [info] Checking SSH publickey authentication settings on the current master..
Tue Oct 28 10:58:56 2014 - [info] HealthCheck: SSH to 192.168.1.225 is reachable.
Tue Oct 28 10:58:56 2014 - [info] Master MHA Node version is 0.52.
Tue Oct 28 10:58:56 2014 - [info] Checking recovery script configurations on the current master..
Tue Oct 28 10:58:56 2014 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/mydata --output_file=/usr/local/mha/save_binary_logs_test --manager_version=0.53 --start_file=mysql-bin.000013
Tue Oct 28 10:58:56 2014 - [info] Connecting to root@192.168.1.225(192.168.1.225)..
Creating /usr/local/mha if not exists.. ok.
Checking output directory is accessible or not..
ok.
Binlog found at /mydata, up to mysql-bin.000013
Tue Oct 28 10:58:56 2014 - [info] Master setting check done.
Tue Oct 28 10:58:56 2014 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Tue Oct 28 10:58:56 2014 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user=root --slave_host=192.168.1.226 --slave_ip=192.168.1.226 --slave_port=3306 --workdir=/usr/local/mha --target_version=5.6.17-log --manager_version=0.53 --relay_log_info=/mydata/relay-log.info --relay_dir=/mydata/ --slave_pass=xxx
Tue Oct 28 10:58:56 2014 - [info] Connecting to root@192.168.1.226(192.168.1.226:22)..
Checking slave recovery environment settings..
Opening /mydata/relay-log.info ... ok.
Relay log found at /mydata, up to mysql-relay-bin.000006
Temporary relay log file is /mydata/mysql-relay-bin.000006
Testing mysql connection and privileges.. done.
Testing mysqlbinlog output.. done.
Cleaning up test file(s).. done.
Tue Oct 28 10:58:57 2014 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user=root --slave_host=192.168.1.227 --slave_ip=192.168.1.227 --slave_port=3306 --workdir=/usr/local/mha --target_version=5.6.17-log --manager_version=0.53 --relay_log_info=/mydata/relay-log.info --relay_dir=/mydata/ --slave_pass=xxx
Tue Oct 28 10:58:57 2014 - [info] Connecting to root@192.168.1.227(192.168.1.227:22)..
Checking slave recovery environment settings..
Opening /mydata/relay-log.info ... ok.
Relay log found at /mydata, up to mysql-relay-bin.000016
Temporary relay log file is /mydata/mysql-relay-bin.000016
Testing mysql connection and privileges.. done.
Testing mysqlbinlog output.. done.
Cleaning up test file(s).. done.
Tue Oct 28 10:58:58 2014 - [info] Slaves settings check done.
Tue Oct 28 10:58:58 2014 - [info]
192.168.1.225 (current master)
+--192.168.1.226
+--192.168.1.227
Tue Oct 28 10:58:58 2014 - [info] Checking replication health on 192.168.1.226..
Tue Oct 28 10:58:58 2014 - [info] ok.
Tue Oct 28 10:58:58 2014 - [info] Checking replication health on 192.168.1.227..
Tue Oct 28 10:58:58 2014 - [info] ok.
Tue Oct 28 10:58:58 2014 - [info] Checking master_ip_failover_script status:
Tue Oct 28 10:58:58 2014 - [info] /usr/local/scripts/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.1.225 --orig_master_ip=192.168.1.225 --orig_master_port=3306
IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 192.168.1.231;/sbin/arping -I eth0 -c 3 -s 192.168.1.231 192.168.1.1 >/dev/null 2>&1===
Checking the Status of the script.. OK
Tue Oct 28 10:59:02 2014 - [info] OK.
Tue Oct 28 10:59:02 2014 - [warning] shutdown_script is not defined.
Tue Oct 28 10:59:02 2014 - [info] Got exit code 0 (Not master dead).
MySQL Replication Health is OK.
|
1
2
3
4
|
# nohup masterha_manager --conf=/etc/mha/mha.conf > /tmp/mha_manager.log < /dev/null 2>&1 &
# masterha_check_status --conf=/etc/mha/mha.conf
mha (pid:4228) is running(0:PING_OK), master:192.168.1.225
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
|
Tue Oct 28 11:31:36 2014 - [info] HealthCheck: SSH to 192.168.1.225 is reachable.
Monitoring server 192.168.1.226 is reachable, Master is not reachable from 192.168.1.226. OK.
Monitoring server 192.168.1.227 is reachable, Master is not reachable from 192.168.1.227. OK.
Tue Oct 28 11:31:37 2014 - [info] Master is not reachable from all other monitoring servers. Failover should start.
Tue Oct 28 11:31:37 2014 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Tue Oct 28 11:31:37 2014 - [warning] Connection failed 1 time(s)..
Tue Oct 28 11:31:38 2014 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Tue Oct 28 11:31:38 2014 - [warning] Connection failed 2 time(s)..
Tue Oct 28 11:31:39 2014 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Tue Oct 28 11:31:39 2014 - [warning] Connection failed 3 time(s)..
Tue Oct 28 11:31:39 2014 - [warning] Master is not reachable from health checker!
Tue Oct 28 11:31:39 2014 - [warning] Master 192.168.1.225(192.168.1.225:3306) is not reachable!
Tue Oct 28 11:31:39 2014 - [warning] SSH is reachable.
Tue Oct 28 11:31:39 2014 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/mha/mha.conf again, and trying to connect to all servers to check server status..
Tue Oct 28 11:31:39 2014 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Oct 28 11:31:39 2014 - [info] Reading application default configurations from /etc/mha/mha.conf..
Tue Oct 28 11:31:39 2014 - [info] Reading server configurations from /etc/mha/mha.conf..
Tue Oct 28 11:31:39 2014 - [info] Dead Servers:
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Alive Servers:
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.226(192.168.1.226:3306)
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.227(192.168.1.227:3306)
Tue Oct 28 11:31:39 2014 - [info] Alive Slaves:
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.226(192.168.1.226:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:39 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.227(192.168.1.227:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:39 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:39 2014 - [info] Checking slave configurations..
Tue Oct 28 11:31:39 2014 - [info] Checking replication filtering settings..
Tue Oct 28 11:31:39 2014 - [info] Replication filtering check ok.
Tue Oct 28 11:31:39 2014 - [info] Master is down!
Tue Oct 28 11:31:39 2014 - [info] Terminating monitoring script.
Tue Oct 28 11:31:39 2014 - [info] Got exit code 20 (Master dead).
Tue Oct 28 11:31:39 2014 - [info] MHA::MasterFailover version 0.53.
Tue Oct 28 11:31:39 2014 - [info] Starting master failover.
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:39 2014 - [info] * Phase 1: Configuration Check Phase..
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:39 2014 - [info] Dead Servers:
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Checking master reachability via mysql(double check)..
Tue Oct 28 11:31:39 2014 - [info] ok.
Tue Oct 28 11:31:39 2014 - [info] Alive Servers:
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.226(192.168.1.226:3306)
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.227(192.168.1.227:3306)
Tue Oct 28 11:31:39 2014 - [info] Alive Slaves:
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.226(192.168.1.226:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:39 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.227(192.168.1.227:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:39 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:39 2014 - [info] ** Phase 1: Configuration Check Phase completed.
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:39 2014 - [info] * Phase 2: Dead Master Shutdown Phase..
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:39 2014 - [info] Forcing shutdown so that applications never connect to the current master..
Tue Oct 28 11:31:39 2014 - [info] Executing master IP deactivatation script:
Tue Oct 28 11:31:39 2014 - [info] /usr/local/scripts/master_ip_failover --orig_master_host=192.168.1.225 --orig_master_ip=192.168.1.225 --orig_master_port=3306 --command=stopssh --ssh_user=root
IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 192.168.1.231;/sbin/arping -I eth0 -c 3 -s 192.168.1.231 192.168.1.1 >/dev/null 2>&1===
Disabling the VIP on old master: 192.168.1.225
Tue Oct 28 11:31:39 2014 - [info] done.
Tue Oct 28 11:31:39 2014 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Tue Oct 28 11:31:39 2014 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:39 2014 - [info] * Phase 3: Master Recovery Phase..
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:39 2014 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:39 2014 - [info] The latest binary log file/position on all slaves is mysql-bin.000016:120
Tue Oct 28 11:31:39 2014 - [info] Latest slaves (Slaves that received relay log files to the latest):
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.226(192.168.1.226:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:39 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.227(192.168.1.227:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:39 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:39 2014 - [info] The oldest binary log file/position on all slaves is mysql-bin.000016:120
Tue Oct 28 11:31:39 2014 - [info] Oldest slaves:
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.226(192.168.1.226:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:39 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:39 2014 - [info] 192.168.1.227(192.168.1.227:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:39 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:39 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:39 2014 - [info] * Phase 3.2: Saving Dead Master's Binlog Phase..
Tue Oct 28 11:31:39 2014 - [info]
Tue Oct 28 11:31:40 2014 - [info] Fetching dead master's binary logs..
Tue Oct 28 11:31:40 2014 - [info] Executing command on the dead master 192.168.1.225(192.168.1.225:3306): save_binary_logs --command=save --start_file=mysql-bin.000016 --start_pos=120 --binlog_dir=/mydata --output_file=/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.53
Creating /usr/local/mha if not exists.. ok.
Concat binary/relay logs from mysql-bin.000016 pos 120 to mysql-bin.000016 EOF into /usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog ..
Dumping binlog format description event, from position 0 to 120.. ok.
Dumping effective binlog data from /mydata/mysql-bin.000016 position 120 to tail(143).. ok.
Concat succeeded.
Tue Oct 28 11:31:41 2014 - [info] scp from root@192.168.1.225:/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog to local:/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog succeeded.
Tue Oct 28 11:31:41 2014 - [info] HealthCheck: SSH to 192.168.1.226 is reachable.
Tue Oct 28 11:31:42 2014 - [info] HealthCheck: SSH to 192.168.1.227 is reachable.
Tue Oct 28 11:31:42 2014 - [info]
Tue Oct 28 11:31:42 2014 - [info] * Phase 3.3: Determining New Master Phase..
Tue Oct 28 11:31:42 2014 - [info]
Tue Oct 28 11:31:42 2014 - [info] Finding the latest slave that has all relay logs for recovering other slaves..
Tue Oct 28 11:31:42 2014 - [info] All slaves received relay logs to the same position. No need to resync each other.
Tue Oct 28 11:31:42 2014 - [info] Searching new master from slaves..
Tue Oct 28 11:31:42 2014 - [info] Candidate masters from the configuration file:
Tue Oct 28 11:31:42 2014 - [info] 192.168.1.226(192.168.1.226:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:42 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:42 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:42 2014 - [info] 192.168.1.227(192.168.1.227:3306) Version=5.6.17-log (oldest major version between slaves) log-bin:enabled
Tue Oct 28 11:31:42 2014 - [info] Replicating from 192.168.1.225(192.168.1.225:3306)
Tue Oct 28 11:31:42 2014 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Oct 28 11:31:42 2014 - [info] Non-candidate masters:
Tue Oct 28 11:31:42 2014 - [info] Searching from candidate_master slaves which have received the latest relay log events..
Tue Oct 28 11:31:42 2014 - [info] New master is 192.168.1.226(192.168.1.226:3306)
Tue Oct 28 11:31:42 2014 - [info] Starting master failover..
Tue Oct 28 11:31:42 2014 - [info]
From:
192.168.1.225 (current master)
+--192.168.1.226
+--192.168.1.227
To:
192.168.1.226 (new master)
+--192.168.1.227
Tue Oct 28 11:31:42 2014 - [info]
Tue Oct 28 11:31:42 2014 - [info] * Phase 3.3: New Master Diff Log Generation Phase..
Tue Oct 28 11:31:42 2014 - [info]
Tue Oct 28 11:31:42 2014 - [info] This server has all relay logs. No need to generate diff files from the latest slave.
Tue Oct 28 11:31:42 2014 - [info] Sending binlog..
Tue Oct 28 11:31:43 2014 - [info] scp from local:/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog to root@192.168.1.226:/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog succeeded.
Tue Oct 28 11:31:43 2014 - [info]
Tue Oct 28 11:31:43 2014 - [info] * Phase 3.4: Master Log Apply Phase..
Tue Oct 28 11:31:43 2014 - [info]
Tue Oct 28 11:31:43 2014 - [info] *NOTICE: If any error happens from this phase, manual recovery is needed.
Tue Oct 28 11:31:43 2014 - [info] Starting recovery on 192.168.1.226(192.168.1.226:3306)..
Tue Oct 28 11:31:43 2014 - [info] Generating diffs succeeded.
Tue Oct 28 11:31:43 2014 - [info] Waiting until all relay logs are applied.
Tue Oct 28 11:31:43 2014 - [info] done.
Tue Oct 28 11:31:43 2014 - [info] Getting slave status..
Tue Oct 28 11:31:43 2014 - [info] This slave(192.168.1.226)'s Exec_Master_Log_Pos equals to Read_Master_Log_Pos(mysql-bin.000016:120). No need to recover from Exec_Master_Log_Pos.
Tue Oct 28 11:31:43 2014 - [info] Connecting to the target slave host 192.168.1.226, running recover script..
Tue Oct 28 11:31:43 2014 - [info] Executing command: apply_diff_relay_logs --command=apply --slave_user=root --slave_host=192.168.1.226 --slave_ip=192.168.1.226 --slave_port=3306 --apply_files=/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog --workdir=/usr/local/mha --target_version=5.6.17-log --timestamp=20141028113139 --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.53 --slave_pass=xxx
Tue Oct 28 11:31:43 2014 - [info]
Applying differential binary/relay log files /usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog on 192.168.1.226:3306. This may take long time...
Applying log files succeeded.
Tue Oct 28 11:31:43 2014 - [info] All relay logs were successfully applied.
Tue Oct 28 11:31:43 2014 - [info] Getting new master's binlog name and position..
Tue Oct 28 11:31:43 2014 - [info] mysql-bin.000010:120
Tue Oct 28 11:31:43 2014 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.1.226', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000010', MASTER_LOG_POS=120, MASTER_USER='replication', MASTER_PASSWORD='xxx';
Tue Oct 28 11:31:43 2014 - [info] Executing master IP activate script:
Tue Oct 28 11:31:43 2014 - [info] /usr/local/scripts/master_ip_failover --command=start --ssh_user=root --orig_master_host=192.168.1.225 --orig_master_ip=192.168.1.225 --orig_master_port=3306 --new_master_host=192.168.1.226 --new_master_ip=192.168.1.226 --new_master_port=3306
IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 192.168.1.231;/sbin/arping -I eth0 -c 3 -s 192.168.1.231 192.168.1.1 >/dev/null 2>&1===
Enabling the VIP - 192.168.1.231 on the new master - 192.168.1.226
Tue Oct 28 11:31:46 2014 - [info] OK.
Tue Oct 28 11:31:46 2014 - [info] Setting read_only=0 on 192.168.1.226(192.168.1.226:3306)..
Tue Oct 28 11:31:46 2014 - [info] ok.
Tue Oct 28 11:31:46 2014 - [info] ** Finished master recovery successfully.
Tue Oct 28 11:31:46 2014 - [info] * Phase 3: Master Recovery Phase completed.
Tue Oct 28 11:31:46 2014 - [info]
Tue Oct 28 11:31:46 2014 - [info] * Phase 4: Slaves Recovery Phase..
Tue Oct 28 11:31:46 2014 - [info]
Tue Oct 28 11:31:46 2014 - [info] * Phase 4.1: Starting Parallel Slave Diff Log Generation Phase..
Tue Oct 28 11:31:46 2014 - [info]
Tue Oct 28 11:31:46 2014 - [info] -- Slave diff file generation on host 192.168.1.227(192.168.1.227:3306) started, pid: 2761. Check tmp log /usr/local/mha/192.168.1.227_3306_20141028113139.log if it takes time..
Tue Oct 28 11:31:46 2014 - [info]
Tue Oct 28 11:31:46 2014 - [info] Log messages from 192.168.1.227 ...
Tue Oct 28 11:31:46 2014 - [info]
Tue Oct 28 11:31:46 2014 - [info] This server has all relay logs. No need to generate diff files from the latest slave.
Tue Oct 28 11:31:46 2014 - [info] End of log messages from 192.168.1.227.
Tue Oct 28 11:31:46 2014 - [info] -- 192.168.1.227(192.168.1.227:3306) has the latest relay log events.
Tue Oct 28 11:31:46 2014 - [info] Generating relay diff files from the latest slave succeeded.
Tue Oct 28 11:31:46 2014 - [info]
Tue Oct 28 11:31:46 2014 - [info] * Phase 4.2: Starting Parallel Slave Log Apply Phase..
Tue Oct 28 11:31:46 2014 - [info]
Tue Oct 28 11:31:46 2014 - [info] -- Slave recovery on host 192.168.1.227(192.168.1.227:3306) started, pid: 2763. Check tmp log /usr/local/mha/192.168.1.227_3306_20141028113139.log if it takes time..
Tue Oct 28 11:31:48 2014 - [info]
Tue Oct 28 11:31:48 2014 - [info] Log messages from 192.168.1.227 ...
Tue Oct 28 11:31:48 2014 - [info]
Tue Oct 28 11:31:46 2014 - [info] Sending binlog..
Tue Oct 28 11:31:47 2014 - [info] scp from local:/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog to root@192.168.1.227:/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog succeeded.
Tue Oct 28 11:31:47 2014 - [info] Starting recovery on 192.168.1.227(192.168.1.227:3306)..
Tue Oct 28 11:31:47 2014 - [info] Generating diffs succeeded.
Tue Oct 28 11:31:47 2014 - [info] Waiting until all relay logs are applied.
Tue Oct 28 11:31:47 2014 - [info] done.
Tue Oct 28 11:31:47 2014 - [info] Getting slave status..
Tue Oct 28 11:31:47 2014 - [info] This slave(192.168.1.227)'s Exec_Master_Log_Pos equals to Read_Master_Log_Pos(mysql-bin.000016:120). No need to recover from Exec_Master_Log_Pos.
Tue Oct 28 11:31:47 2014 - [info] Connecting to the target slave host 192.168.1.227, running recover script..
Tue Oct 28 11:31:47 2014 - [info] Executing command: apply_diff_relay_logs --command=apply --slave_user=root --slave_host=192.168.1.227 --slave_ip=192.168.1.227 --slave_port=3306 --apply_files=/usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog --workdir=/usr/local/mha --target_version=5.6.17-log --timestamp=20141028113139 --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.53 --slave_pass=xxx
Tue Oct 28 11:31:47 2014 - [info]
Applying differential binary/relay log files /usr/local/mha/saved_master_binlog_from_192.168.1.225_3306_20141028113139.binlog on 192.168.1.227:3306. This may take long time...
Applying log files succeeded.
Tue Oct 28 11:31:47 2014 - [info] All relay logs were successfully applied.
Tue Oct 28 11:31:47 2014 - [info] Resetting slave 192.168.1.227(192.168.1.227:3306) and starting replication from the new master 192.168.1.226(192.168.1.226:3306)..
Tue Oct 28 11:31:48 2014 - [info] Executed CHANGE MASTER.
Tue Oct 28 11:31:48 2014 - [info] Slave started.
Tue Oct 28 11:31:48 2014 - [info] End of log messages from 192.168.1.227.
Tue Oct 28 11:31:48 2014 - [info] -- Slave recovery on host 192.168.1.227(192.168.1.227:3306) succeeded.
Tue Oct 28 11:31:48 2014 - [info] All new slave servers recovered successfully.
Tue Oct 28 11:31:48 2014 - [info]
Tue Oct 28 11:31:48 2014 - [info] * Phase 5: New master cleanup phease..
Tue Oct 28 11:31:48 2014 - [info]
Tue Oct 28 11:31:48 2014 - [info] Resetting slave info on the new master..
Tue Oct 28 11:31:48 2014 - [info] 192.168.1.226: Resetting slave info succeeded.
Tue Oct 28 11:31:48 2014 - [info] Master failover to 192.168.1.226(192.168.1.226:3306) completed successfully.
Tue Oct 28 11:31:48 2014 - [info]
----- Failover Report -----
mha: MySQL Master failover 192.168.1.225 to 192.168.1.226 succeeded
Master 192.168.1.225 is down!
Check MHA Manager logs at test_redis:/usr/local/mha/manager.log for details.
Started automated(non-interactive) failover.
Invalidated master IP address on 192.168.1.225.
The latest slave 192.168.1.226(192.168.1.226:3306) has all relay logs for recovery.
Selected 192.168.1.226 as a new master.
192.168.1.226: OK: Applying all logs succeeded.
192.168.1.226: OK: Activated master IP address.
192.168.1.227: This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
192.168.1.227: OK: Applying all logs succeeded. Slave started, replicating from 192.168.1.226.
192.168.1.226: Resetting slave info succeeded.
Master failover to 192.168.1.226(192.168.1.226:3306) completed successfully.
|
1
2
|
# service mysqld start
Starting MySQL.. SUCCESS!
|
1
2
3
4
5
6
7
8
9
|
mysql> change master to master_host='192.168.1.226',
-> master_user='replication',
-> master_password='123456',
-> master_log_file='mysql-bin.000010',
-> master_log_pos=120;
Query OK, 0 rows affected, 2 warnings (0.38 sec)
mysql> start slave;
Query OK, 0 rows affected (0.08 sec)
|
1
2
3
4
|
# nohup masterha_manager --conf=/etc/mha/mha.conf > /tmp/mha_manager.log < /dev/null 2>&1 &
# masterha_check_status --conf=/etc/mha/mha.conf
mha (pid:2854) is running(0:PING_OK), master:192.168.1.226
|