1 测试目标
- 集群健康状况如何判断 —— masterha查看ssh,repl,status 三个状态
- master故障后,slave能否自动切换——能
- 故障切换效率:切换一次平均耗费多久——局域网下十几秒
- slave切换成功之后,能否正常读写——能
- master恢复后,能否自动加入集群变成slave——不能,需要手动干预
- 数据同步效率:同步一定量的数据需要耗费多久——取决于机器本身的性能和网络状况
- 测试时出现的问题需要记录
2 集群搭建与配置
2.1 环境描述
- MHA需要管理节点,管理节点最好是额外搭建在非mysql集群的主机上,为简化测试环节,放在mysql集群的主机上了
172.16.212.31 mha node1,mha manager,mysql slave1
172.16.212.32 mha node2,mysql slave2
172.16.212.33 mha node3,mysql master(初始)
2.2 准备工作
- 关闭selinux
- 关闭firewalld
- 配置ssh相互信任
- 配置hosts解析
- 下载软件包
# mha
wget https://github.com/yoshinorim/mha4mysql-manager/releases/download/v0.58/mha4mysql-manager-0.58.tar.gz
wget https://github.com/yoshinorim/mha4mysql-node/releases/download/v0.58/mha4mysql-node-0.58.tar.gz
2.3 具体配置
1)mysql
- 全部节点:
wget http://dev.mysql.com/get/mysql57-community-release-el7-8.noarch.rpm
yum localinstall mysql57-community-release-el7-8.noarch.rpm -y
yum install mysql-community-server -y
systemctl start mysqld
grep 'temporary password' /var/log/mysqld.log
mysql -uroot -p
ALTER USER 'root'@'localhost' IDENTIFIED BY '!QA2ws#ED';
GRANT all ON *.* TO manager@'%' IDENTIFIED BY 'manager_1234'; #用于监控的用户
grant all on *.* to root@"%" identified by "!QA2ws#ED"; #用于ssh
grant replication slave on *.* to repl@"%" identified by "!QA2ws#ED"; #用于数据同步
- 主库
vim /etc/my.cnf
[mysqld]
plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
rpl-semi-sync-master-enabled = 1
rpl-semi-sync-slave-enabled = 1
server_id=33
log-bin=master33
binlog-format="mixed"
mysql -uroot -p
mysql> set global relay_log_purge=off;
mysql> grant replication slave on *.* to repl@'%' identified by '!QA2ws#ED';
mysql> show master status;
+-----------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+-----------------+----------+--------------+------------------+-------------------+
| master33.000001 | 1123 | | | |
+-----------------+----------+--------------+------------------+-------------------+
1 row in set (0.00 sec)
ifconfig ens33:1 172.16.212.100
- 从库
vim /etc/my.cnf
[mysqld]
server_id=32
log-bin=node
plugin-load="rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
rpl_semi_sync_master_enabled=1
rpl_semi_sync_slave_enabled=1
relay_log_purge=off
mysql -uroot -p
mysql> change master to master_host="172.16.212.33",master_user="repl",master_password="!QA2ws#ED",master_log_file="master33.000001",master_log_pos=1123;
mysql> start slave;
mysql> show slave status\G;
2)mha-node
yum install epel-release -y
yum -y install perl-DBD-MySQL perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker perl-CPAN perl-Mail-Sender perl-Log-Dispatch
tar zvxf mha4mysql-node-0.58.tar.gz
cd mha4mysql-node-0.58
perl Makefile.PL
make && make install
3)mha-master
- 安装
yum install -y perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Config-IniFiles perl-Time-HiRes
tar zvxf mha4mysql-manager-0.58.tar.gz
cd mha4mysql-manager-0.58
perl Makefile.PL
make && make install
- 配置文件
#/etc/masterha/app1.cnf
[server default]
manager_log=/var/log/masterha/app1/manager.log
manager_workdir=/var/log/masterha/app1
master_ip_failover_script=/usr/local/bin/master_ip_failover
master_ip_online_change_script=/usr/local/bin/master_ip_online_change
password=!QA2ws#ED
ping_interval=1
repl_password=!QA2ws#ED
repl_user=repl
ssh_user=root
user=root
[server1]
hostname=172.16.212.33
master_binlog_dir=/var/lib/mysql
candidate_master=1
check_repl_delay=0
[server2]
hostname=172.16.212.32
master_binlog_dir=/var/lib/mysql
candidate_master=1
check_repl_delay=0
[server3]
hostname=172.16.212.31
master_binlog_dir=/var/lib/mysql
ignore_fail=1
no_master=1
- 添加和删除vip
#./etc/rc.d/init.d/mhaserver
VIP=172.100.100.100
case "$1" in
start)
ifconfig lo:0 $VIP netmask 255.255.255.255 broadcast $VIP
/sbin/route add -host $VIP dev lo:0
echo "1" >/proc/sys/net/ipv4/conf/lo/arp_ignore
echo "2" >/proc/sys/net/ipv4/conf/lo/arp_announce
echo "1" >/proc/sys/net/ipv4/conf/all/arp_ignore
echo "2" >/proc/sys/net/ipv4/conf/all/arp_announce
sysctl -p >/dev/null 2>&1
echo "Server Started"
;;
stop)
ifconfig lo:0 down
route del $VIP >/dev/null 2>&1
echo "0" >/proc/sys/net/ipv4/conf/lo/arp_ignore
echo "0" >/proc/sys/net/ipv4/conf/lo/arp_announce
echo "0" >/proc/sys/net/ipv4/conf/all/arp_ignore
echo "0" >/proc/sys/net/ipv4/conf/all/arp_announce
echo "Server Stoped"
;;
*)
echo "Usage: $0 {start|stop}"
exit 1
esac
exit 0
- 故障切换脚本
#编辑脚本,在35行添加内容:
vim /usr/local/bin/master_ip_failover
#!/usr/bin/env perl
# Copyright (C) 2011 DeNA Co.,Ltd.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
## Note: This is a sample script and is not complete. Modify the script based on your environment.
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
#use MHA::DBHelper;
my (
$command, $ssh_user, $orig_master_host,
$orig_master_ip, $orig_master_port, $new_master_host,
$new_master_ip, $new_master_port
);
my $vip = '172.16.212.100';
my $key = '1';
my $ssh_start_vip = "sudo service mhaserver start";
my $ssh_stop_vip = "sudo service mhaserver stop";
GetOptions(
'command=s' => \$command,
'ssh_user=s' => \$ssh_user,
'orig_master_host=s' => \$orig_master_host,
'orig_master_ip=s' => \$orig_master_ip,
'orig_master_port=i' => \$orig_master_port,
'new_master_host=s' => \$new_master_host,
'new_master_ip=s' => \$new_master_ip,
'new_master_port=i' => \$new_master_port,
);
exit &main();
sub main {
print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";
if ( $command eq "stop" || $command eq "stopssh" ) {
# $orig_master_host, $orig_master_ip, $orig_master_port are passed.
# If you manage master ip address at global catalog database,
# invalidate orig_master_ip here.
my $exit_code = 1;
eval {
print "Disabling the VIP on old master: $orig_master_host \n";
# updating global catalog, etc
&stop_vip();
$exit_code = 0;
};
if ($@) {
warn "Got Error: $@\n";
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "start" ) {
# all arguments are passed.
# If you manage master ip address at global catalog database,
# activate new_master_ip here.
# You can also grant write access (create user, set read_only=0, etc) here.
my $exit_code = 10;
eval {
print "Enabling the VIP - $vip on the new master - $new_master_host \n";
&start_vip();
$exit_code = 0;
};
if ($@) {
warn $@;
# If you want to continue failover, exit 10.
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "status" ) {
# do nothing
exit 0;
}
else {
&usage();
exit 1;
}
}
sub start_vip() {
`ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
sub stop_vip() {
return 0 unless ($ssh_user);
`ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}
sub usage {
print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}
- 后台启动监控
#开启mha监控
nohup masterha_manager --conf=/etc/masterha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/masterha/app1/manager.log 2>&1 &
#mha管理命令:
masterha_check_repl --conf=/etc/masterha/app1.cnf 检查MySQL复制状况
masterha_check_ssh --conf=/etc/masterha/app1.cnf 检查MHA的SSH配置状况
masterha_check_status --conf=/etc/masterha/app1.cnf 检测当前MHA运行状态
masterha_conf_host 添加或删除配置的server信息
masterha_manager --conf=/etc/masterha/app1.cnf 启动MHA
masterha_stop --conf=/etc/masterha/app1.cnf 停止MHA
masterha_master_monitor --conf=/etc/masterha/app1.cnf 检测master是否宕机
masterha_master_switch --conf=/etc/masterha/app1.cnf 控制故障转移(自动或者手动)
masterha_secondary_check 多种线路检测master是否存活
3 测试过程
3.1 主库插入数据-验证数据同步
create database test01;
create table table01 (
id int primary key,
name char not null,
age int not null
);
insert into table01 values(1,'k',18);
insert into table01 values(2,'n',22);
insert into table01 values(3,'t',43);
delete from table01 where id=1;
show databases;
show tables;
select * from table01;
结论:集群正常,数据同步正常!
3.2 模拟主库故障-验证自动切换
1)主库故障
systemctl stop mysqld
#主库故障之后会:
#1)在/etc/masterha/app1.cnf中删除该server的配置信息
#2)停止nohup后台启动的监控程序
2)自动切换
#vip是否漂移
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:d7:b2:5e brd ff:ff:ff:ff:ff:ff
inet 172.16.212.33/24 brd 172.16.212.255 scope global noprefixroute ens33
valid_lft forever preferred_lft forever
inet 172.16.212.100/24 brd 172.16.212.255 scope global secondary ens33:1
valid_lft forever preferred_lft forever
inet6 fe80::54be:f4fb:10a6:88a3/64 scope link noprefixroute
valid_lft forever preferred_lft forever
3)能否正常读写
#在新的主库上执行
mysql> select * from table01;
+----+------+-----+
| id | name | age |
+----+------+-----+
| 1 | k | 18 |
| 2 | n | 22 |
| 3 | t | 43 |
+----+------+-----+
3 rows in set (0.00 sec)
mysql> delete from table01 where id=1;
Query OK, 1 row affected (0.01 sec)
mysql> insert into table01 values(4,'h',55);
Query OK, 1 row affected (0.00 sec)
mysql> select * from table01;
+----+------+-----+
| id | name | age |
+----+------+-----+
| 2 | n | 22 |
| 3 | t | 43 |
| 4 | h | 55 |
+----+------+-----+
3 rows in set (0.00 sec)
3.3 模拟主库恢复-验证集群功能
#在故障主机上:
systemctl start mysqld
mysql> set global read_only=1;
mysql> change master to master_host="172.16.212.32",master_user="repl",master_password="!QA2ws#ED",master_log_file="node2.000006",master_log_pos=734;
mysql> start slave;
mysql> show slave status\G;
#在mha manager节点上添加回原故障主机的配置:
vim /etc/masterha/app1.cnf
[server1]
candidate_master=1
hostname=172.16.212.33
master_binlog_dir=/var/lib/mysql
#在mha manager节点上开启监控:
nohup masterha_manager --conf=/etc/masterha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/masterha/app1/manager.log 2>&1 &
#在新主库上插入数据:
mysql> use test01;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
mysql> select * from table01;
+----+------+-----+
| id | name | age |
+----+------+-----+
| 2 | n | 22 |
| 3 | t | 43 |
+----+------+-----+
2 rows in set (0.00 sec)
mysql> insert into table01 values(1,'k',18);
Query OK, 1 row affected (0.00 sec)
mysql> select * from table01;
+----+------+-----+
| id | name | age |
+----+------+-----+
| 1 | k | 18 |
| 2 | n | 22 |
| 3 | t | 43 |
+----+------+-----+
3 rows in set (0.00 sec)
3.4 结果总结
基本功能测试通过
故障恢复时需要先确认集群状态,做好主从配置之后再恢复!
4 故障切换效率测试
可见故障切换效率很高
5 主库故障发送邮件
vim /usr/local/bin/send_report
#!/usr/bin/perl
# Copyright (C) 2011 DeNA Co.,Ltd.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
## Note: This is a sample script and is not complete. Modify the script based on your environment.
use strict;
use warnings FATAL => 'all';
use Mail::Sender;
use Getopt::Long;
#new_master_host and new_slave_hosts are set only when recovering master succeeded
my ( $dead_master_host, $new_master_host, $new_slave_hosts, $subject, $body );
my $smtp='smtp.exmail.qq.com';
my $mail_from='发件人邮箱';
my $mail_user='发件人邮箱';
my $mail_pass='发件人邮箱密码';
my $mail_to=['收件人1邮箱','收件人2邮箱'];
GetOptions(
'orig_master_host=s' => \$dead_master_host,
'new_master_host=s' => \$new_master_host,
'new_slave_hosts=s' => \$new_slave_hosts,
'subject=s' => \$subject,
'body=s' => \$body,
);
mailToContacts($smtp,$mail_from,$mail_user,$mail_pass,$mail_to,$subject,$body);
sub mailToContacts {
my ( $smtp, $mail_from, $user, $passwd, $mail_to, $subject, $msg ) = @_;
open my $DEBUG, "> /tmp/monitormail.log"
or die "Can't open the debug file:$!\n";
my $sender = new Mail::Sender {
ctype => 'text/plain; charset=utf-8',
encoding => 'utf-8',
smtp => $smtp,
from => $mail_from,
auth => 'LOGIN',
TLS_allowed => '0',
authid => $user,
authpwd => $passwd,
to => $mail_to,
subject => $subject,
debug => $DEBUG
};
$sender->MailMsg(
{ msg => $msg,
debug => $DEBUG
}
) or print $Mail::Sender::Error;
return 1;
}
# Do whatever you want here
exit 0;
6 问题记录
1)Multi-master configuration is detected/There are 2 non-slave servers!
mysql> set global read_only=1;
mysql> change master to master_host="172.16.212.33",master_user="repl",master_password="!QA2ws#ED",master_log_file="master33.000005",master_log_pos=733;
mysql> start slave;
mysql> show slave status\G;