MHA软件安装及配置
1.1 程序软连接
ln -s /app/mysql/bin/mysqlbinlog /usr/bin/mysqlbinlog
ln -s /app/mysql/bin/mysql /usr/bin/mysql
1.2各节点进行互信
db01:
rm -rf /root/.ssh
ssh-keygen
cd /root/.ssh
mv id_rsa.pub authorized_keys
scp -r /root/.ssh 10.0.0.52:/root
scp -r /root/.ssh 10.0.0.53:/root
1.3各节点验证
db01:
ssh 10.0.0.51 date
ssh 10.0.0.52 date
ssh 10.0.0.53 date
db02:
ssh 10.0.0.51 date
ssh 10.0.0.52 date
ssh 10.0.0.53 date
db03:
ssh 10.0.0.51 date
ssh 10.0.0.52 date
ssh 10.0.0.53 date
2 安装软件
2.1所有节点安装Node软件依赖包
yum install perl-DBD-MySQL -y
rpm -ivh mha4mysql-node-0.58-0.el7.centos.noarch.rpm
2.2Manager软件安装(db03)
yum install -y perl-Config-Tiny epel-release perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes
yum install -y mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
3.1 在db01主库中创建mha需要的用户
grant all privileges on *.* to mha@'10.0.0.%' identified by 'mha';
3 Manager配置文件准备(db03)
3.1创建配置文件目录
mkdir -p /etc/mha
3.2创建日志目录
mkdir -p /var/log/mha/app1
3.3编辑mha配置文件
cat > /etc/mha/app1.cnf <<EOF
[server default]
manager_log=/var/log/mha/app1/manager
manager_workdir=/var/log/mha/app1
master_binlog_dir=/data/binlog
user=mha
password=mha
ping_interval=2
repl_password=123
repl_user=repl
ssh_user=root
[server1]
hostname=10.0.0.51
port=3306
[server2]
hostname=10.0.0.52
port=3306
candidate_master=1
[server3]
no_master=1
hostname=10.0.0.53
port=3306
EOF
4 状态检查(db03)
masterha_check_ssh --conf=/etc/mha/app1.cnf
masterha_check_repl --conf=/etc/mha/app1.cnf
5开启MHA-manager
开启MHA(db03):
nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null> /var/log/mha/app1/manager.log 2>&1 &
6 查看MHA状态
[root@db03 ~]# masterha_check_status --conf=/etc/mha/app1.cnf
app1 (pid:4719) is running(0:PING_OK), master:10.0.0.51
vip 故障转移脚本
1上传脚本文件到/usr/local/bin
[root@db03 mha_script]# cp -a * /usr/local/bin
2 修改权限
[root@db03 bin]# chmod +x /usr/local/bin/*
3 修改内容
[root@db03 bin]#vim /usr/local/bin/master_ip_failover
my $vip = '10.0.0.220/24';
my $key = '1';
my $if = 'eth0';
my $ssh_start_vip = "/sbin/ifconfig $if:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig $if:$key down";
my $ssh_Bcast_arp= "/sbin/arping -I $if -c 3 -A 10.0.0.220";
[root@db03 bin]# yum install -y dos2unix
[root@db03 bin]# dos2unix /usr/local/bin/master_ip_failover
[root@db03 bin]# chmod +x /usr/local/bin/master_ip_failover
4 修改Manager 配置文件
vim /etc/mha/app1.cnf
master_ip_failover_script=/usr/local/bin/master_ip_failover
5 重启MHA
[root@db03 bin]# masterha_stop --conf=/etc/mha/app1.cnf
[root@db03 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null> /var/log/mha/app1/manager.log 2>&1 &
6 手工在主库添加VIP
[root@db02 ~]# ifconfig eth0:1 10.0.0.220/24
7宕机修复
7.0 监控日志
managerj节点监控日志
tail -f /var/log/mha/app1/manager
7.1关闭主库
systemctl stop mysqld
7.2检查VIP漂移情况
各节点 执行 ip a
7.3修复原主库为从节点
change master to master_host='10.0.0.33',
master_user='repl',
master_password='123' ,
MASTER_AUTO_POSITION=1;
db01 [(none)]>start slave;
7.4启动MHA
[root@db03 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null> /var/log/mha/app1/manager.log 2>&1 &
8.故障提醒功能
8.1 准备脚本
[root@db03 bin]# vim send_report
my $smtp='smtp.163.com'; # smtp服务器
my $mail_from='mdy1868778@163.com'; # 发件箱
my $mail_user='mdy1868778'; # 用户名
my $mail_pass='mdy1868778'; # 授权码
my $mail_to=['11582777@qq.com']; # 收件箱
#my $mail_to=['to1@qq.com','to2@qq.com'];
8.2 修改配置文件
vim /etc/mha/app1.cnf
添加一行:
report_script=/usr/local/bin/send_report
8.3 重启MHA
[root@db03 bin]# masterha_stop --conf=/etc/mha/app1.cnf
[root@db03 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null> /var/log/mha/app1/manager.log 2>&1 &
8.4宕机检查邮箱
主库: systemctl stop mysqld
查看邮箱有没有收到邮件
9额外的数据补偿(binlog_server)
9.1找一台额外的机器,必须要有5.6以上的版本,支持gtid并开启,我们直接用的第二个slave(db03)
vim /etc/mha/app1.cnf
[binlog1]
no_master=1
hostname=10.0.0.53
master_binlog_dir=/data/mysql/binlog
9.2 创建必要目录
mkdir -p /data/mysql/binlog
chown -R mysql.mysql /data/*
9.3 拉取主库binlog日志
cd /data/mysql/binlog -----必须进入到自己创建好的目录
mysql -e "show slave status \G"|grep "Master_Log"
mysqlbinlog -R --host=10.0.0.51 --user=mha --password=mha --raw --stop-never mysql-bin.000001 &
注意:
拉取日志的起点,需要按照目前主库正在使用的binlog为起点.
9.4 重启MHA-manager
[root@db03 bin]# masterha_stop --conf=/etc/mha/app1.cnf
[root@db03 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null> /var/log/mha/app1/manager.log 2>&1 &
9.5. 故障模拟及故障处理
9.5.1 宕掉 db01 数据库
/etc/init.d/mysqld stop
恢复故障
(1) 启动故障节点
[root@db01 ~]# /etc/init.d/mysqld start
(2) 恢复1主2从(db01)
[root@db03 bin]# grep "CHANGE MASTER TO" /var/log/mha/app1/manager
Thu Jul 18 18:31:54 2019 - [info] All other slaves should start replication from here. Statement should be:
CHANGE MASTER TO
MASTER_HOST='10.0.0.52',
MASTER_PORT=3306,
MASTER_AUTO_POSITION=1,
MASTER_USER='repl',
MASTER_PASSWORD='xxx';
[root@db03 bin]#
db01 [(none)]>CHANGE MASTER TO
MASTER_HOST='10.0.0.52',
MASTER_PORT=3306,
MASTER_AUTO_POSITION=1,
MASTER_USER='repl',
MASTER_PASSWORD='123';
db01 [(none)]>start slave;
(3) 恢复配置文件(db03)
[server1]
hostname=10.0.0.51
port=3306
[server2]
hostname=10.0.0.52
port=3306
[server3]
hostname=10.0.0.53
port=3306
(4)恢复binlogserver
cd /data/mysql/binlog
rm -rf /data/mysql/binlog/*
[root@db03 ~]# mysql -e "show slave status \G"|grep "Master_Log"
mysqlbinlog -R --host=10.0.0.52 --user=mha --password=mha --raw --stop-never mysql-bin.000009 &
(5) 启动MHA
[root@db03 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null> /var/log/mha/app1/manager.log 2>&1 &
[1] 16543
[root@db03 bin]# masterha_check_status --conf=/etc/mha/app1.cnf
app1 (pid:16543) is running(0:PING_OK), master:10.0.0.52
10在线切换主从
10.1只切换角色
masterha_master_switch --conf=/etc/mha/app1.cnf --master_state=alive --new_master_host=10.0.0.52 --orig_master_is_new_slave --running_updates_limit=10000
注意:
- 此种方法切换,要注意将原主库,FTWRL(Flush table with read lock),否则会造成主从不一致。
- 手工切换vip
- 重新拉去新主库的binlog
10.2master_ip_online_change_script脚本功能作用
功能: 在线切换时,自动锁原主库,VIP自动切换
10.3 准备切换脚本
vim /usr/local/bin/master_ip_online_change
my $vip = "10.0.0.220/24";
my $key = "1";
my $ssh_start_vip = "/sbin/ifconfig ens33:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig ens33:$key $vip down";
my $ssh_Bcast_arp= "/sbin/arping -I ens33 -c 3 -A 10.0.0.220";
10.4 修改MHA配置文件
vim /etc/mha/app1.cnf
master_ip_online_change_script=/usr/local/bin/master_ip_online_change
10.5 停 MHA
[root@db03 bin]# masterha_stop --conf=/etc/mha/app1.cnf
10.6 检查repl
[root@db03 bin]# masterha_check_repl --conf=/etc/mha/app1.cnf
10.7 在线切换
masterha_master_switch --conf=/etc/mha/app1.cnf --master_state=alive --new_master_host=10.0.0.51 --orig_master_is_new_slave --running_updates_limit=10000
10.8重构binlogserver
[root@db03 bin]# ps -ef |grep mysqlbinlog
root 28144 16272 0 17:50 pts/1 00:00:00 mysqlbinlog -R --host=10.0.0.52 --user=mha --password=x x --raw --stop-never mysql-bin.000005
root 28529 16272 0 18:03 pts/1 00:00:00 grep --color=auto mysqlbinlog
[root@db03 bin]# kill -9 28144
[root@db03 bin]# cd /data/binlog_server/
[root@db03 binlog_server]# ll
total 4
-rw-r----- 1 root root 194 Apr 1 17:50 mysql-bin.000005
[root@db03 binlog_server]# rm -rf *
[root@db03 binlog_server]# mysqlbinlog -R --host=10.0.0.51 --user=mha --password=mha --raw --stop-never mysql-bin.000009 &
[1] 28534
10.9 启动MHA
[root@db03 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null> /var/log/mha/app1/manager.log 2>&1 &
[root@db03 binlog_server]# masterha_check_status --conf=/etc/mha/app1.cnf
app1 (pid:28535) is running(0:PING_OK), master:10.0.0.51
11MHA高可用修复思路
11.1 检查三节点是否启动
11.2 检查主从关系
1主2从:
[root@db03 binlog_server]# mysql -e "show slave status\G" |grep "Master_Host"
Master_Host: 10.0.0.52
[root@db01 data]# mysql -e "show slave status\G" |grep "Master_Host"
修复1主从:
db01:
change master to
master_host='10.0.0.52',
master_user='repl',
master_password='123' ,
MASTER_AUTO_POSITION=1;
start slave;
[root@db01 data]# mysql -e "show slave status\G" |grep "Master_Host"
Master_Host: 10.0.0.52
11.3 检查vip是否在主库
[root@db02 data]# ip a
11.4 检查binlog_server状态
[root@db03 binlog_server]# ps -ef |grep mysqlbinlog
root 77828 39593 0 17:53 pts/2 00:00:00 grep --color=auto mysqlbinlog
[root@db03 binlog_server]#
修复binlog_server:
[root@db03 binlog_server]# rm -rf /data/binlog_server/*
[root@db03 binlog_server]# cd /data/binlog_server/
[root@db03 ~]# mysql -e "show slave status \G"|grep "Master_Log"
[root@db03 ~]# mysqlbinlog -R --host=10.0.0.52 --user=mha --password=mha --raw --stop-never mysql-bin.000002 &
11.5 检查配置文件
三个节点是否存在:
[root@db03 binlog_server]# cat /etc/mha/app1.cnf
11.6 检查ssh互信和repl
[root@db03 ~]# masterha_check_ssh --conf=/etc/mha/app1.cnf
[root@db03 ~]# masterha_check_repl --conf=/etc/mha/app1.cnf
11.7 启动MHA
[root@db03 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null> /var/log/mha/app1/manager.log 2>&1 &
[root@db03 ~]# masterha_check_status --conf=/etc/mha/app1.cnf
app1 (pid:78201) is running(0:PING_OK), master:10.0.0.52
[root@db03 ~]#