一、恢复MHA故障
1.手动修复
1)修复挂掉的数据库
[root@db01 ~]# systemctl start mysqld
2)在MHA管理日志中找到主从语句(CHANGE MASTER TO)
[root@db03 ~]# grep 'CHANGE MASTER TO' /service/mha/manager
Mon Nov 9 20:14:17 2020 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='172.16.1.52', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000007', MASTER_LOG_POS=120, MASTER_USER='rep', MASTER_PASSWORD='xxx';
3)修复的数据库执行change语句
#日志中的密码被隐藏为'xxx',需将其替换为复制用户rep的真实密码(即MHA配置中的repl_password)后再执行
mysql> CHANGE MASTER TO MASTER_HOST='172.16.1.52', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000007', MASTER_LOG_POS=120, MASTER_USER='rep', MASTER_PASSWORD='123';
Query OK, 0 rows affected, 2 warnings (0.01 sec)
mysql> start slave;
Query OK, 0 rows affected (0.00 sec)
4)查看主从状态
mysql> show slave status\G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 172.16.1.52
Master_User: rep
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000007
Read_Master_Log_Pos: 211
Relay_Log_File: db01-relay-bin.000002
Relay_Log_Pos: 374
Relay_Master_Log_File: mysql-bin.000007
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
5)修复MHA配置
#如果启动MHA时使用了 --remove_dead_master_conf 参数,故障切换完成后故障主库对应的配置段会被从配置文件中删除,需要把修复好的节点重新加回配置文件
[root@db03 ~]# vim /service/mha/app1.cnf
[server default]
manager_log=/service/mha/manager
manager_workdir=/service/mha/app1
master_binlog_dir=/usr/local/mysql/data
password=mha
ping_interval=2
repl_password=123
repl_user=rep
ssh_user=root
user=mha
[server1]
hostname=172.16.1.51
port=3306
[server2]
hostname=172.16.1.52
port=3306
[server3]
hostname=172.16.1.53
port=3306
6)重新启动MHA
[root@db03 ~]# nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &
2.脚本修复
1)创建脚本目录
[root@db02 ~]# mkdir /scr