搭建:
环境:
主库
192.168.56.121:3306
从库
192.168.56.122:3306
--管理节点
192.168.56.125:3306
vip:192.168.56.166
配置hosts
cat << delimiter >> /etc/hosts
192.168.56.121 mysql1
192.168.56.122 mysql2
delimiter
主库:
grant replication slave on *.* to 'repl'@'192.168.56.121' identified by 'repl';
grant replication slave on *.* to 'repl'@'192.168.56.122' identified by 'repl';
grant replication slave on *.* to 'repl'@'192.168.56.125' identified by 'repl';
grant all on *.* to 'root'@'192.168.56.121' identified by 'root123';
grant all on *.* to 'root'@'192.168.56.122' identified by 'root123';
grant all on *.* to 'root'@'192.168.56.125' identified by 'root123';
flush privileges;
show master status;
从库:
grant replication slave on *.* to 'repl'@'192.168.56.121' identified by 'repl';
grant replication slave on *.* to 'repl'@'192.168.56.122' identified by 'repl';
grant all on *.* to 'root'@'192.168.56.121' identified by 'root123';
grant all on *.* to 'root'@'192.168.56.122' identified by 'root123';
flush privileges;
CHANGE MASTER TO MASTER_HOST='192.168.56.121',MASTER_PORT=3306,MASTER_USER='repl',MASTER_PASSWORD='repl',MASTER_LOG_FILE='mysql-bin.000003',MASTER_LOG_POS=154;
CHANGE MASTER TO MASTER_HOST='192.168.56.121',MASTER_PORT=3306,MASTER_USER='repl',MASTER_PASSWORD='repl',MASTER_AUTO_POSITION=1;
--如果启用了gtid的mysql,直接通过MASTER_AUTO_POSITION即可自动追加所需日志
start slave;
show slave status \G
--配置所有节点间的ssh互信
ssh-keygen -t rsa
ssh-copy-id -i /root/.ssh/id_rsa.pub '-p 22 192.168.56.121'
ssh-copy-id -i /root/.ssh/id_rsa.pub '-p 22 192.168.56.122'
ssh-copy-id -i /root/.ssh/id_rsa.pub '-p 22 192.168.56.125'
--部署MHA
所有节点安装epel源
rpm -ivh http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
[root@mysql1 yum.repos.d]# cat epel.repo
[epel]
name=Extra Packages for Enterprise Linux 6 - $basearch
baseurl=http://mirrors.aliyun.com/epel/6/$basearch
failovermethod=priority
enabled=1
gpgcheck=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-6
[epel-debuginfo]
name=Extra Packages for Enterprise Linux 6 - $basearch - Debug
baseurl=http://mirrors.aliyun.com/epel/6/$basearch/debug
failovermethod=priority
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-6
gpgcheck=0
[epel-source]
name=Extra Packages for Enterprise Linux 6 - $basearch - Source
baseurl=http://mirrors.aliyun.com/epel/6/SRPMS
failovermethod=priority
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-6
gpgcheck=0
[root@mysql1 yum.repos.d]# pwd
/etc/yum.repos.d
--安装MHA软件
cd /tmp
unzip mha4mysql-manager-master.zip
cd /tmp/mha4mysql-manager-master
perl Makefile.PL
make && make install
管理节点应该在独立的一个服务器上,这里就直接拿从库做管理节点了。
三个节点安装:
rpm -ivh mha4mysql-node-0.56-0.el6.noarch.rpm
管理节点安装:
rpm -ivh mha4mysql-manager-0.56-0.el6.noarch.rpm
unzip /tmp/mha4mysql-manager-master.zip
cd /tmp/mha4mysql-manager-master
mkdir -p /etc/mha/{app1,scripts}
cp samples/conf/app1.cnf /etc/mha/app1/
cp samples/conf/masterha_default.cnf /etc/
cp -r samples/scripts/* /etc/mha/scripts/
vi /etc/masterha_default.cnf
[server default]
user=root
password=root123
ssh_user=root
repl_user=repl
repl_password=repl
ping_interval=3
#shutdown_script=""
#secondary_check_script=masterha_secondary_check-s node1 -s node2 -s node3 --user=root --master_host=mysql1 --master_ip=192.168.56.121 --master_port=3306
# shutdown_script= /script/masterha/power_manager
#report_script=""
master_ip_failover_script= /etc/mha/scripts/master_ip_failover
master_ip_online_change_script= /etc/mha/scripts/master_ip_online_change
[root@mysql2 scripts]# cat /etc/mha/app1/app1.cnf
[server default]
manager_workdir=/var/log/masterha/app1
manager_log=/var/log/masterha/app1/manager.log
[server1]
hostname=mysql1
master_binlog_dir="/mysql/3306/data/"
candidate_master=1
check_repl_delay=0
[server2]
hostname=mysql2
candidate_master=1
master_binlog_dir="/mysql/3306/"
check_repl_delay=0
[server3]
hostname=192.168.56.125
candidate_master=1
master_binlog_dir="/mysql/3306/"
配置
master_ip_failover
master_ip_online_change
------------------------------
mha管理
验证ssh互信:
masterha_check_ssh --conf=/etc/mha/app1/app1.cnf
验证主从复制:
masterha_check_repl --conf=/etc/mha/app1/app1.cnf --global_conf=/etc/masterha_default.cnf
启动mha:
nohup masterha_manager --conf=/etc/mha/app1/app1.cnf &
nohup masterha_manager --global_conf=/etc/masterha_default.cnf --conf=/etc/mha/app1/app1.cnf > /tmp/mha_manager.log 2>&1 &
nohup masterha_manager --global_conf=/etc/masterha_default.cnf --conf=/etc/mha/app1/app1.cnf --ignore_fail_on_start > /tmp/mha_manager.log 2>&1 &
--当有slave 节点宕掉时,默认是启动不了的,加上 --ignore_fail_on_start 即使有节点宕掉也能启动MHA
tail -300f /var/log/masterha/app1/manager.log
ip addr add 192.168.56.166/32 dev eth1
--第一次启动manager,需要手工在主库增加VIP
masterha_check_status --conf=/etc/mha/app1/app1.cnf --global_conf=/etc/masterha_default.cnf
masterha_stop --conf=/etc/mha/app1/app1.cnf --global_conf=/etc/masterha_default.cnf
------------------------------
failover:
手工 failover 场景,master 死掉,但是 masterha_manager 没有开启,通过手工 failover:
masterha_master_switch --global_conf=/etc/masterha_default.cnf \
--conf=/etc/mha/app1/app1.cnf --dead_master_host=192.168.56.122 \
--master_state=dead --new_master_host=192.168.56.121 --ignore_last_failover
在线切换:
masterha_master_switch --conf=/etc/mha/app1/app1.cnf --orig_master_is_new_slave \
--global_conf=/etc/masterha_default.cnf --running_updates_limit=10000 \
--master_state=alive --new_master_host=192.168.56.122 --new_master_port=3306 --interactive=0
说明:当前主从状态均正常,选择一个从库切换成主库
--running_updates_limit=10000 切换时候选 master 如果有延迟的话,mha 切换不能成功,
加上此参数表示延迟在此时间范围内都可切换(单位为 s),但是切换的时间长短是由 recover时 relay 日志的大小决定
------------------------------
回切:
目前通过masterha_master_switch无法正常回切,报错如下:
[root@mysql2 app1]# masterha_master_switch --global_conf=/etc/masterha_default.cnf \
> --conf=/etc/mha/app1/app1.cnf --dead_master_host=192.168.56.122 \
> --master_state=dead --new_master_host=192.168.56.121 --ignore_last_failover
--dead_master_ip=<dead_master_ip> is not set. Using 192.168.56.122.
--dead_master_port=<dead_master_port> is not set. Using 3306.
Tue Sep 4 16:35:02 2018 - [info] Reading default configuration from /etc/masterha_default.cnf..
Tue Sep 4 16:35:02 2018 - [info] Reading application default configuration from /etc/mha/app1/app1.cnf..
Tue Sep 4 16:35:02 2018 - [info] Reading server configuration from /etc/mha/app1/app1.cnf..
Tue Sep 4 16:35:02 2018 - [info] MHA::MasterFailover version 0.56.
Tue Sep 4 16:35:02 2018 - [info] Starting master failover.
Tue Sep 4 16:35:02 2018 - [info]
Tue Sep 4 16:35:02 2018 - [info] * Phase 1: Configuration Check Phase..
Tue Sep 4 16:35:02 2018 - [info]
Tue Sep 4 16:35:03 2018 - [error][/usr/share/perl5/vendor_perl/MHA/ServerManager.pm, ln653] There are 2 non-slave servers! MHA manages at most one non-slave server. Check configurations.
Tue Sep 4 16:35:03 2018 - [error][/usr/share/perl5/vendor_perl/MHA/ManagerUtil.pm, ln177] Got ERROR: at /usr/share/perl5/vendor_perl/MHA/MasterFailover.pm line 150
纯手工回切方法:
原主库:
启动数据库
从库:
所有从库需要change master到原主库
备主库:
change master到原主库
ifconfig eth1:1 down
原主库:
ifconfig eth1:1 192.168.56.166
验证主从:
masterha_check_repl --conf=/etc/mha/app1/app1.cnf
添加新节点server4到配置文件:
masterha_conf_host --command=add —conf=/etc/mha/app1/app1.cnf —hostname=geekwolf —block=server4 —params=“no_master=1;ignore_fail=1”
删除server4节点:
masterha_conf_host --command=delete —conf=/etc/mha/app1/app1.cnf —block=server4
# 注:block:为节点区名,默认值 为[server_$hostname],如果设置成block=100,则为[server100] params:参数,分号隔开(参考https://code.google.com/p/mysql-master-ha/wiki/Parameters)
关闭manager服务:
masterha_stop --conf=/etc/mha/app1/app1.cnf
------------------------------
排错:
1. masterha_check_repl --conf=/etc/mha/app1/app1.cnf
报Access denied for user root@xxx
解决:
masterha_default.cnf中定义的密码和实际的密码是否一致。
master_ip_failover,master_ip_online_change需要的权限为755