Mysql mha+GTID+半同步复制(一主二从)

     在OracleLinux 7.9环境上部署MySQL 5.7.37版本一主二从复制架构,部署MHA manager节点部署在slave02上。

节点主机IPLinux versionMysql version

一主二从复制

master192.168.152.140OracleLinux-R7-U9-Server-x86_64-dvdmysql-5.7.37
slave01192.168.152.141OracleLinux-R7-U9-Server-x86_64-dvdmysql-5.7.37
slave02192.168.152.142OracleLinux-R7-U9-Server-x86_64-dvdmysql-5.7.37
Mha managerslave02192.168.152.142OracleLinux-R7-U9-Server-x86_64-dvdmysql-5.7.37
  • (二)、安装mysql 数据库

  分别在安装master、slave01和slave02 上安装mysql

  • 2.1、关闭NetworkManager

# systemctl stop NetworkManager.service
# systemctl disable NetworkManager.service
# systemctl status NetworkManager.service 
  • 2.2、关闭防火墙

# systemctl stop firewalld.service
# systemctl disable firewalld.service 
# systemctl status firewalld.service 

2.3、关闭Selinux

# sed -i "s/SELINUX=.*/SELINUX=disabled/g" /etc/selinux/config  
# setenforce 0

2.4、配置YUM源

方法1:
# mount  /dev/cdrom  /mnt
vi /etc/yum.repos.d/dvd.repo
[dvd]
name=dvd
baseurl=file:///mnt
enabled=1
gpgcheck=0

方法2:
[root@master yum.repos.d]# vi /etc/yum.repos.d/OracleLinux.repo
[base]
name=Base
baseurl=http://xxxx/centos/7/base
gpgcheck=0
[updates]
name=Updates
baseurl=http://xxxx/centos/7/updates
gpgcheck=0
[extras]
name=Extras
baseurl=http://xxxx/luxsan/centos/7/extras
gpgcheck=0
[epel]
name=epel
baseurl=http://xxxx/epel
gpgcheck=0
[docker-repo]
name=docker-repo
baseurl=http://xxxx/docker/docker-repo
gpgcheck=0

yum clean all
yum repolist

2.5、内核参数设置

##编辑/etc/security/limits.conf 添加如下内容:

vi /etc/security/limits.conf 
mysql	soft	nproc 	2047 
mysql	hard	nproc 	16384
mysql	soft	nofile	1024
mysql	hard	nofile	65536
mysql	soft	stack 	10240

##编辑/etc/pam.d/login 添加如下内容:

vi /etc/pam.d/login 
session    required     pam_limits.so

##编辑vi /etc/profile添加如下内容:

if [ /$USER = "mysql" ]; then
    if [ /$SHELL = "/bin/ksh" ]; then
        ulimit -p 16384
        ulimit -n 65536
    else
        ulimit -u 16384 -n 65536
    fi
    umask 022
fi

2.6、安装所需的依赖包

# yum -y install gcc* gcc-c++ ncurses* ncurses-devel* cmake* make* perl* bison* libaio-devel* libgcrypt

##安装常用故障与性能诊断工具
yum install -y unzip man screen tigervnc-server iotop dstat openssl make sysstat vsftpd ftp

2.7、配置/etc/hosts

[root@master yum.repos.d]# cat /etc/hosts

192.168.152.140 master
192.168.152.141 slave01
192.168.152.142 slave02

2.8、修改主机名

# hostnamectl status
# hostnamectl set-hostname master	#主机名自定义
# su -

2.9、mysql用户创建

# groupadd -g 1001 mysql 
# useradd -u 1001 -g mysql mysql 
# echo '123456' | passwd --stdin mysql

2.10、创建必要目录并赋权

# mkdir -p /mysql/app   #创建安装目录
# mkdir -p /mysql/product/{binlog,relaylog,log,undo,redo,data} #创建数据目录
# chown -R mysql:mysql /mysql
# chmod -R 755 /mysql

2.11、卸载系统自带Mariadb库

redhat7.x操作系统默认安装开源数据库MARIADB,在安装MySQL之前,请先卸载该库。
# rpm -qa|grep -i mariadb
# rpm -ql mariadb-libs-5.5.56-2.el7
# rpm -qf /usr/lib64/mysql/
# rpm -e --nodeps mariadb-libs-5.5.56-2.el7		#卸载Mariadb

 2.12、上传介质并解压

# tar zxvf mysql-5.7.37-linux-glibc2.12-x86_64.tar.gz -C /mysql/app --strip-components=1

2.13、初始化参数设置

     mysql参数设置:

[root@master ~]# cat /etc/my.cnf
[mysqld]
user=mysql
port=3306
basedir=/mysql/app
datadir=/mysql/product/data
symbolic-links=0
open_files_limit = 65535
server_id=1

#开启GTID
gtid_mode=on
enforce_gtid_consistency=on
log-slave-updates=on

#会话参数
max_connections=4532
max_user_connections=4000
max_connect_errors=50000
wait_timeout=7200
interactive_timeout=7200
thread_cache_size=256

#binlog日志参数
max_binlog_size = 1073741824
log-bin=/mysql/product/binlog/mysql-bin
relay_log=/mysql/product/relaylog/mysql-relay-bin
binlog_cache_size=131072
binlog_error_action=ABORT_SERVER
binlog_format=ROW
expire_logs_days=30
sync_binlog=1
#慢日志参数
slow_query_log=0
long_query_time=10
log_output=FILE
log_error=/mysql/product/log/myerror.log
slow_query_log_file = /mysql/product/log/mysql-slow.log
general_log = OFF
general_log_file = /mysql/product/log/mysql-general.log

#Redo日志参数
innodb_log_group_home_dir = /mysql/product/redo
innodb_log_file_size=1048576000
innodb_log_files_in_group=3
#缓存参数
sort_buffer_size=720896
read_buffer_size=720896
read_rnd_buffer_size=360448
join_buffer_size=360448
max_allowed_packet=1073741824
max_heap_table_size=16777216
tmp_table_size=16777216
#innodb参数
innodb_buffer_pool_size=134217728
innodb_data_file_path=ibdata1:200M:autoextend
innodb_file_per_table=ON
innodb_log_buffer_size=16777216
innodb_flush_method=O_DIRECT
innodb_undo_directory = /mysql/product/undo
innodb_undo_tablespaces = 2
innodb_flush_log_at_trx_commit=1
innodb_temp_data_file_path = ibtmp1:200M:autoextend
#事务隔离级别
transaction-isolation=READ-COMMITTED
#字符集
character_set_server=utf8mb4
#查询缓存
query_cache_size=0
query_cache_type=OFF
#设置时区
default_time_zone='+8:00'
log_timestamps=SYSTEM
#大小写不敏感
lower_case_table_names=1
log_bin_trust_function_creators=1


#多源复制必须的参数
master-info-repository = table # 这个参数是必须的
relay-log-info-repository = table # 这个参数是必须的
relay_log_recovery= 1
relay_log_purge=0               #关闭清除中继日志
rpl_semi_sync_master_enabled=1  #开启半同步
rpl_semi_sync_slave_enabled=1
rpl_semi_sync_master_timeout=1000       #设置超时时间

#以下参数根据服务器性能调整
sync_relay_log=1 #默认值是10000
sync_relay_log_info=1 #默认值是10000
sync_master_info = 1 #默认值是10000 

2.14、初始化MySQL5.7

# cd /mysql/app
# ./bin/mysqld --defaults-file=/etc/my.cnf --initialize 
# grep 'temporary password' /mysql/product/log/myerror.log
2024-04-28T15:44:49.248365Z 1 [Note] A temporary password is generated for root@localhost: qdcf_56>k9L3
获取数据库初始登录临时密码:qdcf_56>k9L3

2.15、添加环境变量

# vi /root/.bash_profile
export PATH=/mysql/app/bin:$PATH
# source /root/.bash_profile

2.16、配置数据库服务

# cp /mysql/app/support-files/mysql.server /etc/init.d/mysqld
# vi /etc/init.d/mysqld
basedir=/mysql/app  #与上面创建的目录一致
datadir=/mysql/product/data #与上面创建的目录一致

2.17、启停MySQL

# service mysqld start     //启动mysql
# service mysqld status    //查看启动状态
# service mysqld stop      //停止mysql   ----做下面的操作不要停库

2.18、修改root用户密码

# mysqladmin -uroot -p password "123456"
Enter password:qdcf_56>k9L3
# mysql -uroot -p123456

2.19、设置远程登录

mysql> create user root@'%' identified by '123456';
mysql> grant all privileges on *.* to root@'%';

(三)、创建一主二从复制

3.1、master创建复制用户

create user 'repl'@'192.168.152.%' identified by '123456';
grant replication slave on *.* to 'repl'@'192.168.152.%';
flush privileges;

3.2、master创建监控用户

create user 'monitor'@'192.168.152.% ' identified by '123456';
grant all privileges on *.* to 'monitor'@'192.168.152.%';
flush privileges;

3.3、slave01,slave02搭建复制关系

change master to master_host='192.168.152.140',master_user='repl',master_password='123456',master_auto_position = 1;

3.4、 slave01,slave02端启动复制

start slave;

启动复制,确认复制正常
Mysql > show slave status\G;
……
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
……

(四)、半同步复制配置

4.1、master,slave01,slave02都需要添加plugin

[root@master ~]# service mysqld start

mysql> install plugin rpl_semi_sync_master soname 'semisync_master.so';
Query OK, 0 rows affected (0.18 sec)

mysql> install plugin rpl_semi_sync_slave soname 'semisync_slave.so';
Query OK, 0 rows affected (0.09 sec)

mysql> set global rpl_semi_sync_master_enabled=1;
Query OK, 0 rows affected (0.01 sec)

mysql> show global variables like 'rpl%';
+-------------------------------------------+------------+
| Variable_name                             | Value      |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled              | ON         |
| rpl_semi_sync_master_timeout              | 1000       |
| rpl_semi_sync_master_trace_level          | 32         |
| rpl_semi_sync_master_wait_for_slave_count | 1          |
| rpl_semi_sync_master_wait_no_slave        | ON         |
| rpl_semi_sync_master_wait_point           | AFTER_SYNC |
| rpl_semi_sync_slave_enabled               | ON         |
| rpl_semi_sync_slave_trace_level           | 32         |
| rpl_stop_slave_timeout                    | 31536000   |
+-------------------------------------------+------------+
9 rows in set (0.00 sec)

mysql> show plugins;
+----------------------------+----------+--------------------+--------------------+---------+
| Name                       | Status   | Type               | Library            | License |
+----------------------------+----------+--------------------+--------------------+---------+   |
| rpl_semi_sync_master       | ACTIVE   | REPLICATION        | semisync_master.so | GPL     |
| rpl_semi_sync_slave        | ACTIVE   | REPLICATION        | semisync_slave.so  | GPL     |
+----------------------------+----------+--------------------+--------------------+---------+

4.2、额外说明

注意一:
binlog-do-db 和 replicate-ignore-db 设置必须一致。 MHA在启动时候会检测过滤规则,如果过滤规则不同,MHA将不启动监控和故障转移。

注意二:
设置从库对外提供 只读服务,之所以没有写进 /etc/my.cnf 配置文件,是因为slave节点随时可能会提升为master节点

[root@slave1 ~]# mysql -uroot -pmysql -e"set global read_only=on"
[root@slave2 ~]# mysql -uroot -pmysql -e"set global read_only=on"

注意三:
MHA在发生切换的过程中,从库的恢复过程中依赖于relay log的相关信息,所以这里要将relay log的自动清除设置为OFF,采用手动清除relay log的方式。
[root@slave1 ~]# mysql -uroot -pmysql -e"set global relay_log_purge=off"
[root@slave2 ~]# mysql -uroot -pmysql -e"set global relay_log_purge=off"

(五)、安装配置MySQL MHA高可用架构

5.1、安装Mha依赖

yum -y install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Config-IniFiles ncftp perl-Params-Validate perl-CPAN perl-Test-Mock-LWP.noarch perl-LWP-Authen-Negotiate.noarch perl-devel perl-ExtUtils-CBuilder   perl-ExtUtils-MakeMaker

5.2、配置3台机器的互信,ssh免密码登陆。

master:
ssh-keygen -t rsa
cp /root/.ssh/id_rsa.pub /root/.ssh/key_master

slave01:
ssh-keygen -t rsa
cp /root/.ssh/id_rsa.pub /root/.ssh/key_slave01
scp /root/.ssh/key_slave01 root@master:/root/.ssh/

slave02:
ssh-keygen -t rsa
cp /root/.ssh/id_rsa.pub /root/.ssh/key_slave02
scp /root/.ssh/key_slave02 root@master:/root/.ssh/

master上合并密匙,然后分发给slave01,slave02
cd /root/.ssh
cat key_master >> authorized_keys
cat key_slave01 >> authorized_keys
cat key_slave02 >> authorized_keys
chmod 600 authorized_keys
scp authorized_keys root@slave01:/root/.ssh/
scp authorized_keys root@slave02:/root/.ssh/

验证:
ssh slave01
ssh slave02

5.3、安装MHA

安装mha-node和mha-manger,并编译配置文件:

安装Mha(master,,slave01,slave02)
在3台数据库节点都要安装MHA的node节点,MHA manager只在节点slave02上安装:
上传mha4mysql-node和manager包
 下载地址:https://github.com/yoshinorim/mha4mysql-manager/releases
          https://github.com/yoshinorim/mha4mysql-node/releases


1、分别在主库和两个从库上安装Mha-node数据节点
解压上传到/usr/local数据节点的软件包:
cd /usr/local
tar  -zxvf  mha4mysql-node-0.58.tar.gz
cd  mha4mysql-node-0.58/
perl   Makefile.PL
make  &&  make install

2、在从库slave02上安装管理manager节点:
tar zxf mha4mysql-manager-0.58.tar.gz 
cd mha4mysql-manager-0.58/
perl Makefile.PL 
make && make install
3、再管理节点slave02上创建MHA家目录,编译配置文件mha.conf
mkdir -p /usr/local/mha
mkdir -p /etc/mha
cd /etc/mha/
vi /etc/mha/mha.conf
[server default]
##设置mysql中root用户和密码,这个密码是前文中创建监控用户的那个密码
user=monitor
password=123456
##设置manager的工作目录
manager_workdir=/usr/local/mha
##设置manager的日志   
manager_log=/usr/local/mha/manager.log  
##设置master 保存binlog的位置,以便MHA可以找到master的日志,我这里的也就是mysql的数据目录
master_binlog_dir=/mysql/product/binlog  

##设置复制环境中的复制用户名和复制用户的密码
repl_user=repl      
repl_password=123456
##设置ssh的登录用户名
ssh_user=root
##设置监控主库,发送ping包的时间间隔,默认是3秒,尝试三次没有回应的时候自动进行railover
ping_interval=1    
ping_type=CONNECT


##设置自动failover时候的切换脚本
master_ip_failover_script=/usr/local/scripts/master_ip_failover
##设置手动切换时候的切换脚本
master_ip_online_change_script=/usr/local/scripts/master_ip_online_change

##secondary_check_script=/usr/local/bin/masterha_secondary_check -s server03 -s server02
##secondary_check_script=/soft/mha4mysql-manager-0.58/bin/masterha_secondary_check -s 192.168.152.141 -s 192.168.152.142

##设置发生切换后发送的报警的脚本
report_script=/usr/local/scripts/send_report

#shutdown_script="" 设置故障发生后关闭故障主机脚本(该脚本的主要作用是关闭主机放在发生脑裂,这里没有使用)
[server1]
hostname=192.168.152.140
ssh_port=22
master_binlog_dir=/mysql/product/binlog 
port=3306
candidate_master=1

[server2]
hostname=192.168.152.141
ssh_port=22
master_binlog_dir=/mysql/product/binlog 
port=3306
##设置为候选master,如果设置该参数以后,发生主从切换以后将会将此从库提升为主库,即使这个主库不是集群中事件最新的slave
candidate_master=1
##默认情况下如果一个slave落后master 100M的relay logs的话,MHA将不会选择该slave作为一个新的master,
##因为对于这个slave的恢复需要花费很长时间,通过设置check_repl_delay=0,MHA触发切换在选择一个新的master的时候将会忽略复制延时
##这个参数对于设置了candidate_master=1的主机非常有用,因为这个候选主在切换的过程中一定是新的master
check_repl_delay=0 

[server3]
hostname=192.168.152.142
ssh_port=22
master_binlog_dir=/mysql/product/binlog 
port=3306
no_master=1

5.4、在管理节点slave02上编译master_ip_online_change和master_ip_failover脚本

为防止脑裂发生,推荐生产环境上才用脚本的方式管理虚拟IP,而不是使用keepalived来完成;

首先创建failover、online脚本的目录,添加虚拟VIP,命令如下;

mkdir -p /usr/local/scripts

1、 安装ifconfig命令
yum install net-tools
# 第一次配置需要在 Master 节点上手动开启虚拟IP,虚拟ip与之前在脚本中配置的保持一直,选用未占用的地址
/sbin/ifconfig ens32:1 192.168.152.146/24

2、修改脚本/usr/local/scripts/master_ip_failover, /usr/local/scripts/master_ip_online_change中的vip信息和网卡信息

vi /usr/local/scripts/master_ip_failover添加:
my $vip = '192.168.152.146/24';
my $key = '1';
my $ssh_start_vip = "/sbin/ifconfig ens32:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig ens32:$key down";

vi /usr/local/scripts/master_ip_online_change添加:
my $vip = '192.168.152.146/24';  # Virtual IP
my $key = "1";
my $ssh_start_vip = "/sbin/ifconfig ens32:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig ens32:$key down";
my $exit_code = 0;

具体脚本如下:

vi /usr/local/scripts/master_ip_failover
[root@slave02 scripts]# cat master_ip_failover
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use Getopt::Long;

my (
    $command,          $ssh_user,        $orig_master_host, $orig_master_ip,
    $orig_master_port, $new_master_host, $new_master_ip,    $new_master_port
);

my $vip = '192.168.152.146/24';
my $key = '1';
my $ssh_start_vip = "/sbin/ifconfig ens32:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig ens32:$key down";

GetOptions(
    'command=s'          => \$command,
    'ssh_user=s'         => \$ssh_user,
    'orig_master_host=s' => \$orig_master_host,
    'orig_master_ip=s'   => \$orig_master_ip,
    'orig_master_port=i' => \$orig_master_port,
    'new_master_host=s'  => \$new_master_host,
    'new_master_ip=s'    => \$new_master_ip,
    'new_master_port=i'  => \$new_master_port,
);

exit &main();

sub main {

    print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

    if ( $command eq "stop" || $command eq "stopssh" ) {

        my $exit_code = 1;
        eval {
            print "Disabling the VIP on old master: $orig_master_host \n";
            &stop_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn "Got Error: $@\n";
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "start" ) {

        my $exit_code = 10;
        eval {
            print "Enabling the VIP - $vip on the new master - $new_master_host \n";
            &start_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn $@;
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";
        exit 0;
    }
    else {
        &usage();
        exit 1;
    }
}

sub start_vip() {
    `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
sub stop_vip() {
     return 0  unless  ($ssh_user);
    `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}

sub usage {
    print
    "Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}



vi /usr/local/scripts/master_ip_online_change
[root@slave02 scripts]# cat master_ip_online_change
#!/usr/bin/env perl
use strict;
use warnings FATAL =>'all';

use Getopt::Long;

my $vip = '192.168.152.146/24';  # Virtual IP
my $key = "1";
my $ssh_start_vip = "/sbin/ifconfig ens32:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig ens32:$key down";
my $exit_code = 0;

my (
  $command,              $orig_master_is_new_slave, $orig_master_host,
  $orig_master_ip,       $orig_master_port,         $orig_master_user,
  $orig_master_password, $orig_master_ssh_user,     $new_master_host,
  $new_master_ip,        $new_master_port,          $new_master_user,
  $new_master_password,  $new_master_ssh_user,
);
GetOptions(
  'command=s'                => \$command,
  'orig_master_is_new_slave' => \$orig_master_is_new_slave,
  'orig_master_host=s'       => \$orig_master_host,
  'orig_master_ip=s'         => \$orig_master_ip,
  'orig_master_port=i'       => \$orig_master_port,
  'orig_master_user=s'       => \$orig_master_user,
  'orig_master_password=s'   => \$orig_master_password,
  'orig_master_ssh_user=s'   => \$orig_master_ssh_user,
  'new_master_host=s'        => \$new_master_host,
  'new_master_ip=s'          => \$new_master_ip,
  'new_master_port=i'        => \$new_master_port,
  'new_master_user=s'        => \$new_master_user,
  'new_master_password=s'    => \$new_master_password,
  'new_master_ssh_user=s'    => \$new_master_ssh_user,
);


exit &main();

sub main {

#print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

if ( $command eq "stop" || $command eq "stopssh" ) {

        # $orig_master_host, $orig_master_ip, $orig_master_port are passed.
        # If you manage master ip address at global catalog database,
        # invalidate orig_master_ip here.
        my $exit_code = 1;
        eval {
            print "\n\n\n***************************************************************\n";
            print "Disabling the VIP - $vip on old master: $orig_master_host\n";
            print "***************************************************************\n\n\n\n";
&stop_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn "Got Error: $@\n";
            exit $exit_code;
        }
        exit $exit_code;
}
elsif ( $command eq "start" ) {

        # all arguments are passed.
        # If you manage master ip address at global catalog database,
        # activate new_master_ip here.
        # You can also grant write access (create user, set read_only=0, etc) here.
my $exit_code = 10;
        eval {
            print "\n\n\n***************************************************************\n";
            print "Enabling the VIP - $vip on new master: $new_master_host \n";
            print "***************************************************************\n\n\n\n";
&start_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn $@;
            exit $exit_code;
        }
        exit $exit_code;
}
elsif ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";
        `ssh $orig_master_ssh_user\@$orig_master_host \" $ssh_start_vip \"`;
        exit 0;
}
else {
&usage();
        exit 1;
}
}

# A simple system call that enable the VIP on the new master
sub start_vip() {
`ssh $new_master_ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
# A simple system call that disable the VIP on the old_master
sub stop_vip() {
`ssh $orig_master_ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}

sub usage {
print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n"; 
}

5.5、检查复制集群状态

通过masterha_check_rep脚本整个复制集群数据同步情况:

[root@slave02 scripts]# masterha_check_repl --conf=/etc/mha/mha.conf
Sat Apr 27 17:38:14 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Apr 27 17:38:14 2024 - [info] Reading application default configuration from /etc/mha/mha.conf..
Sat Apr 27 17:38:14 2024 - [info] Reading server configuration from /etc/mha/mha.conf..
Sat Apr 27 17:38:14 2024 - [info] MHA::MasterMonitor version 0.58.
Sat Apr 27 17:38:15 2024 - [info] GTID failover mode = 1
Sat Apr 27 17:38:15 2024 - [info] Dead Servers:
Sat Apr 27 17:38:15 2024 - [info] Alive Servers:
Sat Apr 27 17:38:15 2024 - [info]   192.168.152.140(192.168.152.140:3306)
Sat Apr 27 17:38:15 2024 - [info]   192.168.152.141(192.168.152.141:3306)
Sat Apr 27 17:38:15 2024 - [info]   192.168.152.142(192.168.152.142:3306)
Sat Apr 27 17:38:15 2024 - [info] Alive Slaves:
Sat Apr 27 17:38:15 2024 - [info]   192.168.152.141(192.168.152.141:3306)  Version=5.7.37-log (oldest major version between slaves) log-bin:enabled
Sat Apr 27 17:38:15 2024 - [info]     GTID ON
Sat Apr 27 17:38:15 2024 - [info]     Replicating from 192.168.152.140(192.168.152.140:3306)
Sat Apr 27 17:38:15 2024 - [info]     Primary candidate for the new Master (candidate_master is set)
Sat Apr 27 17:38:15 2024 - [info]   192.168.152.142(192.168.152.142:3306)  Version=5.7.37-log (oldest major version between slaves) log-bin:enabled
Sat Apr 27 17:38:15 2024 - [info]     GTID ON
Sat Apr 27 17:38:15 2024 - [info]     Replicating from 192.168.152.140(192.168.152.140:3306)
Sat Apr 27 17:38:15 2024 - [info]     Not candidate for the new Master (no_master is set)
Sat Apr 27 17:38:15 2024 - [info] Current Alive Master: 192.168.152.140(192.168.152.140:3306)
Sat Apr 27 17:38:15 2024 - [info] Checking slave configurations..
Sat Apr 27 17:38:15 2024 - [info]  read_only=1 is not set on slave 192.168.152.142(192.168.152.142:3306).
Sat Apr 27 17:38:15 2024 - [info] Checking replication filtering settings..
Sat Apr 27 17:38:15 2024 - [info]  binlog_do_db= , binlog_ignore_db=
Sat Apr 27 17:38:15 2024 - [info]  Replication filtering check ok.
Sat Apr 27 17:38:15 2024 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Sat Apr 27 17:38:15 2024 - [info] Checking SSH publickey authentication settings on the current master..
Sat Apr 27 17:38:15 2024 - [info] HealthCheck: SSH to 192.168.152.140 is reachable.
Sat Apr 27 17:38:15 2024 - [info]
192.168.152.140(192.168.152.140:3306) (current master)
 +--192.168.152.141(192.168.152.141:3306)
 +--192.168.152.142(192.168.152.142:3306)

Sat Apr 27 17:38:15 2024 - [info] Checking replication health on 192.168.152.141..
Sat Apr 27 17:38:16 2024 - [info]  ok.
Sat Apr 27 17:38:16 2024 - [info] Checking replication health on 192.168.152.142..
Sat Apr 27 17:38:16 2024 - [info]  ok.
Sat Apr 27 17:38:16 2024 - [info] Checking master_ip_failover_script status:
Sat Apr 27 17:38:16 2024 - [info]   /usr/local/scripts/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.152.140 --orig_master_ip=192.168.152.140 --orig_master_port=3306


IN SCRIPT TEST====/sbin/ifconfig ens32:1 down==/sbin/ifconfig ens32:1 192.168.152.146/24===

Checking the Status of the script.. OK
Sat Apr 27 17:38:16 2024 - [info]  OK.
Sat Apr 27 17:38:16 2024 - [warning] shutdown_script is not defined.
Sat Apr 27 17:38:16 2024 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.

5.6、检查SSH配置

通过masterha_check_ssh脚本检查MHA Manger到所有MHA Node的SSH连接状态:

[root@slave02 scripts]# masterha_check_ssh --conf=/etc/mha/mha.conf
Sat Apr 27 17:38:52 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Apr 27 17:38:52 2024 - [info] Reading application default configuration from /etc/mha/mha.conf..
Sat Apr 27 17:38:52 2024 - [info] Reading server configuration from /etc/mha/mha.conf..
Sat Apr 27 17:38:52 2024 - [info] Starting SSH connection tests..
Sat Apr 27 17:38:52 2024 - [debug]
Sat Apr 27 17:38:52 2024 - [debug]  Connecting via SSH from root@192.168.152.140(192.168.152.140:22) to root@192.168.152.141(192.168.152.141:22)..
Sat Apr 27 17:38:52 2024 - [debug]   ok.
Sat Apr 27 17:38:52 2024 - [debug]  Connecting via SSH from root@192.168.152.140(192.168.152.140:22) to root@192.168.152.142(192.168.152.142:22)..
Sat Apr 27 17:38:52 2024 - [debug]   ok.
Sat Apr 27 17:38:53 2024 - [debug]
Sat Apr 27 17:38:52 2024 - [debug]  Connecting via SSH from root@192.168.152.141(192.168.152.141:22) to root@192.168.152.140(192.168.152.140:22)..
Sat Apr 27 17:38:52 2024 - [debug]   ok.
Sat Apr 27 17:38:52 2024 - [debug]  Connecting via SSH from root@192.168.152.141(192.168.152.141:22) to root@192.168.152.142(192.168.152.142:22)..
Sat Apr 27 17:38:53 2024 - [debug]   ok.
Sat Apr 27 17:38:53 2024 - [debug]
Sat Apr 27 17:38:53 2024 - [debug]  Connecting via SSH from root@192.168.152.142(192.168.152.142:22) to root@192.168.152.140(192.168.152.140:22)..
Sat Apr 27 17:38:53 2024 - [debug]   ok.
Sat Apr 27 17:38:53 2024 - [debug]  Connecting via SSH from root@192.168.152.142(192.168.152.142:22) to root@192.168.152.141(192.168.152.141:22)..
Sat Apr 27 17:38:53 2024 - [debug]   ok.
Sat Apr 27 17:38:53 2024 - [info] All SSH connection tests passed successfully.

5.7、检查MHA manager节点状态

通过master_check_status脚本检查Manager的状态:

[root@slave02 scripts]# masterha_check_status --conf=/etc/mha/mha.conf
mha is stopped(2:NOT_RUNNING).

5.8、启动MHA manager监控

[root@slave02 scripts]# nohup masterha_manager --conf=/etc/mha/mha.conf --ignore_last_failover < /dev/null > /tmp/manager.log 2>&1 &
[1] 6931
[root@slave02 scripts]# masterha_check_status --conf=/etc/mha/mha.conf
mha (pid:6931) is running(0:PING_OK), master:192.168.152.140

5.9、关闭MHA manager监控

  1. [root@slave02 scripts]# masterha_stop --conf=/etc/mha/mha.conf
    Stopped mha successfully.
    [1]+  Exit 1                  nohup masterha_manager --conf=/etc/mha/mha.conf --ignore_last_failover < /dev/null > /tmp/manager.log 2>&1
    
  • (六)、MHA高可用架构下master节点切换

  • 6.1、Master节点自动故障切换

  • 模拟Master节点MySQL服务异常宕机。
  • 1、在slave02上开启mha-manager
    [root@slave02 scripts]# nohup masterha_manager --conf=/etc/mha/mha.conf --ignore_last_failover < /dev/null > /tmp/manager.log 2>&1 &
    [1] 7582
    [root@slave02 scripts]# masterha_check_status --conf=/etc/mha/mha.conf
    mha (pid:7582) is running(0:PING_OK), master:192.168.152.140
    
    
    2、在master节点上模拟mysql宕机
    [root@master ~]# mysqladmin -uroot -p123456 shutdown
    
    3、在slave02上查看输出mha-manager日志信息,
    
    部分日志截取信息如下:
    [root@slave02 scripts]# tail -1000f /usr/local/mha/manager.log
    
    Unknown option: new_master_password
    
    
    IN SCRIPT TEST====/sbin/ifconfig ens32:1 down==/sbin/ifconfig ens32:1 192.168.152.146/24===
    
    Enabling the VIP - 192.168.152.146/24 on the new master - 192.168.152.141
    Sun Apr 28 17:01:21 2024 - [info]  OK.
    Sun Apr 28 17:01:21 2024 - [info] Setting read_only=0 on 192.168.152.141(192.168.152.141:3306)..
    Sun Apr 28 17:01:21 2024 - [info]  ok.
    Sun Apr 28 17:01:21 2024 - [info] ** Finished master recovery successfully.
    Sun Apr 28 17:01:21 2024 - [info] * Phase 3: Master Recovery Phase completed.
    Sun Apr 28 17:01:21 2024 - [info]
    Sun Apr 28 17:01:21 2024 - [info] * Phase 4: Slaves Recovery Phase..
    Sun Apr 28 17:01:21 2024 - [info]
    Sun Apr 28 17:01:21 2024 - [info]
    Sun Apr 28 17:01:21 2024 - [info] * Phase 4.1: Starting Slaves in parallel..
    Sun Apr 28 17:01:21 2024 - [info]
    Sun Apr 28 17:01:21 2024 - [info] -- Slave recovery on host 192.168.152.142(192.168.152.142:3306) started, pid: 7816. Check tmp log /usr/local/mha/192.168.152.142_3306_20240428170120.log if it takes time..
    Sun Apr 28 17:01:23 2024 - [info]
    Sun Apr 28 17:01:23 2024 - [info] Log messages from 192.168.152.142 ...
    Sun Apr 28 17:01:23 2024 - [info]
    Sun Apr 28 17:01:21 2024 - [info]  Resetting slave 192.168.152.142(192.168.152.142:3306) and starting replication from the new master 192.168.152.141(192.168.152.141:3306)..
    Sun Apr 28 17:01:21 2024 - [info]  Executed CHANGE MASTER.
    Sun Apr 28 17:01:22 2024 - [info]  Slave started.
    Sun Apr 28 17:01:22 2024 - [info]  gtid_wait(a6a544e0-0106-11ef-96c2-000c290d88fa:1-4,
    a6a544e0-0106-11ef-96c2-000c290d8efa:1-20) completed on 192.168.152.142(192.168.152.142:3306). Executed 0 events.
    Sun Apr 28 17:01:23 2024 - [info] End of log messages from 192.168.152.142.
    Sun Apr 28 17:01:23 2024 - [info] -- Slave on host 192.168.152.142(192.168.152.142:3306) started.
    Sun Apr 28 17:01:23 2024 - [info] All new slave servers recovered successfully.
    Sun Apr 28 17:01:23 2024 - [info]
    Sun Apr 28 17:01:23 2024 - [info] * Phase 5: New master cleanup phase..
    Sun Apr 28 17:01:23 2024 - [info]
    Sun Apr 28 17:01:23 2024 - [info] Resetting slave info on the new master..
    Sun Apr 28 17:01:23 2024 - [info]  192.168.152.141: Resetting slave info succeeded.
    Sun Apr 28 17:01:23 2024 - [info] Master failover to 192.168.152.141(192.168.152.141:3306) completed successfully.
    Sun Apr 28 17:01:23 2024 - [info]
    
    ----- Failover Report -----
    
    mha: MySQL Master failover 192.168.152.140(192.168.152.140:3306) to 192.168.152.141(192.168.152.141:3306) succeeded
    
    Master 192.168.152.140(192.168.152.140:3306) is down!
    
    Check MHA Manager logs at slave02:/usr/local/mha/manager.log for details.
    
    Started automated(non-interactive) failover.
    Invalidated master IP address on 192.168.152.140(192.168.152.140:3306)
    Selected 192.168.152.141(192.168.152.141:3306) as a new master.
    192.168.152.141(192.168.152.141:3306): OK: Applying all logs succeeded.
    192.168.152.141(192.168.152.141:3306): OK: Activated master IP address.
    192.168.152.142(192.168.152.142:3306): OK: Slave started, replicating from 192.168.152.141(192.168.152.141:3306)
    192.168.152.141(192.168.152.141:3306): Resetting slave info succeeded.
    Master failover to 192.168.152.141(192.168.152.141:3306) completed successfully.
    Sun Apr 28 17:01:23 2024 - [info] Sending mail..
    Unknown option: conf
    

    当master节点宕机并发生切换之后,MHA监控服务会随之被关闭掉。

  • [root@slave02 scripts]# masterha_check_status --conf=/etc/mha/mha.conf
    mha is stopped(2:NOT_RUNNING).
    

    在slave01上查看vip,能够查看到VIP已经漂移到slave01节点。

  • [root@slave01 keepalived]# ifconfig -a
    ens32: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
            inet 192.168.152.141  netmask 255.255.255.0  broadcast 192.168.152.255
            inet6 fe80::250:56ff:fe2a:a58e  prefixlen 64  scopeid 0x20<link>
            ether 00:50:56:2a:a5:8e  txqueuelen 1000  (Ethernet)
            RX packets 52872  bytes 5649260 (5.3 MiB)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 14023  bytes 1452988 (1.3 MiB)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    
    ens32:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
            inet 192.168.152.146  netmask 255.255.255.0  broadcast 192.168.152.255
            ether 00:50:56:2a:a5:8e  txqueuelen 1000  (Ethernet)
    
    lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
            inet 127.0.0.1  netmask 255.0.0.0
            inet6 ::1  prefixlen 128  scopeid 0x10<host>
            loop  txqueuelen 1000  (Local Loopback)
            RX packets 62  bytes 7486 (7.3 KiB)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 62  bytes 7486 (7.3 KiB)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    
    

  • 6.2、修复原Master并加入复制集群(回切)

  • 将原master节点MySQL服务正常启动,注意:当原Master恢复后,不会自动转成Slave,需要手动处理,并将原Master加入到复制集群。这时的原master节点作为备库节点。

  • [root@master ~]# service mysqld start
    Starting MySQL. SUCCESS!
    将原Master节点加入到复制集群:
    [root@master ~]# mysql -uroot -p123456
    mysql: [Warning] Using a password on the command line interface can be insecure.
    Welcome to the MySQL monitor.  Commands end with ; or \g.
    Your MySQL connection id is 2
    Server version: 5.7.37-log MySQL Community Server (GPL)
    
    Copyright (c) 2000, 2022, Oracle and/or its affiliates.
    
    Oracle is a registered trademark of Oracle Corporation and/or its
    affiliates. Other names may be trademarks of their respective
    owners.
    
    Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
    
    mysql> CHANGE MASTER TO MASTER_HOST='192.168.152.141', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='123456';
    Query OK, 0 rows affected, 2 warnings (0.01 sec)
    
    mysql> start slave;
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> show slave status\G
    *************************** 1. row ***************************
                   Slave_IO_State: Waiting for master to send event
                      Master_Host: 192.168.152.141
                      Master_User: repl
                      Master_Port: 3306
                    Connect_Retry: 60
                  Master_Log_File: mysql-bin.000015
              Read_Master_Log_Pos: 234
                   Relay_Log_File: mysql-relay-bin.000002
                    Relay_Log_Pos: 367
            Relay_Master_Log_File: mysql-bin.000015
                 Slave_IO_Running: Yes
                Slave_SQL_Running: Yes
                  Replicate_Do_DB:
              Replicate_Ignore_DB:
               Replicate_Do_Table:
           Replicate_Ignore_Table:
          Replicate_Wild_Do_Table:
      Replicate_Wild_Ignore_Table:
                       Last_Errno: 0
                       Last_Error:
                     Skip_Counter: 0
              Exec_Master_Log_Pos: 234
                  Relay_Log_Space: 574
                  Until_Condition: None
                   Until_Log_File:
                    Until_Log_Pos: 0
               Master_SSL_Allowed: No
               Master_SSL_CA_File:
               Master_SSL_CA_Path:
                  Master_SSL_Cert:
                Master_SSL_Cipher:
                   Master_SSL_Key:
            Seconds_Behind_Master: 0
    Master_SSL_Verify_Server_Cert: No
                    Last_IO_Errno: 0
                    Last_IO_Error:
                   Last_SQL_Errno: 0
                   Last_SQL_Error:
      Replicate_Ignore_Server_Ids:
                 Master_Server_Id: 2
                      Master_UUID: a6a544e0-0106-11ef-96c2-000c290d88fa
                 Master_Info_File: mysql.slave_master_info
                        SQL_Delay: 0
              SQL_Remaining_Delay: NULL
          Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
               Master_Retry_Count: 86400
                      Master_Bind:
          Last_IO_Error_Timestamp:
         Last_SQL_Error_Timestamp:
                   Master_SSL_Crl:
               Master_SSL_Crlpath:
               Retrieved_Gtid_Set:
                Executed_Gtid_Set: a6a544e0-0106-11ef-96c2-000c290d88fa:1-4,
    a6a544e0-0106-11ef-96c2-000c290d8efa:1-20
                    Auto_Position: 1
             Replicate_Rewrite_DB:
                     Channel_Name:
               Master_TLS_Version:
    1 row in set (0.00 sec)
    
    ##由于原Master节点已被置为新的Slave节点,需要设置原Master只读服务,之所以没有写进 /etc/my.cnf 配置文件,是因为slave节点随时可能会提升为master节点。
    
    mysql> show global variables like 'read_only';
    +---------------+-------+
    | Variable_name | Value |
    +---------------+-------+
    | read_only     | OFF   |
    +---------------+-------+
    1 row in set (0.01 sec)
    
    mysql> set global read_only=on;
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> show global variables like 'read_only';
    +---------------+-------+
    | Variable_name | Value |
    +---------------+-------+
    | read_only     | ON    |
    +---------------+-------+
    1 row in set (0.00 sec)
    
    

  • 6.3、手动在线切换

  • 1、检查SSH配置:
    [root@slave02 scripts]# masterha_check_ssh --conf=/etc/mha/mha.conf
    Sun Apr 28 17:16:11 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
    Sun Apr 28 17:16:11 2024 - [info] Reading application default configuration from /etc/mha/mha.conf..
    Sun Apr 28 17:16:11 2024 - [info] Reading server configuration from /etc/mha/mha.conf..
    Sun Apr 28 17:16:11 2024 - [info] Starting SSH connection tests..
    Sun Apr 28 17:16:11 2024 - [debug]
    Sun Apr 28 17:16:11 2024 - [debug]  Connecting via SSH from root@192.168.152.140(192.168.152.140:22) to root@192.168.152.141(192.168.152.141:22)..
    Sun Apr 28 17:16:11 2024 - [debug]   ok.
    Sun Apr 28 17:16:11 2024 - [debug]  Connecting via SSH from root@192.168.152.140(192.168.152.140:22) to root@192.168.152.142(192.168.152.142:22)..
    Sun Apr 28 17:16:11 2024 - [debug]   ok.
    Sun Apr 28 17:16:12 2024 - [debug]
    Sun Apr 28 17:16:11 2024 - [debug]  Connecting via SSH from root@192.168.152.141(192.168.152.141:22) to root@192.168.152.140(192.168.152.140:22)..
    Sun Apr 28 17:16:12 2024 - [debug]   ok.
    Sun Apr 28 17:16:12 2024 - [debug]  Connecting via SSH from root@192.168.152.141(192.168.152.141:22) to root@192.168.152.142(192.168.152.142:22)..
    Sun Apr 28 17:16:12 2024 - [debug]   ok.
    Sun Apr 28 17:16:12 2024 - [debug]
    Sun Apr 28 17:16:12 2024 - [debug]  Connecting via SSH from root@192.168.152.142(192.168.152.142:22) to root@192.168.152.140(192.168.152.140:22)..
    Sun Apr 28 17:16:12 2024 - [debug]   ok.
    Sun Apr 28 17:16:12 2024 - [debug]  Connecting via SSH from root@192.168.152.142(192.168.152.142:22) to root@192.168.152.141(192.168.152.141:22)..
    Sun Apr 28 17:16:12 2024 - [debug]   ok.
    Sun Apr 28 17:16:12 2024 - [info] All SSH connection tests passed successfully.
    2、检查复制集群状态:
    [root@slave02 scripts]# masterha_check_repl --conf=/etc/mha/mha.conf
    Sun Apr 28 17:16:24 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
    Sun Apr 28 17:16:24 2024 - [info] Reading application default configuration from /etc/mha/mha.conf..
    Sun Apr 28 17:16:24 2024 - [info] Reading server configuration from /etc/mha/mha.conf..
    Sun Apr 28 17:16:24 2024 - [info] MHA::MasterMonitor version 0.58.
    Sun Apr 28 17:16:25 2024 - [info] GTID failover mode = 1
    Sun Apr 28 17:16:25 2024 - [info] Dead Servers:
    Sun Apr 28 17:16:25 2024 - [info] Alive Servers:
    Sun Apr 28 17:16:25 2024 - [info]   192.168.152.140(192.168.152.140:3306)
    Sun Apr 28 17:16:25 2024 - [info]   192.168.152.141(192.168.152.141:3306)
    Sun Apr 28 17:16:25 2024 - [info]   192.168.152.142(192.168.152.142:3306)
    Sun Apr 28 17:16:25 2024 - [info] Alive Slaves:
    Sun Apr 28 17:16:25 2024 - [info]   192.168.152.140(192.168.152.140:3306)  Version=5.7.37-log (oldest major version between slaves) log-bin:enabled
    Sun Apr 28 17:16:25 2024 - [info]     GTID ON
    Sun Apr 28 17:16:25 2024 - [info]     Replicating from 192.168.152.141(192.168.152.141:3306)
    Sun Apr 28 17:16:25 2024 - [info]     Primary candidate for the new Master (candidate_master is set)
    Sun Apr 28 17:16:25 2024 - [info]   192.168.152.142(192.168.152.142:3306)  Version=5.7.37-log (oldest major version between slaves) log-bin:enabled
    Sun Apr 28 17:16:25 2024 - [info]     GTID ON
    Sun Apr 28 17:16:25 2024 - [info]     Replicating from 192.168.152.141(192.168.152.141:3306)
    Sun Apr 28 17:16:25 2024 - [info]     Not candidate for the new Master (no_master is set)
    Sun Apr 28 17:16:25 2024 - [info] Current Alive Master: 192.168.152.141(192.168.152.141:3306)
    Sun Apr 28 17:16:25 2024 - [info] Checking slave configurations..
    Sun Apr 28 17:16:25 2024 - [info]  read_only=1 is not set on slave 192.168.152.142(192.168.152.142:3306).
    Sun Apr 28 17:16:25 2024 - [info] Checking replication filtering settings..
    Sun Apr 28 17:16:25 2024 - [info]  binlog_do_db= , binlog_ignore_db=
    Sun Apr 28 17:16:25 2024 - [info]  Replication filtering check ok.
    Sun Apr 28 17:16:25 2024 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
    Sun Apr 28 17:16:25 2024 - [info] Checking SSH publickey authentication settings on the current master..
    Sun Apr 28 17:16:25 2024 - [info] HealthCheck: SSH to 192.168.152.141 is reachable.
    Sun Apr 28 17:16:25 2024 - [info]
    192.168.152.141(192.168.152.141:3306) (current master)
     +--192.168.152.140(192.168.152.140:3306)
     +--192.168.152.142(192.168.152.142:3306)
    
    Sun Apr 28 17:16:25 2024 - [info] Checking replication health on 192.168.152.140..
    Sun Apr 28 17:16:25 2024 - [info]  ok.
    Sun Apr 28 17:16:25 2024 - [info] Checking replication health on 192.168.152.142..
    Sun Apr 28 17:16:25 2024 - [info]  ok.
    Sun Apr 28 17:16:25 2024 - [info] Checking master_ip_failover_script status:
    Sun Apr 28 17:16:25 2024 - [info]   /usr/local/scripts/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.152.141 --orig_master_ip=192.168.152.141 --orig_master_port=3306
    
    
    IN SCRIPT TEST====/sbin/ifconfig ens32:1 down==/sbin/ifconfig ens32:1 192.168.152.146/24===
    
    Checking the Status of the script.. OK
    Sun Apr 28 17:16:25 2024 - [info]  OK.
    Sun Apr 28 17:16:25 2024 - [warning] shutdown_script is not defined.
    Sun Apr 28 17:16:25 2024 - [info] Got exit code 0 (Not master dead).
    
    MySQL Replication Health is OK.
    
    3、检查自动切换后MHA监控服务状态:
    [root@slave02 scripts]# masterha_check_status --conf=/etc/mha/mha.conf
    mha is stopped(2:NOT_RUNNING).
    注意:如果MHA监控服务处于正常启动状态,则需要先关闭。
    [root@slave02 scripts]# masterha_stop --conf=/etc/mha/mha.conf
    
    4、执行手动在线切换(switchover)
    masterha_master_switch --conf=/etc/mha/mha.conf --master_state=alive --new_master_host=192.168.152.140 --new_master_port=3306 --orig_master_is_new_slave --running_updates_limit=10000
    
    --orig_master_is_new_slave:切换时加上此选项是将原 master 变更为 slave 节点,如果不加此选项,原来的 master 将不启用
    --running_updates_limit=10000:故障切换时,候选master如果有延迟的话,mha 切换不能成功,加上此参数表示延迟在此时间范围内都可切换(单位为s),但是切换的时间长短是由recover时relay日志的大小决定。
    
    切换输出日志如下:
    [root@slave02 scripts]# masterha_master_switch --conf=/etc/mha/mha.conf --master_state=alive --new_master_host=192.168.152.140 --new_master_port=3306 --orig_master_is_new_slave --running_updates_limit=10000
    Sun Apr 28 17:21:58 2024 - [info] MHA::MasterRotate version 0.58.
    Sun Apr 28 17:21:58 2024 - [info] Starting online master switch..
    Sun Apr 28 17:21:58 2024 - [info]
    Sun Apr 28 17:21:58 2024 - [info] * Phase 1: Configuration Check Phase..
    Sun Apr 28 17:21:58 2024 - [info]
    Sun Apr 28 17:21:58 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
    Sun Apr 28 17:21:58 2024 - [info] Reading application default configuration from /etc/mha/mha.conf..
    Sun Apr 28 17:21:58 2024 - [info] Reading server configuration from /etc/mha/mha.conf..
    Sun Apr 28 17:21:59 2024 - [info] GTID failover mode = 1
    Sun Apr 28 17:21:59 2024 - [info] Current Alive Master: 192.168.152.141(192.168.152.141:3306)
    Sun Apr 28 17:21:59 2024 - [info] Alive Slaves:
    Sun Apr 28 17:21:59 2024 - [info]   192.168.152.140(192.168.152.140:3306)  Version=5.7.37-log (oldest major version between slaves) log-bin:enabled
    Sun Apr 28 17:21:59 2024 - [info]     GTID ON
    Sun Apr 28 17:21:59 2024 - [info]     Replicating from 192.168.152.141(192.168.152.141:3306)
    Sun Apr 28 17:21:59 2024 - [info]     Primary candidate for the new Master (candidate_master is set)
    Sun Apr 28 17:21:59 2024 - [info]   192.168.152.142(192.168.152.142:3306)  Version=5.7.37-log (oldest major version between slaves) log-bin:enabled
    Sun Apr 28 17:21:59 2024 - [info]     GTID ON
    Sun Apr 28 17:21:59 2024 - [info]     Replicating from 192.168.152.141(192.168.152.141:3306)
    Sun Apr 28 17:21:59 2024 - [info]     Not candidate for the new Master (no_master is set)
    
    It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.152.141(192.168.152.141:3306)? (YES/no): yes
    Sun Apr 28 17:22:02 2024 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
    Sun Apr 28 17:22:02 2024 - [info]  ok.
    Sun Apr 28 17:22:02 2024 - [info] Checking MHA is not monitoring or doing failover..
    Sun Apr 28 17:22:02 2024 - [info] Checking replication health on 192.168.152.140..
    Sun Apr 28 17:22:02 2024 - [info]  ok.
    Sun Apr 28 17:22:02 2024 - [info] Checking replication health on 192.168.152.142..
    Sun Apr 28 17:22:02 2024 - [info]  ok.
    Sun Apr 28 17:22:02 2024 - [info] 192.168.152.140 can be new master.
    Sun Apr 28 17:22:02 2024 - [info]
    From:
    192.168.152.141(192.168.152.141:3306) (current master)
     +--192.168.152.140(192.168.152.140:3306)
     +--192.168.152.142(192.168.152.142:3306)
    
    To:
    192.168.152.140(192.168.152.140:3306) (new master)
     +--192.168.152.142(192.168.152.142:3306)
     +--192.168.152.141(192.168.152.141:3306)
    
    Starting master switch from 192.168.152.141(192.168.152.141:3306) to 192.168.152.140(192.168.152.140:3306)? (yes/NO): yes
    Sun Apr 28 17:22:03 2024 - [info] Checking whether 192.168.152.140(192.168.152.140:3306) is ok for the new master..
    Sun Apr 28 17:22:03 2024 - [info]  ok.
    Sun Apr 28 17:22:03 2024 - [info] 192.168.152.141(192.168.152.141:3306): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host.
    Sun Apr 28 17:22:03 2024 - [info] 192.168.152.141(192.168.152.141:3306): Resetting slave pointing to the dummy host.
    Sun Apr 28 17:22:03 2024 - [info] ** Phase 1: Configuration Check Phase completed.
    Sun Apr 28 17:22:03 2024 - [info]
    Sun Apr 28 17:22:03 2024 - [info] * Phase 2: Rejecting updates Phase..
    Sun Apr 28 17:22:03 2024 - [info]
    Sun Apr 28 17:22:03 2024 - [info] Executing master ip online change script to disable write on the current master:
    Sun Apr 28 17:22:03 2024 - [info]   /usr/local/scripts/master_ip_online_change --command=stop --orig_master_host=192.168.152.141 --orig_master_ip=192.168.152.141 --orig_master_port=3306 --orig_master_user='monitor' --new_master_host=192.168.152.140 --new_master_ip=192.168.152.140 --new_master_port=3306 --new_master_user='monitor' --orig_master_ssh_user=root --new_master_ssh_user=root   --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx
    
    
    
    ***************************************************************
    Disabling the VIP - 192.168.152.146/24 on old master: 192.168.152.141
    ***************************************************************
    
    
    
    Sun Apr 28 17:22:03 2024 - [info]  ok.
    Sun Apr 28 17:22:03 2024 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
    Sun Apr 28 17:22:03 2024 - [info] Executing FLUSH TABLES WITH READ LOCK..
    Sun Apr 28 17:22:03 2024 - [info]  ok.
    Sun Apr 28 17:22:03 2024 - [info] Orig master binlog:pos is mysql-bin.000015:234.
    Sun Apr 28 17:22:03 2024 - [info]  Waiting to execute all relay logs on 192.168.152.140(192.168.152.140:3306)..
    Sun Apr 28 17:22:03 2024 - [info]  master_pos_wait(mysql-bin.000015:234) completed on 192.168.152.140(192.168.152.140:3306). Executed 0 events.
    Sun Apr 28 17:22:03 2024 - [info]   done.
    Sun Apr 28 17:22:03 2024 - [info] Getting new master's binlog name and position..
    Sun Apr 28 17:22:03 2024 - [info]  mysql-bin.000028:234
    Sun Apr 28 17:22:03 2024 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.152.140', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx';
    Sun Apr 28 17:22:03 2024 - [info] Executing master ip online change script to allow write on the new master:
    Sun Apr 28 17:22:03 2024 - [info]   /usr/local/scripts/master_ip_online_change --command=start --orig_master_host=192.168.152.141 --orig_master_ip=192.168.152.141 --orig_master_port=3306 --orig_master_user='monitor' --new_master_host=192.168.152.140 --new_master_ip=192.168.152.140 --new_master_port=3306 --new_master_user='monitor' --orig_master_ssh_user=root --new_master_ssh_user=root   --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx
    
    
    
    ***************************************************************
    Enabling the VIP - 192.168.152.146/24 on new master: 192.168.152.140
    ***************************************************************
    
    
    
    Sun Apr 28 17:22:03 2024 - [info]  ok.
    Sun Apr 28 17:22:03 2024 - [info] Setting read_only=0 on 192.168.152.140(192.168.152.140:3306)..
    Sun Apr 28 17:22:03 2024 - [info]  ok.
    Sun Apr 28 17:22:03 2024 - [info]
    Sun Apr 28 17:22:03 2024 - [info] * Switching slaves in parallel..
    Sun Apr 28 17:22:03 2024 - [info]
    Sun Apr 28 17:22:03 2024 - [info] -- Slave switch on host 192.168.152.142(192.168.152.142:3306) started, pid: 7900
    Sun Apr 28 17:22:03 2024 - [info]
    Sun Apr 28 17:22:05 2024 - [info] Log messages from 192.168.152.142 ...
    Sun Apr 28 17:22:05 2024 - [info]
    Sun Apr 28 17:22:03 2024 - [info]  Waiting to execute all relay logs on 192.168.152.142(192.168.152.142:3306)..
    Sun Apr 28 17:22:03 2024 - [info]  master_pos_wait(mysql-bin.000015:234) completed on 192.168.152.142(192.168.152.142:3306). Executed 0 events.
    Sun Apr 28 17:22:03 2024 - [info]   done.
    Sun Apr 28 17:22:03 2024 - [info]  Resetting slave 192.168.152.142(192.168.152.142:3306) and starting replication from the new master 192.168.152.140(192.168.152.140:3306)..
    Sun Apr 28 17:22:03 2024 - [info]  Executed CHANGE MASTER.
    Sun Apr 28 17:22:04 2024 - [info]  Slave started.
    Sun Apr 28 17:22:05 2024 - [info] End of log messages from 192.168.152.142 ...
    Sun Apr 28 17:22:05 2024 - [info]
    Sun Apr 28 17:22:05 2024 - [info] -- Slave switch on host 192.168.152.142(192.168.152.142:3306) succeeded.
    Sun Apr 28 17:22:05 2024 - [info] Unlocking all tables on the orig master:
    Sun Apr 28 17:22:05 2024 - [info] Executing UNLOCK TABLES..
    Sun Apr 28 17:22:05 2024 - [info]  ok.
    Sun Apr 28 17:22:05 2024 - [info] Starting orig master as a new slave..
    Sun Apr 28 17:22:05 2024 - [info]  Resetting slave 192.168.152.141(192.168.152.141:3306) and starting replication from the new master 192.168.152.140(192.168.152.140:3306)..
    Sun Apr 28 17:22:05 2024 - [info]  Executed CHANGE MASTER.
    Sun Apr 28 17:22:06 2024 - [info]  Slave started.
    Sun Apr 28 17:22:06 2024 - [info] All new slave servers switched successfully.
    Sun Apr 28 17:22:06 2024 - [info]
    Sun Apr 28 17:22:06 2024 - [info] * Phase 5: New master cleanup phase..
    Sun Apr 28 17:22:06 2024 - [info]
    Sun Apr 28 17:22:07 2024 - [info]  192.168.152.140: Resetting slave info succeeded.
    Sun Apr 28 17:22:07 2024 - [info] Switching master to 192.168.152.140(192.168.152.140:3306) completed successfully.
    [root@slave02 scripts]#
    
    
    5、从节点收尾工作:
    
    mysql>  show global variables like 'read_only';
    +---------------+-------+
    | Variable_name | Value |
    +---------------+-------+
    | read_only     | OFF   |
    +---------------+-------+
    1 row in set (0.00 sec)
    
    mysql> set global read_only=on;
    Query OK, 0 rows affected (0.00 sec)
    
    
    mysql> show global variables like 'relay_log_purge';
    +-----------------+-------+
    | Variable_name   | Value |
    +-----------------+-------+
    | relay_log_purge | OFF   |
    +-----------------+-------+
    1 row in set (0.00 sec)

6.4、添加relay_log手动清理脚本

主库和2个从库,都需要添加:
[root@master ~]# vi /usr/local/masterha/script/purge_relay_log.sh
#!/bin/bash
user=root
passwd=123456
port=3306
log_dir='/usr/local/masterha/relaylogs_purge'
work_dir='/var/tmp'
purge='/usr/bin/purge_relay_logs'

if [ ! -d $log_dir ]
then
   mkdir $log_dir -p
fi

$purge --user=$user --password=$passwd --disable_relay_log_purge --port=$port --workdir=$work_dir >> $log_dir/purge_relay_logs.log 2>&1
赋权:
chmod a+x /usr/local/masterha/script/purge_relay_log.sh
[root@slave1 ~]# crontab -e
30 22 * * * /bin/bash /usr/local/masterha/script/purge_relay_log.sh

 参考文献:

https://blog.csdn.net/weixin_61666110/article/details/129289206

https://blog.csdn.net/weixin_43328213/article/details/88429212

https://blog.csdn.net/m0_74250610/article/details/134650149

https://github.com/yoshinorim/mha4mysql-manager/wiki/Parameters

https://blog.csdn.net/misakivv/article/details/137128947

https://blog.csdn.net/m0_74250610/article/details/134650149

  • 28
    点赞
  • 28
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值