一.优缺点
HA方案没有最好,只有最合适;MMM虽然最古老&有点跟不上时代,但配置完成后运维基本不需要人工介入;其它几种多少都需要人工介入,MySQL8.0及以上强烈推荐MGR单主模式。其它几种HA方案网上的相关资料比较多,不再赘述,这篇主要介绍Orchestrator。
二.简介
Orchestrator是使用go语言编写的MySQL高可用性和复制拓扑管理工具,支持复制拓扑结构的调整,自动故障转移和手动主从切换等。后端数据库用MySQL或SQLite存储元数据,并提供Web界面展示MySQL复制的拓扑关系及状态,通过Web可更改MySQL实例的复制关系和部分配置信息,同时也提供命令行和api接口,方便运维管理。相对比MHA来看最重要的是解决了管理节点的单点问题,其通过raft协议保证本身的高可用。
集群推荐使用3节点,当然,在不使用raft&数据安全性要求不是特别高的情况下,2节点也可以运行。
三.配置步骤
3.1 系统架构
3.2 MySQL安装
网上文档比较多,不再赘述。
3.3 主从配置
3.3.1 my.cnf
slave_net_timeout=8; #all nodes
read_only = ON; #only for slave node
gtid_mode=on #all nodes
enforce_gtid_consistency=on #all nodes
binlog_format=row #all nodes
3.3.2 config master-slave
create user ‘repl’@‘192.168.100.%’ identified by ‘Repl#2021’;
grant replication slave on . to ‘repl’@‘192.168.100.%’;
flush privileges;
change master to master_host=‘k8s01’, master_user=‘repl’, master_password=‘Repl#2021’, master_port=3310, master_auto_position=1,master_heartbeat_period=2,master_connect_retry=1,master_retry_count=86400;
start slave;
3.4 config Orchestrator
3.4.1.create mysql topology user #master node
create user ‘orchestrator’@’%’ identified by ‘Orch#2021’;
grant super,process,replication slave,reload on . to ‘orchestrator’@’%’;
grant select on mysql.slave_master_info to ‘orchestrator’@’%’;
flush privileges;
3.4.2.config ssh
#all nodes
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
#all nodes
ssh-copy-id -i ~/.ssh/id_rsa.pub root@IP_others
ssh-copy-id -i ~/.ssh/id_rsa.pub root@IP_others
3.4.3 install package #all nodes
#yum -y install net-tools
yum install -y http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-7-13.noarch.rpm
yum install -y jq
wget https://github.com/openark/orchestrator/releases/download/v3.2.5/orchestrator-3.2.5-1.x86_64.rpm
rpm -ivh orchestrator-3.0.13-1.x86_64.rpm
#ln -s /usr/local/orchestrator/orchestrator /usr/bin/orchestrator
3.4.4.config vip
vi /usr/local/orchestrator/failovervip.sh #all nodes
#!/bin/bash
source /etc/profile
original_master=$3
new_master=$4
ssh_user=root
vip_eth=ens33
vip=192.168.100.100
vip_mask=24
echo $failedHost
echo $vip
echo "Down VIP on $original_master"
eval echo '`date +%Y-%m-%d\ %H:%M:%S` Warning: ssh $ssh_user@$original_master "sudo -n /sbin/ip address delete $vip/$vip_mask dev $vip_eth"'
ssh -o StrictHostKeyChecking=no $ssh_user@$original_master "sudo -n /sbin/ip address delete $vip/$vip_mask dev $vip_eth"
echo "Up VIP on $new_master"
eval echo '`date +%Y-%m-%d\ %H:%M:%S` Warning: ssh $ssh_user@$new_master "sudo -n /sbin/ip address add $vip/$vip_mask dev $vip_eth"'
ssh -o StrictHostKeyChecking=no $ssh_user@$new_master "sudo -n /sbin/ip address add $vip/$vip_mask dev $vip_eth"
echo "Refresh arp"
eval echo '`date +%Y-%m-%d\ %H:%M:%S` Warning: ssh $ssh_user@$new_master "sudo /sbin/arping -q -c 3 -A $vip -I $vip_eth"'
ssh -o StrictHostKeyChecking=no $ssh_user@$new_master "sudo /sbin/arping -q -c 3 -A $vip -I $vip_eth"
chmod +x /usr/local/orchestrator/failovervip.sh
scp /usr/local/orchestrator/failovervip.sh k8s02:/usr/local/orchestrator/
scp /usr/local/orchestrator/failovervip.sh k8s03:/usr/local/orchestrator/
/usr/local/orchestrator/failovervip.sh 1 2 192.168.100.102 192.168.100.101
3.4.5 config parameters
cp /usr/local/orchestrator/orchestrator-sample.conf.json /etc/orchestrator.conf.json
vi /etc/orchestrator.conf.json #注意"RaftBind": "k8s01"每节点不一样
{
"Debug": true,
"EnableSyslog": false,
"ListenAddress": ":3000",
"MySQLTopologyUser": "orchestrator",
"MySQLTopologyPassword": "Orch#2021",
"MySQLTopologyCredentialsConfigFile": "",
"MySQLTopologySSLPrivateKeyFile": "",
"MySQLTopologySSLCertFile": "",
"MySQLTopologySSLCAFile": "",
"MySQLTopologySSLSkipVerify": true,
"MySQLTopologyUseMutualTLS": false,
"BackendDB": "sqlite",
"SQLite3DataFile": "/usr/local/orchestrator/orchestrator.db",
"DefaultInstancePort": 3310,
"DiscoverByShowSlaveHosts": true,
"InstancePollSeconds": 5,
"DiscoveryIgnoreReplicaHostnameFilters": [
"a_host_i_want_to_ignore[.]example[.]com",
".*[.]ignore_all_hosts_from_this_domain[.]example[.]com",
"a_host_with_extra_port_i_want_to_ignore[.]example[.]com:3307"
],
"UnseenInstanceForgetHours": 240,
"SnapshotTopologiesIntervalHours": 0,
"InstanceBulkOperationsWaitTimeoutSeconds": 10,
"HostnameResolveMethod": "default",
"MySQLHostnameResolveMethod": "@@hostname",
"SkipBinlogServerUnresolveCheck": true,
"ExpiryHostnameResolvesMinutes": 60,
"RejectHostnameResolvePattern": "",
"ReasonableReplicationLagSeconds": 10,
"ProblemIgnoreHostnameFilters": [],
"VerifyReplicationFilters": false,
"ReasonableMaintenanceReplicationLagSeconds": 20,
"CandidateInstanceExpireMinutes": 60,
"AuditLogFile": "",
"AuditToSyslog": false,
"RemoveTextFromHostnameDisplay": ".mydomain.com:3310",
"ReadOnly": false,
"AuthenticationMethod": "",
"HTTPAuthUser": "",
"HTTPAuthPassword": "",
"AuthUserHeader": "",
"PowerAuthUsers": [
"*"
],
"ClusterNameToAlias": {
"127.0.0.1": "test suite"
},
"ReplicationLagQuery": "",
"DetectClusterAliasQuery": "SELECT SUBSTRING_INDEX(@@hostname, '.', 1)",
"DetectClusterDomainQuery": "",
"DetectInstanceAliasQuery": "",
"DetectPromotionRuleQuery": "",
"DataCenterPattern": "[.]([^.]+)[.][^.]+[.]mydomain[.]com",
"PhysicalEnvironmentPattern": "[.]([^.]+[.][^.]+)[.]mydomain[.]com",
"PromotionIgnoreHostnameFilters": [],
"DetectSemiSyncEnforcedQuery": "",
"ServeAgentsHttp": false,
"AgentsServerPort": ":3001",
"AgentsUseSSL": false,
"AgentsUseMutualTLS": false,
"AgentSSLSkipVerify": false,
"AgentSSLPrivateKeyFile": "",
"AgentSSLCertFile": "",
"AgentSSLCAFile": "",
"AgentSSLValidOUs": [],
"UseSSL": false,
"UseMutualTLS": false,
"SSLSkipVerify": false,
"SSLPrivateKeyFile": "",
"SSLCertFile": "",
"SSLCAFile": "",
"SSLValidOUs": [],
"URLPrefix": "",
"StatusEndpoint": "/api/status",
"StatusSimpleHealth": true,
"StatusOUVerify": false,
"AgentPollMinutes": 60,
"UnseenAgentForgetHours": 6,
"StaleSeedFailMinutes": 60,
"SeedAcceptableBytesDiff": 8192,
"PseudoGTIDPattern": "",
"PseudoGTIDPatternIsFixedSubstring": false,
"PseudoGTIDMonotonicHint": "asc:",
"DetectPseudoGTIDQuery": "",
"BinlogEventsChunkSize": 10000,
"SkipBinlogEventsContaining": [],
"ReduceReplicationAnalysisCount": true,
"FailureDetectionPeriodBlockMinutes": 60,
"FailMasterPromotionOnLagMinutes": 0,
"RecoveryPeriodBlockSeconds": 3600,
"RecoveryIgnoreHostnameFilters": [],
"RecoverMasterClusterFilters": [
"*"
],
"RecoverIntermediateMasterClusterFilters": [
"*"
],
"OnFailureDetectionProcesses": [
"echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countSlaves}' >> /tmp/recovery.log"
],
"PreGracefulTakeoverProcesses": [
"echo 'Planned takeover about to take place on {failureCluster}. Master will switch to read_only' >> /tmp/recovery.log"
],
"PreFailoverProcesses": [
"echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/recovery.log"
],
"PostFailoverProcesses": [
"echo '(for all types) Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log"
],
"PostUnsuccessfulFailoverProcesses": [],
"PostMasterFailoverProcesses": [
"/usr/local/orchestrator/failovervip.sh {failureType} {failureClusterAlias} {failedHost} {successorHost} >> /tmp/orc_failover.log"
],
"PostIntermediateMasterFailoverProcesses": [
"echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log"
],
"PostGracefulTakeoverProcesses": [
"echo 'Planned takeover complete' >> /tmp/recovery.log"
],
"CoMasterRecoveryMustPromoteOtherCoMaster": true,
"DetachLostSlavesAfterMasterFailover": true,
"ApplyMySQLPromotionAfterMasterFailover": true,
"PreventCrossDataCenterMasterFailover": false,
"PreventCrossRegionMasterFailover": false,
"MasterFailoverDetachReplicaMasterHost": false,
"MasterFailoverLostInstancesDowntimeMinutes": 0,
"PostponeReplicaRecoveryOnLagMinutes": 0,
"OSCIgnoreHostnameFilters": [],
"GraphiteAddr": "",
"GraphitePath": "",
"GraphiteConvertHostnameDotsToUnderscores": true,
"ConsulAddress": "",
"ConsulAclToken": "",
"ConsulKVStoreProvider": "consul",
"RaftEnabled": true,
"RaftBind": "k8s01",
"RaftDataDir": "/var/lib/orchestrator",
"DefaultRaftPort": 10008,
"RaftNodes": [
"k8s01",
"k8s02",
"k8s03"
]
}
3.4.6 start orchestrator #all nodes
cd /usr/local/orchestrator && nohup ./orchestrator --config=/etc/orchestrator.conf.json --debug http &
3.5 check
http://192.168.100.102:3000/
k8s01关机:
[root@k8s02 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:84:36:60 brd ff:ff:ff:ff:ff:ff
inet 192.168.100.103/24 brd 192.168.100.255 scope global noprefixroute ens33
valid_lft forever preferred_lft forever
inet 192.168.100.100/24 scope global secondary ens33
valid_lft forever preferred_lft forever
inet6 fe80::a00a:4e4e:2497:d0ed/64 scope link noprefixroute
valid_lft forever preferred_lft forever
mysql> show slave status\G
Empty set (0.00 sec)
mysql> show variables like ‘read_only’;
±--------------±------+
| Variable_name | Value |
±--------------±------+
| read_only | OFF |
±--------------±------+
1 row in set (0.01 sec)
k8s02:
mysql> show slave status\G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: k8s02
Master_User: repl
Master_Port: 3310
Connect_Retry: 1
Master_Log_File: mysql-bin.000006
Read_Master_Log_Pos: 194
Relay_Log_File: k8s03-relay-bin.000002
Relay_Log_Pos: 367
Relay_Master_Log_File: mysql-bin.000006
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 194
Relay_Log_Space: 574
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 2
Master_UUID: 5f3e3530-db10-11eb-848c-000c2921e80e
Master_Info_File: mysql.slave_master_info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set: 5c914d7b-db10-11eb-8456-000c29eda9a7:1-4
Auto_Position: 1
Replicate_Rewrite_DB:
Channel_Name:
Master_TLS_Version:
1 row in set (0.00 sec)
mysql> show variables like ‘read_only’;
±--------------±------+
| Variable_name | Value |
±--------------±------+
| read_only | ON |
±--------------±------+
1 row in set (0.00 sec)
mysql>