[orchestrator] 002: Single-Node Installation and Deployment + VIP Failover

Environment

OS version: Red Hat Enterprise Linux Server release 7.2

orchestrator components:

| Role | IP | Port |
| --- | --- | --- |
| orchestrator server | 10.0.0.238 | 3000 (default) |
| orchestrator metadata database (MySQL) | 10.0.0.235 | 3311 |
| orchestrator startup user | dbadmin (also used for the passwordless SSH setup in VIP switching) | - |

Managed MySQL environment

| Role | IP | Port |
| --- | --- | --- |
| mysql-master | 10.0.0.236 | 3315 |
| mysql-slave | 10.0.0.237 | 3315 |
| VIP | 10.0.0.163 | - |

1. orchestrator metadata database setup — (10.0.0.235:3311)

Create the database and user
# Create the metadata database (the name must match MySQLOrchestratorDatabase in orchestrator.conf.json; the config later in this article uses db_orch)
create database db_orch;
# Create the user
create user usr_orch_meta@'%' identified by '********';
grant all on db_orch.* to usr_orch_meta@'%';
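Before moving on, it is worth confirming that this account can reach the metadata database from the orchestrator server. A minimal check, assuming the mysql client is installed on 10.0.0.238:

# Run on 10.0.0.238; enter the usr_orch_meta password when prompted
mysql -h 10.0.0.235 -P 3311 -u usr_orch_meta -p -e "SELECT 1;" db_orch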

2. orchestrator software installation — (10.0.0.238)

Download and extract the software

Download: https://github.com/openark/orchestrator/releases

Latest version at the time of writing: orchestrator-3.2.6-linux-amd64.tar.gz

Copy the tarball to the 238 server, extract it, and copy the contents into place:

tar -xvzf orchestrator-3.2.6-linux-amd64.tar.gz
### Copy to the root filesystem
cp -r usr/  /
cp -r etc/  /

Directory layout:

(screenshot: directory listing after extraction)

Edit the configuration file
cd /usr/local/orchestrator
# Copy a config file from the sample
cp orchestrator-sample.conf.json  orchestrator.conf.json
# Edit the config file
vim orchestrator.conf.json

The main parameters to change are:

## Credentials for topology discovery, i.e. the account orchestrator uses to manage the MySQL instances
"MySQLTopologyUser": "orc_client_user",
"MySQLTopologyPassword": "orc_client_password",
 
## Connection info for the orchestrator metadata database
"MySQLOrchestratorHost": "127.0.0.1",
"MySQLOrchestratorPort": 3306,
"MySQLOrchestratorDatabase": "orchestrator",
"MySQLOrchestratorUser": "orc_server_user",
"MySQLOrchestratorPassword": "orc_server_password",
 
## Enable web user authentication
"AuthenticationMethod": "",
"HTTPAuthUser": "",
"HTTPAuthPassword": "",
 
## Hostname resolution method
"HostnameResolveMethod": "default",
"MySQLHostnameResolveMethod": "@@hostname",
 
# Patterns matching the clusters for which automatic recovery is allowed
"RecoverMasterClusterFilters": [
    "_master_pattern_"
  ],
"RecoverIntermediateMasterClusterFilters": [
    "_intermediate_master_pattern_"
  ],

The complete modified configuration file:

{
  "Debug": true,
  "EnableSyslog": false,
  "ListenAddress": ":3000",
  "MySQLTopologyUser": "usr_orch_mng",
  "MySQLTopologyPassword": "Cbd%661b",
  "MySQLTopologyCredentialsConfigFile": "",
  "MySQLTopologySSLPrivateKeyFile": "",
  "MySQLTopologySSLCertFile": "",
  "MySQLTopologySSLCAFile": "",
  "MySQLTopologySSLSkipVerify": true,
  "MySQLTopologyUseMutualTLS": false,
  "MySQLOrchestratorHost": "10.0.0.235",
  "MySQLOrchestratorPort": 3311,
  "MySQLOrchestratorDatabase": "db_orch",
  "MySQLOrchestratorUser": "usr_orch_meta",
  "MySQLOrchestratorPassword": "Abc%d98e",
  "MySQLOrchestratorCredentialsConfigFile": "",
  "MySQLOrchestratorSSLPrivateKeyFile": "",
  "MySQLOrchestratorSSLCertFile": "",
  "MySQLOrchestratorSSLCAFile": "",
  "MySQLOrchestratorSSLSkipVerify": true,
  "MySQLOrchestratorUseMutualTLS": false,
  "MySQLConnectTimeoutSeconds": 1,
  "DefaultInstancePort": 3306,
  "DiscoverByShowSlaveHosts": true,
  "InstancePollSeconds": 5,
  "DiscoveryIgnoreReplicaHostnameFilters": [
    "a_host_i_want_to_ignore[.]example[.]com",
    ".*[.]ignore_all_hosts_from_this_domain[.]example[.]com",
    "a_host_with_extra_port_i_want_to_ignore[.]example[.]com:3307"
  ],
  "UnseenInstanceForgetHours": 240,
  "SnapshotTopologiesIntervalHours": 0,
  "InstanceBulkOperationsWaitTimeoutSeconds": 10,
  "HostnameResolveMethod": "none",
  "MySQLHostnameResolveMethod": "@@report_host",
  "SkipBinlogServerUnresolveCheck": true,
  "ExpiryHostnameResolvesMinutes": 60,
  "RejectHostnameResolvePattern": "",
  "ReasonableReplicationLagSeconds": 10,
  "ProblemIgnoreHostnameFilters": [],
  "VerifyReplicationFilters": false,
  "ReasonableMaintenanceReplicationLagSeconds": 20,
  "CandidateInstanceExpireMinutes": 60,
  "AuditLogFile": "",
  "AuditToSyslog": false,
  "RemoveTextFromHostnameDisplay": ".mydomain.com:3306",
  "ReadOnly": false,
  "AuthenticationMethod": "basic",
  "HTTPAuthUser": "admin",
  "HTTPAuthPassword": "123456",
  "AuthUserHeader": "",
  "PowerAuthUsers": [
    "*"
  ],
  "ClusterNameToAlias": {
    "127.0.0.1": "test suite"
  },
  "ReplicationLagQuery": "",
  "DetectClusterAliasQuery": "SELECT SUBSTRING_INDEX(@@hostname, '.', 1)",
  "DetectClusterDomainQuery": "",
  "DetectInstanceAliasQuery": "",
  "DetectPromotionRuleQuery": "",
  "DataCenterPattern": "[.]([^.]+)[.][^.]+[.]mydomain[.]com",
  "PhysicalEnvironmentPattern": "[.]([^.]+[.][^.]+)[.]mydomain[.]com",
  "PromotionIgnoreHostnameFilters": [],
  "DetectSemiSyncEnforcedQuery": "",
  "ServeAgentsHttp": false,
  "AgentsServerPort": ":3001",
  "AgentsUseSSL": false,
  "AgentsUseMutualTLS": false,
  "AgentSSLSkipVerify": false,
  "AgentSSLPrivateKeyFile": "",
  "AgentSSLCertFile": "",
  "AgentSSLCAFile": "",
  "AgentSSLValidOUs": [],
  "UseSSL": false,
  "UseMutualTLS": false,
  "SSLSkipVerify": false,
  "SSLPrivateKeyFile": "",
  "SSLCertFile": "",
  "SSLCAFile": "",
  "SSLValidOUs": [],
  "URLPrefix": "",
  "StatusEndpoint": "/api/status",
  "StatusSimpleHealth": true,
  "StatusOUVerify": false,
  "AgentPollMinutes": 60,
  "UnseenAgentForgetHours": 6,
  "StaleSeedFailMinutes": 60,
  "SeedAcceptableBytesDiff": 8192,
  "PseudoGTIDPattern": "",
  "PseudoGTIDPatternIsFixedSubstring": false,
  "PseudoGTIDMonotonicHint": "asc:",
  "DetectPseudoGTIDQuery": "",
  "BinlogEventsChunkSize": 10000,
  "SkipBinlogEventsContaining": [],
  "ReduceReplicationAnalysisCount": true,
  "FailureDetectionPeriodBlockMinutes": 60,
  "FailMasterPromotionOnLagMinutes": 0,
  "RecoveryPeriodBlockSeconds": 3600,
  "RecoveryIgnoreHostnameFilters": [],
  "RecoverMasterClusterFilters": [
    "*"
  ],
  "RecoverIntermediateMasterClusterFilters": [
    "*"
  ],
  "OnFailureDetectionProcesses": [
    "echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countSlaves}' >> /tmp/recovery.log"
  ],
  "PreGracefulTakeoverProcesses": [
    "echo 'Planned takeover about to take place on {failureCluster}. Master will switch to read_only' >> /tmp/recovery.log"
  ],
  "PreFailoverProcesses": [
    "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/recovery.log"
  ],
  "PostFailoverProcesses": [
    "echo 'failover found!!! postFailover scripts will exec'",
    "bash /usr/local/orchestrator/orch_hook.sh"
  ],
  "PostUnsuccessfulFailoverProcesses": [],
  "PostMasterFailoverProcesses": [
    "echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Promoted: {successorHost}:{successorPort}' >> /tmp/recovery.log"
  ],
  "PostIntermediateMasterFailoverProcesses": [
    "echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log"
  ],
  "PostGracefulTakeoverProcesses": [
    "echo 'Planned takeover complete' >> /tmp/recovery.log"
  ],
  "CoMasterRecoveryMustPromoteOtherCoMaster": true,
  "DetachLostSlavesAfterMasterFailover": true,
  "ApplyMySQLPromotionAfterMasterFailover": true,
  "PreventCrossDataCenterMasterFailover": false,
  "PreventCrossRegionMasterFailover": false,
  "MasterFailoverDetachReplicaMasterHost": false,
  "MasterFailoverLostInstancesDowntimeMinutes": 0,
  "PostponeReplicaRecoveryOnLagMinutes": 0,
  "OSCIgnoreHostnameFilters": [],
  "GraphiteAddr": "",
  "GraphitePath": "",
  "GraphiteConvertHostnameDotsToUnderscores": true,
  "ConsulAddress": "",
  "ConsulAclToken": "",
  "ConsulKVStoreProvider": "consul"
}
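A malformed JSON file will keep orchestrator from starting, so it can save time to sanity-check the syntax before launching. A quick check, assuming a system Python is available on 10.0.0.238:

# Reports "config OK" when the JSON parses; otherwise shows the syntax error
python -m json.tool /usr/local/orchestrator/orchestrator.conf.json > /dev/null && echo "config OK"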

Start the service
su - dbadmin
# Change into the working directory (required)
cd /usr/local/orchestrator
# Start in the foreground
./orchestrator --config=./orchestrator.conf.json http
# Or run in the background with nohup
nohup ./orchestrator --config=./orchestrator.conf.json http &
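Once the process is up, a quick way to confirm that the web service is listening and can reach its metadata database is the status endpoint. A minimal check, assuming curl and the basic-auth credentials from the configuration above:

# Run on 10.0.0.238; a healthy instance returns an OK-style JSON response
curl -s -u admin:123456 http://127.0.0.1:3000/api/status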

3. Managed MySQL configuration

MySQL installation and master/slave replication setup

Add to my.cnf — (required on both 10.0.0.236 and 10.0.0.237)
# IP of the server hosting the instance; the 10.0.0.236:3315 instance is used as the example
report_host = "10.0.0.236"
report_port = 3315
slave_net_timeout = 4
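report_host and report_port are not dynamic variables, so the instance must be restarted for them to take effect. One way to confirm they are active after the restart (the root account here is only an example):

# Run against each managed instance
mysql -h 10.0.0.236 -P 3315 -u root -p -e "SHOW VARIABLES LIKE 'report_%'; SHOW VARIABLES LIKE 'slave_net_timeout';"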
Replication setup — (run only on the slave, 237)
# The key parameters are MASTER_CONNECT_RETRY=1 and MASTER_RETRY_COUNT=86400
change master to 
master_host='10.0.0.236',
master_port=3315,
master_user='repl',
master_password='xxxx',
master_auto_position=1,
MASTER_CONNECT_RETRY=1,
MASTER_RETRY_COUNT=86400;
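After CHANGE MASTER TO, start replication and confirm both threads are running. A quick check on 237 (the root account is only an example):

# Slave_IO_Running and Slave_SQL_Running should both be Yes
mysql -h 10.0.0.237 -P 3315 -u root -p -e "START SLAVE; SHOW SLAVE STATUS\G" | grep -E 'Running|Master_Host|Retry'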
Create the managed-instance user — (run only on the master, 236; it replicates to the slave automatically)
CREATE USER 'usr_orch_mng'@'%' IDENTIFIED BY 'xxxxx';
GRANT SUPER, PROCESS, REPLICATION SLAVE, RELOAD ON *.* TO 'usr_orch_mng'@'%';
GRANT SELECT ON mysql.slave_master_info TO 'usr_orch_mng'@'%';
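It is worth confirming that this account can log in from the orchestrator server and see the topology. A minimal check from 10.0.0.238, assuming the mysql client is installed there:

# SHOW SLAVE HOSTS relies on report_host/report_port being set on the replicas
mysql -h 10.0.0.236 -P 3315 -u usr_orch_mng -p -e "SHOW SLAVE HOSTS;"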

4. Discovering nodes via the web UI

Access orchestrator in a browser

Open http://10.0.0.238:3000. If the page shown below appears, the service started correctly.

(screenshot: orchestrator web UI home page)

Instance discovery

As shown below, simply enter the MySQL instance's IP and port. This assumes the 'usr_orch_mng'@'%' user has already been created on the instance.

(screenshot: instance discovery dialog)

Once discovery completes, you can inspect the master/slave topology.

(screenshot: cluster topology view)
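Discovery does not have to go through the UI; the same can be done against orchestrator's HTTP API. A sketch using the /api/discover endpoint and the basic-auth credentials from the configuration above:

# Ask orchestrator on 238 to discover the master; the slave is then found automatically via SHOW SLAVE HOSTS
curl -s -u admin:123456 http://10.0.0.238:3000/api/discover/10.0.0.236/3315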

5. VIP failover configuration

The following describes switching the VIP with scripts.

Reference: https://www.percona.com/blog/2016/11/03/orchestrator-moving-vips-during-failover/

This setup uses the dbadmin user.

Create the dbadmin user on the orchestrator server and the managed servers
# Example commands
useradd dbadmin
passwd dbadmin
Configure passwordless SSH from the orchestrator server to the managed MySQL servers
# On the orchestrator server --- 10.0.0.238
su - dbadmin
ssh-keygen -t rsa
# Press Enter through all prompts to generate the key pair
# View the generated public key
cd ~/.ssh/
cat id_rsa.pub
# On the managed MySQL servers ----- 10.0.0.236/237
su - dbadmin
vim ~/.ssh/authorized_keys
# Paste the public key from above into this file
# Test from the orchestrator server ------ 10.0.0.238
ssh dbadmin@10.0.0.236 date
# Grant dbadmin passwordless sudo so it can run the ip commands for VIP switching ------ 10.0.0.236/237
visudo
dbadmin ALL=(ALL) NOPASSWD: ALL
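Before wiring in the scripts, it helps to check end to end that non-interactive remote sudo works, since that is exactly what orch_vip.sh relies on. A minimal test from 10.0.0.238 as dbadmin:

# Should print the eth0 addresses of 236 without prompting for any password
ssh dbadmin@10.0.0.236 "sudo -n ip address show dev eth0"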
Hook script configuration

Two scripts are used:

| Script | Source | Notes |
| --- | --- | --- |
| orch_hook.py | written by the author | Runs under Python 2.7. The orch_hook.sh scripts found online cannot adapt to multiple VIPs automatically, so this one was written instead. |
| orch_vip.sh | the GitHub link in the Percona article | Removes/attaches the VIP by remotely running ip addr del/add. Slightly modified: the email-sending part was removed. |

Copy both scripts to /usr/local/orchestrator and make them executable, for example as shown below.
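A minimal sketch, assuming both scripts sit in the current directory on 10.0.0.238:

# Install the hook scripts next to the orchestrator binary and make them usable by dbadmin
cp orch_hook.py orch_vip.sh /usr/local/orchestrator/
chmod +x /usr/local/orchestrator/orch_hook.py /usr/local/orchestrator/orch_vip.sh
chown dbadmin:dbadmin /usr/local/orchestrator/orch_hook.py /usr/local/orchestrator/orch_vip.sh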

orchestrator.conf.json changes

## After the change
"PostFailoverProcesses": [
    "echo '(for all types) Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log",
    "python /usr/local/orchestrator/orch_hook.py {failedHost} {failedPort} {successorHost} {failureType} >> /tmp/recovery.log"
  ],

Contents of orch_hook.py:

#!/usr/bin/python
# -*- coding:utf8 -*-
import sys
import os

'''
Positional arguments: old master IP, old master port, new master IP, failure type
{failedHost} {failedPort} {successorHost} {failureType}
'''
print sys.argv

# Update this VIP mapping when adding managed instances
all_vip_info = (
    ('ip', 'port', 'vip', 'interface'),
    ('10.0.0.237', '3315', '10.0.0.163', 'eth0'),
    ('10.0.0.236', '3315', '10.0.0.163', 'eth0')
)

# Basic environment
command_orch_vip = '/usr/local/orchestrator/orch_vip.sh'
ssh_user = 'dbadmin'
log_file = '/tmp/recovery.log'

# Read the script arguments
v_old_machine_ip = sys.argv[1]
v_old_machine_port = sys.argv[2]
v_new_machine_ip = sys.argv[3]
v_switch_type = sys.argv[4]

# Look up the VIP and interface for the failed master
v_vip = ''
v_network_interface = ''
for ip_info in all_vip_info:
    if v_old_machine_ip == ip_info[0] and int(v_old_machine_port) == int(ip_info[1]):
        v_vip = ip_info[2]
        v_network_interface = ip_info[3]
        break

# Parameter checks
all_params = {
    'old_machine_ip': v_old_machine_ip,
    'old_machine_port': v_old_machine_port,
    'new_machine_ip': v_new_machine_ip,
    'switch_type': v_switch_type,
    'vip': v_vip,
    'network_interface': v_network_interface
}

for k, v in all_params.items():
    if not v:
        print 'err: missing or empty param--{0}'.format(k)
        exit(1)

# Build and run the failover command
if v_switch_type == 'DeadMaster':
    switch_command = '{0} -d 1 -n {1} -i {2} -I {3} -u {4} -o {5} >> {6}'.\
        format(command_orch_vip, v_new_machine_ip, v_network_interface, v_vip, ssh_user, v_old_machine_ip, log_file)
    print 'switch command: {0}'.format(switch_command)
    res = os.popen(switch_command)
    print 'execute result: {0}'.format(res.readlines())
else:
    print 'not supported switch type--{0}'.format(v_switch_type)
    exit(1)
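The hook can be exercised by hand before relying on it in a real failover. Keep in mind that a successful run really does move the VIP, so only do this in a maintenance window. A sketch of a manual invocation with the same arguments orchestrator would pass:

# Simulate a dead master on 236: the VIP should be removed from 236 and attached to 237
python /usr/local/orchestrator/orch_hook.py 10.0.0.236 3315 10.0.0.237 DeadMaster
tail -n 20 /tmp/recovery.log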

Contents of orch_vip.sh:

#!/bin/bash

emailaddress="email@example.com"
sendmail=0
logfile='/tmp/recovery.log'

function usage {
  cat << EOF
 usage: $0 [-h] [-d master is dead] [-o old master ] [-s ssh options] [-n new master] [-i interface] [-I] [-u SSH user]
 
 OPTIONS:
    -h        Show this message
    -o string Old master hostname or IP address 
    -d int    If the master is dead this should be 1, otherwise 0
    -s string SSH options
    -n string New master hostname or IP address
    -i string Interface, for example eth0:1
    -I string Virtual IP
    -u string SSH user
EOF

}

while getopts ho:d:s:n:i:I:u: flag; do
  case $flag in
    o)
      orig_master="$OPTARG";
      ;;
    d)
      isitdead="${OPTARG}";
      ;;
    s)
      ssh_options="${OPTARG}";
      ;;
    n)
      new_master="$OPTARG";
      ;;
    i)
      interface="$OPTARG";
      ;;
    I)
      vip="$OPTARG";
      ;;
    u)
      ssh_user="$OPTARG";
      ;;
    h)
      usage;
      exit 0;
      ;;
    *)
      usage;
      exit 1;
      ;;
  esac
done


if [ $OPTIND -eq 1 ]; then 
    echo "No options were passed"; 
    usage;
fi

shift $(( OPTIND - 1 ));

# discover commands from our path
ssh=$(which ssh)
arping=$(which arping)
ip2util=$(which ip)

# command for adding our vip
cmd_vip_add="sudo -n $ip2util address add ${vip} dev ${interface}"
# command for deleting our vip
cmd_vip_del="sudo -n $ip2util address del ${vip}/32 dev ${interface}"
# command for discovering if our vip is enabled
cmd_vip_chk="sudo -n $ip2util address show dev ${interface} to ${vip%/*}/32"
# command for sending gratuitous arp to announce ip move
cmd_arp_fix="sudo -n $arping -c 1 -I ${interface} ${vip%/*}   "
# command for sending gratuitous arp to announce ip move on current server
cmd_local_arp_fix="sudo -n $arping -c 1 -I ${interface} ${vip%/*}   "

vip_stop() {
    rc=0

    # ensure the vip is removed
    $ssh ${ssh_options} -tt ${ssh_user}@${orig_master} \
    "[ -n \"\$(${cmd_vip_chk})\" ] && ${cmd_vip_del} && sudo ${ip2util} route flush cache || [ -z \"\$(${cmd_vip_chk})\" ]"
    rc=$?
    return $rc
}

vip_start() {
    rc=0

    # ensure the vip is added
    # this command should exit with failure if we are unable to add the vip
    # if the vip already exists always exit 0 (whether or not we added it)
    $ssh ${ssh_options} -tt ${ssh_user}@${new_master} \
     "[ -z \"\$(${cmd_vip_chk})\" ] && ${cmd_vip_add} && ${cmd_arp_fix} || [ -n \"\$(${cmd_vip_chk})\" ]"
    rc=$?
    $cmd_local_arp_fix
    return $rc
}

vip_status() {
    $arping -c 1 -I ${interface} ${vip%/*}   
    if ping -c 1 -W 1 "$vip"; then
        echo "status 0"
        return 0
    else
        echo "status 1"
        return 1
    fi
}

if [[ $isitdead == 0 ]]; then
    echo "Online failover"
    if vip_stop; then 
        if vip_start; then
            echo "$vip is moved to $new_master."
        else
            echo "Can't add $vip on $new_master!" 
            exit 1
        fi
    else
        echo $rc
        echo "Can't remove the $vip from orig_master!"
        exit 1
    fi


elif [[ $isitdead == 1 ]]; then
    echo "Master is dead, failover"
    # make sure the vip is not available 
    if vip_status; then 
        if vip_stop; then
            echo "$vip is removed from orig_master." 
        else
            echo "Couldn't remove $vip from orig_master." 
            exit 1
        fi
    fi
    if vip_start; then
          echo "$vip is moved to $new_master."
    else
          echo "Can't add $vip on $new_master!" 
          exit 1
    fi
else
    echo "Wrong argument, the master is dead or live?"

fi
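orch_vip.sh can also be tested on its own, using the same flags the Python hook builds. This moves the real VIP as well, so treat it as a drill. A sketch run from 10.0.0.238 as dbadmin:

# Treat 236 as dead (-d 1) and move the VIP to 237
bash /usr/local/orchestrator/orch_vip.sh -d 1 -n 10.0.0.237 -i eth0 -I 10.0.0.163 -u dbadmin -o 10.0.0.236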

6. Failover drills

Scenario: the master is shut down or killed with kill -9
# Log in to the master on 236 and shut it down
shutdown
# Watch the failover and the VIP switch log
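To confirm the outcome of the drill, check the recovery log on the orchestrator server and verify the VIP is now bound on 237. A minimal check using the addresses from this article:

# On 10.0.0.238: the hook output is appended to the recovery log
tail -n 50 /tmp/recovery.log
# On 10.0.0.237: the VIP should now be attached to eth0
ip address show dev eth0 | grep 10.0.0.163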
Manual switchover

Log in to the web UI, drag the 237 slave so that it becomes the new master of 236, and watch the switchover and the VIP move. The equivalent planned takeover can also be triggered via the API, as sketched below.
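A sketch of that API call, using the graceful-master-takeover endpoint (the exact path varies between orchestrator versions, so verify it against your instance's /api documentation before relying on it):

# Planned takeover of the cluster whose master is 10.0.0.236:3315, promoting 10.0.0.237:3315
curl -s -u admin:123456 "http://10.0.0.238:3000/api/graceful-master-takeover/10.0.0.236/3315/10.0.0.237/3315"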
