DNS
推荐从Bind-DLZ入手,资料多
可控制度更好(查询DNS记录SQL可定制)
据说性能差
Bind-DLZ
https://www.cnblogs.com/saneri/p/8178065.html
PowerDNS
SQL schema设置规范
性能比Bind-DLZ好
coredns 和k8s结合比较多
nacos 阿里开源,含DNS和服务发现
监控程序:
主从结构,支持GTID
监控逻辑:
按分组取出来机器节点
master:
尝试连接成功 ok 保持
失败 进行从库选举
slave:
检查是不是在线online
在线的:
连接成功,复制是不是正常,不正常下线,检验延迟
下线更新cmdb,dns records
下线的:
连接成功,复制正常,不延迟 上线
上线更新cmdb,dns records
从库选举:
获取从库列表
故障切换:
确认所有节点都复制中断,判断复制完成
对比,所有节点是不是复制到一个位置
通过获取的GTID对比,是不是所有节点同步到一个位置,如果不是,选举出来最靠前的做master
如果同步位置都一样,根据cmdb中定义的level选择,最大的那个
新的主节点选举成功后,其他节点change过来
更新cmdb中的角色,oldmaster->slave,选举出来的master,更改新主节点的read_only
更新dns_records
在线切换:
在oldmaster上设置super_read_only和read_only,并kill掉现有业务连接
获取oldmaster中的show master status信息
获取从节点中的show slave status对比,确认都同步完成
按cmdb中的level或是指定的节点为新master
更新重做master/slave架构
更新cmdb
更新dns
记录log
一定要提高英文阅读能力
[root@mydb1 ~]# wget https://releases.hashicorp.com/consul/1.4.0/consul_1.4.0_linux_amd64.zip
[root@mydb1 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
[root@mydb2 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
[root@mydb3 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
[root@mydb1 ~]# unzip consul_1.4.0_linux_amd64.zip
将consul拷贝至/opt/consul目录
[root@mydb1 ~]# cat /opt/consul/conf/server.json
{
"data_dir": "/data/consul",
"enable_script_checks": true,
"datacenter": "dc1",
"log_level": "INFO",
"server": true,
"bootstrap_expect": 3,
"ui":true
}
[root@mydb1 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb2 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb3 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb2 consul]# ./consul join 192.168.1.101
[root@mydb3 consul]# ./consul join 192.168.1.101
[root@mydb1 consul]# ./consul members
Node Address Status Type Build Protocol DC Segment
mydb1 192.168.1.101:8301 alive server 1.4.0 2 dc1
mydb2 192.168.1.102:8301 alive server 1.4.0 2 dc1
mydb3 192.168.1.103:8301 alive server 1.4.0 2 dc1
[root@mydb1 consul]# ./consul catalog nodes
Node ID Address DC
mydb1 52514e74 192.168.1.101 dc1
mydb2 aebbf0b2 192.168.1.102 dc1
mydb3 0e179069 192.168.1.103 dc1
# dig @127.0.0.1 -p 8600 mydb1.node.consul
# dig @127.0.0.1 -p 8600 mydb2.node.consul
# dig @127.0.0.1 -p 8600 mydb3.node.consul
[root@mydb1 consul]# ./consul operator raft list-peers
Node ID Address State Voter RaftProtocol
mydb1 52514e74-d063-cfe3-1d58-55fda9fc2451 192.168.1.101:8300 leader true 3
mydb2 aebbf0b2-09ad-f396-4c21-3f9ee40a16da 192.168.1.102:8300 follower true 3
mydb3 0e179069-7360-3866-d9a6-7ea60c540c04 192.168.1.103:8300 follower true 3
[root@mydb1 consul]# ./consul kv put id 11
Success! Data written to: id
[root@mydb1 consul]# ./consul kv get id
11
[root@mydb2 consul]# ./consul kv get id
11
[root@mydb3 consul]# ./consul kv get id
11
consul是用Raft来实现分布式一致性的
[root@mydb1 ~]# cat /opt/consul/conf/r-test-mgr-ser.json
{
"service": {
"name": "r-test-3306-mydb-ser",
"tags": ["测试-3306"],
"address": "192.168.1.101",
"meta": {
"meta": "for my service"
},
"port": 3306,
"enable_tag_override": false,
"checks": [
{
"args": ["/data/consul/shell/check_mysql_mgr_slave.sh"],
"interval": "1s"
}
]
}
}
[root@mydb1 ~]# cat /opt/consul/conf/w-test-mgr-ser.json
{
"service": {
"name": "w-test-3306-mydb-ser",
"tags": ["测试-3306"],
"address": "192.168.1.101",
"meta": {
"meta": "for my service"
},
"port": 3306,
"enable_tag_override": false,
"checks": [
{
"args": ["/data/consul/shell/check_mysql_mgr_master.sh"],
"interval": "10s"
}
]
}
}
注意:在mydb2、mydb3上需将上述两个json中的"address"改为各自节点的ip
检测脚本如下
[root@mydb1 ~]# cat /data/consul/shell/check_mysql_mgr_master.sh
#!/bin/bash
# Consul script check for the *write* service of a MySQL Group Replication
# (MGR) node.
#
# Exit status:
#   0 - MySQL answers, this member is ONLINE and it is the group's primary
#   2 - MySQL is unreachable, the member is not ONLINE, or it is not primary
#
# NOTE(review): the credentials are hard-coded and passed on the command
# line, as in the original notes; consider a client option file instead.

host="192.168.1.101"
port=3306
user="dba_user"
password="msds007"

# Keep the client invocation in an array so each argument stays a single
# word instead of relying on word-splitting of a command string.
mysql_cmd=(/usr/local/mysql/bin/mysql -u"$user" -h"$host" -P "$port" -p"$password")

# Run one SQL statement ($1) and print its result on stdout.
query() {
  "${mysql_cmd[@]}" -Nse "$1"
}

# Check whether MySQL is alive. This runs first so that a dead server costs
# one connection attempt instead of three.
value=$(query "select 1")
if [[ -z "$value" ]]; then
  echo "mysql $port is down....."
  exit 2
fi

primary_member=$(query "select variable_value from performance_schema.global_status WHERE VARIABLE_NAME= 'group_replication_primary_member'")
server_uuid=$(query "select variable_value from performance_schema.global_variables where VARIABLE_NAME='server_uuid';")

# Check the state of this member inside the group. The expansion is quoted
# so that an empty result (member not listed in the group) is treated as
# "not ONLINE" rather than breaking the test and falling through.
node_state=$(query "select MEMBER_STATE from performance_schema.replication_group_members where MEMBER_ID='$server_uuid'")
if [[ "$node_state" != "ONLINE" ]]; then
  echo "MySQL $port state is not online...."
  exit 2
fi

# Check whether this member is the primary. The right-hand side is quoted so
# it is compared literally, not as a glob pattern.
if [[ "$server_uuid" == "$primary_member" ]]; then
  echo "MySQL $port Instance is master ........"
  exit 0
else
  echo "MySQL $port Instance is slave ........"
  exit 2
fi
[root@mydb1 ~]# cat /data/consul/shell/check_mysql_mgr_slave.sh
#!/bin/bash
# Consul script check for the *read* service of a MySQL Group Replication
# (MGR) node.
#
# Exit status:
#   0 - MySQL answers, this member is ONLINE and it is either a secondary or
#       the only member of the group
#   2 - MySQL is unreachable, the member is not ONLINE, or it is the primary
#       of a group that has other members
#
# NOTE(review): the credentials are hard-coded and passed on the command
# line, as in the original notes; consider a client option file instead.

host="192.168.1.101"
port=3306
user="dba_user"
password="msds007"

# Keep the client invocation in an array so each argument stays a single
# word instead of relying on word-splitting of a command string.
mysql_cmd=(/usr/local/mysql/bin/mysql -u"$user" -h"$host" -P "$port" -p"$password")

# Run one SQL statement ($1) and print its result on stdout.
query() {
  "${mysql_cmd[@]}" -Nse "$1"
}

# Check whether MySQL is alive. This runs first so that a dead server costs
# one connection attempt instead of three.
value=$(query "select 1")
if [[ -z "$value" ]]; then
  echo "mysql $port is down....."
  exit 2
fi

primary_member=$(query "select variable_value from performance_schema.global_status WHERE VARIABLE_NAME= 'group_replication_primary_member'")
server_uuid=$(query "select variable_value from performance_schema.global_variables where VARIABLE_NAME='server_uuid';")

# Check the state of this member inside the group. The expansion is quoted
# so that an empty result (member not listed in the group) is treated as
# "not ONLINE"; unquoted, the test errored out and the script went on to
# report the node as a healthy slave.
node_state=$(query "select MEMBER_STATE from performance_schema.replication_group_members where MEMBER_ID='$server_uuid'")
if [[ "$node_state" != "ONLINE" ]]; then
  echo "MySQL $port state is not online...."
  exit 2
fi

# Check whether this member is the primary.
if [[ "$server_uuid" != "$primary_member" ]]; then
  echo "MySQL $port Instance is slave ........"
  exit 0
fi

# This member is the primary. If the group has no other member, the primary
# also registers for the read service so that reads still have a target.
node_num=$(query "select count(*) from performance_schema.replication_group_members")
if [[ "$node_num" == "1" ]]; then
  echo "MySQL $port Instance is slave ........"
  exit 0
else
  echo "MySQL $port Instance is master ........"
  exit 2
fi
注意:在mydb2、mydb3上需将两个检测脚本中的host改为各自节点的ip
[root@mydb1 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb2 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb3 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb2 consul]# ./consul join 192.168.1.101
[root@mydb3 consul]# ./consul join 192.168.1.101
[root@mydb1 consul]# ./consul members
# dig @127.0.0.1 -p 8600 w-test-3306-mydb-ser.service.consul
# dig @127.0.0.1 -p 8600 r-test-3306-mydb-ser.service.consul
Consul使用手册
http://www.liangxiansen.cn/2017/04/06/consul/
367

被折叠的 条评论
为什么被折叠?



