Zabbix监控nginx高可用是否脑裂
实验环境
所有机器均关闭防火墙与SELinux
第一台机zabbix 192.168.159.141 lamp、zabbix_server、zabbix_agentd
第二台机lb1 192.168.159.139 keepalived、nginx_master负载均衡rs1和rs2的网页测试页面
第三台机lb2 192.168.159.147 keepalived、nginx_slave负载均衡rs1和rs2的网页测试页面、zabbix_agentd
第四台机rs1 192.168.159.148 nginx网页测试页面
第五台机rs2 192.168.159.149 nginx网页测试页面
本次高可用虚拟IP(VIP)地址暂定为 192.168.159.250
keepalived安装
配置主keepalived
[root@lb1 ~]# yum -y install keepalived
编写配置文件
[root@lb1 ~]# cd /etc/keepalived/
[root@lb1 keepalived]# ls
keepalived.conf
[root@lb1 keepalived]# mv keepalived.conf{,.bak}
[root@lb1 keepalived]# ls
keepalived.conf.bak
[root@lb1 keepalived]# vim keepalived.conf
[root@lb1 keepalived]# cat keepalived.conf
! Configuration File for keepalived
global_defs {
router_id lb01
}
! VRRP instance on lb1, the node meant to hold the VIP in normal operation.
vrrp_instance VI_1 {
! Start in the MASTER role.
state MASTER
! Interface VRRP runs on; the VIP is added to it (lb1's NIC is ens33).
interface ens33
! Same virtual router ID (71) as in the lb2 configuration.
virtual_router_id 71
! Higher than the priority of 80 configured on lb2.
priority 100
! VRRP advertisement interval (seconds, per the keepalived manual).
advert_int 1
! Simple password authentication; lb2 is configured with the same auth_pass.
authentication {
auth_type PASS
auth_pass 123456
}
! Virtual IP managed by this instance.
virtual_ipaddress {
192.168.159.250
}
}
virtual_server 192.168.159.250 80 {
delay_loop 6
lb_algo rr
lb_kind DR
persistence_timeout 50
protocol TCP
real_server 192.168.159.139 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.159.147 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
启动服务发现有vip了
[root@lb1 ~]# systemctl enable --now keepalived
[root@lb1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
link/ether 00:0c:29:b6:d6:ff brd ff:ff:ff:ff:ff:ff
altname enp2s1
inet 192.168.159.139/24 brd 192.168.159.255 scope global dynamic noprefixroute ens33
valid_lft 1348sec preferred_lft 1348sec
inet 192.168.159.250/32 scope global ens33
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:feb6:d6ff/64 scope link noprefixroute
valid_lft forever preferred_lft forever
用同样的方法在备服务器上安装keepalived
[root@lb2 ~]# yum -y install keepalived
[root@lb2 ~]# cd /etc/keepalived/
[root@lb2 keepalived]# ls
keepalived.conf
[root@lb2 keepalived]# mv keepalived.conf{,.bak}
[root@lb2 keepalived]# vim keepalived.conf
[root@lb2 keepalived]# cat keepalived.conf
! Configuration File for keepalived
global_defs {
router_id lb02
}
vrrp_instance VI_1 {
state BACKUP
interface ens160
virtual_router_id 71
priority 80
advert_int 1
authentication {
auth_type PASS
auth_pass 123456
}
virtual_ipaddress {
192.168.159.250
}
}
virtual_server 192.168.159.250 80 {
delay_loop 6
lb_algo rr
lb_kind DR
persistence_timeout 50
protocol TCP
real_server 192.168.159.139 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.159.147 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
[root@lb2 keepalived]# systemctl enable --now keepalived
Created symlink /etc/systemd/system/multi-user.target.wants/keepalived.service → /usr/lib/systemd/system/keepalived.service.
[root@lb2 keepalived]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens160: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:f6:3c:cb brd ff:ff:ff:ff:ff:ff
altname enp3s0
inet 192.168.159.147/24 brd 192.168.159.255 scope global dynamic noprefixroute ens160
valid_lft 1141sec preferred_lft 1141sec
inet6 fe80::20c:29ff:fef6:3ccb/64 scope link noprefixroute
valid_lft forever preferred_lft forever
在浏览器上访问试试,确保nginx负载均衡服务能够正常访问
备节点上需要先停止nginx服务,这样才能通过主节点上的VIP正常访问
[root@lb2 ~]# systemctl stop nginx
让keepalived监控nginx负载均衡机
keepalived通过脚本来监控nginx负载均衡机的状态
在lb1上编写脚本(注:check_nginx.sh的内容本文未列出;根据下文“在lb1上停掉nginx服务,keepalived服务也会停止”的描述,该脚本应在检测不到nginx进程时停止keepalived服务)
[root@lb1 ~]# mkdir /scripts
[root@lb1 ~]# cd /scripts/
[root@lb1 scripts]# vim check_nginx.sh
[root@lb1 scripts]# chmod +x check_nginx.sh
[root@lb1 scripts]# vim notify.sh
[root@lb1 scripts]# cat notify.sh
#!/bin/bash
#
# keepalived notify hook: keeps nginx in step with this node's VRRP state.
# Usage: notify.sh master|backup
#   master - start nginx when no nginx process is running
#   backup - stop nginx when an nginx process is running
# Any other argument prints the usage text to stderr and returns 2.

# Succeeds when at least one process named exactly "nginx" exists.
# pgrep -x replaces the `ps | grep` pipeline so that unrelated commands whose
# arguments merely mention nginx (an editor, a log tail) are not counted.
nginx_running() {
  pgrep -x nginx >/dev/null
}

# Applies the requested state transition.
# Arguments: $1 - "master" or "backup"
# Returns:   systemctl's status when it is invoked, 0 when nothing had to be
#            done, 2 on an unknown or missing argument.
main() {
  case "${1:-}" in
    master)
      if ! nginx_running; then
        systemctl start nginx
      fi
      ;;
    backup)
      if nginx_running; then
        systemctl stop nginx
      fi
      ;;
    *)
      # The script only ever reads $1, so the usage text no longer
      # advertises a VIP argument.
      echo "Usage: $0 master|backup" >&2
      return 2
      ;;
  esac
}

main "$@"
将此脚本传给备节点,主节点不用,只做备份
[root@lb1 scripts]# scp notify.sh 192.168.159.147:/scripts/
The authenticity of host '192.168.159.147 (192.168.159.147)' can't be established.
ED25519 key fingerprint is SHA256:bkL+H8KDU3f4oa6FUb2+zdbsK+6fCEjjgbuaDzWjdoE.
This key is not known by any other names
Are you sure you want to continue connecting (yes/no/[fingerprint])? yes
Warning: Permanently added '192.168.159.147' (ED25519) to the list of known hosts.
root@192.168.159.147's password:
notify.sh 100% 372 315.7KB/s 00:00
lb2上
[root@lb2 ~]# cd /scripts/
[root@lb2 scripts]# ls
check_process.sh log.py notify.sh
[root@lb2 scripts]# chmod +x notify.sh
[root@lb2 scripts]# ls
check_process.sh log.py notify.sh
配置keepalived加入监控脚本的配置
配置主keepalived
[root@lb1 ~]# vim /etc/keepalived/keepalived.conf
[root@lb1 ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id lb01
}
! Health check tracked by VI_1 through its track_script section.
vrrp_script nginx_check {
    script "/scripts/check_nginx.sh"
    ! Run the check script every second.
    interval 1
    ! While the script reports failure, lower this node's priority by 30
    ! (100 -> 70), which is below lb2's 80, so the VIP can move to lb2.
    ! The keyword was misspelled as "wgight", which keepalived does not recognise.
    weight -30
}
vrrp_instance VI_1 {
state MASTER
interface ens33
virtual_router_id 71
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 123456
}
virtual_ipaddress {
192.168.159.250
}
track_script {
nginx_check
}
}
virtual_server 192.168.159.250 80 {
delay_loop 6
lb_algo rr
lb_kind DR
persistence_timeout 50
protocol TCP
real_server 192.168.159.139 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.159.147 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
[root@lb1 ~]# systemctl restart keepalived
配置备keepalived
[root@lb2 ~]# vim /etc/keepalived/keepalived.conf
[root@lb2 ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id lb02
}
! VRRP instance on lb2, the standby node that takes the VIP over from lb1.
vrrp_instance VI_1 {
! Start in the BACKUP role.
state BACKUP
! Interface VRRP runs on; the VIP is added to it (lb2's NIC is ens160).
interface ens160
! Same virtual router ID (71) as in the lb1 configuration.
virtual_router_id 71
! Lower than the priority of 100 configured on lb1.
priority 80
! VRRP advertisement interval (seconds, per the keepalived manual).
advert_int 1
! Simple password authentication; lb1 is configured with the same auth_pass.
authentication {
auth_type PASS
auth_pass 123456
}
! Virtual IP managed by this instance.
virtual_ipaddress {
192.168.159.250
}
! On transition to MASTER, run notify.sh with "master" (starts nginx if it is not running).
notify_master "/scripts/notify.sh master"
! On transition to BACKUP, run notify.sh with "backup" (stops nginx if it is running).
notify_backup "/scripts/notify.sh backup"
}
virtual_server 192.168.159.250 80 {
delay_loop 6
lb_algo rr
lb_kind DR
persistence_timeout 50
protocol TCP
real_server 192.168.159.139 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.159.147 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
[root@lb2 ~]# systemctl restart keepalived
此时的效果
在lb1上停掉nginx服务,keepalived服务也会停止,同时vip会出现在lb2上,lb2上的nginx服务和keepalived服务也会启动
[root@lb1 ~]# systemctl stop nginx
[root@lb1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
link/ether 00:0c:29:b6:d6:ff brd ff:ff:ff:ff:ff:ff
altname enp2s1
inet 192.168.159.139/24 brd 192.168.159.255 scope global dynamic noprefixroute ens33
valid_lft 962sec preferred_lft 962sec
inet6 fe80::20c:29ff:feb6:d6ff/64 scope link noprefixroute
valid_lft forever preferred_lft forever
[root@lb2 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens160: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:f6:3c:cb brd ff:ff:ff:ff:ff:ff
altname enp3s0
inet 192.168.159.147/24 brd 192.168.159.255 scope global dynamic noprefixroute ens160
valid_lft 944sec preferred_lft 944sec
inet 192.168.159.250/32 scope global ens160
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fef6:3ccb/64 scope link noprefixroute
valid_lft forever preferred_lft forever
[root@lb2 ~]# ss -antl
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 511 0.0.0.0:80 0.0.0.0:*
LISTEN 0 4096 0.0.0.0:10050 0.0.0.0:*
LISTEN 0 128 [::]:22 [::]:*
LISTEN 0 511 [::]:80 [::]:*
对keepalived进行监控
对keepalived服务的监控应在备用服务器上进行,通过添加zabbix自定义监控进行。
监控的信息是备上面有无VIP地址(192.168.159.250)
所以要在lb2上编写监控脚本
[root@lb2 ~]# cd /scripts/
[root@lb2 scripts]# ls
check_process.sh log.py notify.sh
[root@lb2 scripts]# vim check_keepalived.sh
[root@lb2 scripts]# chmod +x check_keepalived.sh
[root@lb2 scripts]# cat check_keepalived.sh
#!/bin/bash
#
# Zabbix item script: reports whether this node currently holds the VIP.
# Usage: check_keepalived.sh [interface] [vip]
#   interface - NIC to inspect (default: ens160)
#   vip       - IPv4 address to look for (default: 192.168.159.250)
# Output: "1" when the address is configured on the interface, otherwise "0".

# Succeeds when IPv4 address $2 is assigned to interface $1.
# The address field is compared exactly rather than grepped as a substring,
# so e.g. 192.168.159.25 can never match 192.168.159.250 or a broadcast
# address that happens to appear on the same line.
has_vip() {
  local iface=$1 vip=$2
  # -o prints one record per line: "<idx>: <dev> inet <addr>/<prefix> ..."
  ip -o -4 addr show dev "$iface" \
    | awk -v vip="$vip" '
        $3 == "inet" { split($4, a, "/"); if (a[1] == vip) found = 1 }
        END { exit !found }'
}

# Prints 1/0 for the given (or default) interface and VIP.
main() {
  local iface=${1:-ens160}
  local vip=${2:-192.168.159.250}
  if has_vip "$iface" "$vip"; then
    echo "1"
  else
    echo "0"
  fi
}

main "$@"
[root@lb2 scripts]# ll check_keepalived.sh
-rwxr-xr-x 1 root root 125 Mar 4 17:07 check_keepalived.sh
[root@lb2 scripts]# ./check_keepalived.sh
1
此处显示1就是说明备上面有vip,显示0则说明备上面没有vip
进入配置文件,创建自定义监控任务
[root@lb2 scripts]# vim /usr/local/etc/zabbix_agentd.conf
添加自定义监控任务
UserParameter=check_keepalived,/bin/bash /scripts/check_keepalived.sh
因为我们修改了配置文件,所以需要重启服务,重新读取配置文件内容
[root@lb2 scripts]# systemctl restart zabbix_agentd.service
创建自定义监控任务后,我们需要在server端去测试一下是否能接受到被监控端的值
[root@zabbix ~]# zabbix_get -s 192.168.159.147 -k check_keepalived
1
成功接收到值,因为备上此时有vip,所以显示的是1
在zabbix监控页面添加监控
创建监控项
成功创建
创建触发器
因为此时我的主服务器上的nginx服务已经关闭,所以vip在备上面,此时提取出来的值是1,所以服务异常