prometheus之监控keepalived
文章目录
1.keepalived-exporter项目地址(感谢作者提供,记得点个 star 支持一下)
地址:https://github.com/cafebazaar/keepalived-exporter
2.安装keepalived-exporter
目前最新版本1.2.0
# Pin the release to install; the archive name and its top-level directory are both derived from it.
export VERSION=1.2.0
PKG="keepalived-exporter-${VERSION}.linux-amd64"
# Download the release tarball.
wget "https://github.com/cafebazaar/keepalived-exporter/releases/download/v${VERSION}/${PKG}.tar.gz"
# Extract only the exporter binary from the archive.
tar -xvzf "${PKG}.tar.gz" "${PKG}/keepalived-exporter"
# Move the binary into /usr/local/bin.
sudo mv "${PKG}/keepalived-exporter" /usr/local/bin/
3.加入Unit服务
# cat /usr/lib/systemd/system/keepalived-exporter.service
# systemd unit that runs keepalived-exporter listening on port 9165.
[Unit]
Description=Keepalived Exporter
# Pull in network-online.target and start only after it.
Wants=network-online.target
After=network-online.target

[Service]
Type=simple
User=root
Group=root
# The install step moves the binary into /usr/local/bin, so ExecStart must point there.
ExecStart=/usr/local/bin/keepalived-exporter -web.listen-address=:9165
# kill requires a target PID; systemd expands $MAINPID to the service's main process.
# NOTE(review): confirm the exporter actually handles SIGHUP before relying on reload.
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
TimeoutStopSec=20s
Restart=always

[Install]
WantedBy=default.target
4.加入开机自启和启动服务
# Reload unit definitions so systemd uses the current keepalived-exporter.service file
# (required whenever the unit file has been created or edited while systemd is running).
systemctl daemon-reload
# Register the service for boot, then start it now.
systemctl enable keepalived-exporter.service
systemctl start keepalived-exporter.service
5.检查服务启动状态和端口
# netstat -antp|grep 9165
tcp6 0 0 :::9165 :::* LISTEN 42139/keepalived-ex
6.查看监控数据
# curl -s http://10.x.x.x:9165/metrics|grep keepalived|grep -v '#'
keepalived_address_list_errors_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_advertisements_interval_errors_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_advertisements_received_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_advertisements_sent_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 2229
keepalived_authentication_failure_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_authentication_invalid_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_authentication_mismatch_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_become_master_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 1
keepalived_gratuitous_arp_delay_total{iname="x",intf="eth0",state="MASTER",vrid="26"} 5
keepalived_invalid_type_received_total{iname="x",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_ip_ttl_errors_total{iname="pdns",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_packet_length_errors_total{iname="x",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_priority_zero_received_total{iname="x",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_priority_zero_sent_total{iname="x",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_release_master_total{iname="x",intf="eth0",state="MASTER",vrid="26"} 0
keepalived_script_status{name="x"} 1
keepalived_up 1
keepalived_vrrp_state{iname="pdns",intf="eth0",ip_address="10.x.x.x/32",vrid="26"} 2
7.报警rules
# cat rules/rules-keepalived.yml
# Prometheus alerting rules for metrics exposed by keepalived-exporter.
groups:
  - name: keepalived.rules
    rules:
      # Fires when the exporter has reported keepalived_up == 0 for 5 minutes.
      - alert: keepalived is down
        expr: keepalived_up == 0
        for: 5m
        labels:
          severity: critical
          instance: "{{ $labels.instance }}"
          apps: "{{ $labels.apps }}"
        annotations:
          summary: "keepalived 已关闭"
          description: "keepalived 已关闭,当前值: {{ $value }}(0异常|1正常)"
          value: "{{ $value }}"

      # Intended to signal that the VIP has moved.
      # NOTE(review): keepalived_become_master_total is a *_total counter and the
      # sample output shows 1 on the current MASTER, so "== 0" on state="MASTER"
      # series may never match after a failover. Consider alerting on changes in
      # keepalived_vrrp_state instead — confirm against real data before changing.
      - alert: Keepalived vip has changed
        expr: keepalived_become_master_total{state="MASTER"} == 0
        for: 5m
        labels:
          severity: critical
          instance: "{{ $labels.instance }}"
          apps: "{{ $labels.apps }}"
        annotations:
          summary: "keepalived vip 已经变更"
          description: "keepalived vip 已经变更,当前值: {{ $value }}(0已变更|1未变更)"
          value: "{{ $value }}"

      # Fires when a keepalived check script has reported status 0 for 5 minutes.
      - alert: Keepalived Check script status
        expr: keepalived_script_status == 0
        for: 5m
        labels:
          severity: critical
          instance: "{{ $labels.instance }}"
          apps: "{{ $labels.apps }}"
        annotations:
          summary: "keepalived 检查脚本状态"
          description: "keepalived 检查脚本状态,当前值: {{ $value }}(0异常|1正常)"
          value: "{{ $value }}"