下载并安装 Alertmanager(Prometheus 告警组件)安装包
# Download the Alertmanager release tarball, unpack it under /usr/local and
# give the unpacked directory a version-independent name.
AM_VERSION=0.23.0
AM_PKG="alertmanager-${AM_VERSION}.linux-amd64"
wget "https://github.com/prometheus/alertmanager/releases/download/v${AM_VERSION}/${AM_PKG}.tar.gz"
tar -xzvf "${AM_PKG}.tar.gz" -C /usr/local/
cd /usr/local
mv "${AM_PKG}/" alertmanager
cd alertmanager/
vim alertmanager.yml  # edit the alerting configuration (contents below)
# Alertmanager configuration: SMTP settings, routing tree, receivers and
# inhibition rules. Nesting is significant; keep the 2-space indentation.
global:  # settings shared by all receivers
  # How long an alert may go without being updated before Alertmanager
  # declares it resolved (applies to alerts that carry no end time).
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.163.com:465'
  smtp_from: 'ktid24@163.com'
  smtp_auth_username: 'ktid24@163.com'
  # NOTE(review): mail credential stored in plain text. Keep this file out
  # of version control, restrict its permissions, and rotate the value if it
  # has ever been shared.
  smtp_auth_password: 'TDWVKOBCDMLOKZGXABC'
  # STARTTLS is not requested; NOTE(review): port 465 is expected to use
  # implicit TLS instead - confirm against the mail provider.
  smtp_require_tls: false
route:  # how incoming alerts are grouped and dispatched
  group_by: ['alertname']  # alerts sharing these label values form one group
  group_wait: 10s  # delay before the first notification for a new group
  group_interval: 10s  # delay before notifying about changes to a group
  repeat_interval: 1h  # delay before re-sending an unchanged notification
  receiver: 'email'  # default receiver, defined under `receivers`
receivers:  # notification targets
  - name: 'email'
    email_configs:
      - send_resolved: true  # also send a mail when the alert resolves
        to: '1678712889@qq.com'
inhibit_rules:  # suppress lower-severity alerts while a critical one fires
  - source_match:
      severity: 'critical'  # the alert that causes the suppression
    target_match:
      severity: 'warning'  # the alert that gets suppressed
    # Labels that must carry the same value on source and target. `severity`
    # must NOT be listed here: source and target differ in severity by
    # definition, so including it would prevent the rule from ever matching.
    equal: ['alertname', 'instance']
# Register Alertmanager as a systemd service.
# The quoted 'EOF' delimiter keeps the shell from expanding anything inside
# the unit text, so the trailing backslash below reaches systemd unchanged
# (systemd treats it as a line continuation).
cat > /lib/systemd/system/alertmanager.service <<-'EOF'
[Unit]
Description=alertmanager.service
Wants=network-online.target
After=network-online.target
[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml \
    --storage.path=/usr/local/alertmanager/data
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
# Make systemd read the unit file that was just written.
systemctl daemon-reload
# Validate the Alertmanager configuration before starting the service.
/usr/local/alertmanager/amtool check-config /usr/local/alertmanager/alertmanager.yml
# Edit the Prometheus configuration:
#   - under `alerting`, uncomment the Alertmanager target and point it at the
#     Alertmanager address (presumably localhost:9093 on this host - confirm);
#   - under `rule_files`, list the rule files to load, e.g.
#       rule_files:
#         - "/usr/local/prometheus/rules/*.yml"
vim /usr/local/prometheus/prometheus.yml
# Directory that holds the alerting rule files (-p: no error if it exists).
mkdir -p /usr/local/prometheus/rules
# Write the rule file inside that directory, not in the current directory.
vim /usr/local/prometheus/rules/rules.yml
# Prometheus alerting rules for node-level checks.
groups:
  - name: node-alert
    rules:
      - alert: disk-full
        # Percentage of the root filesystem that is in use.
        # NOTE(review): the alert fires above 8% usage, which looks like a
        # test threshold for an alert named "disk-full" - confirm the value.
        expr: |-
          100 - ((node_filesystem_avail_bytes{mountpoint="/",fstype=~"ext4|xfs"} * 100)
          / node_filesystem_size_bytes{mountpoint="/",fstype=~"ext4|xfs"}) > 8
        for: 1m  # condition must hold this long before the alert fires
        labels:
          # Key must be spelled `severity`; Alertmanager routing and
          # inhibition rules match on that exact label name.
          severity: page
        annotations:
          summary: "{{ $labels.instance }} disk full "
          description: "{{ $labels.instance }} disk > {{ $value }} "
# Restart Prometheus so that it re-reads its configuration and rule files.
systemctl restart prometheus
# Restart Alertmanager so that it re-reads alertmanager.yml.
systemctl restart alertmanager
在 Prometheus Web 界面(默认端口 9090)的 Status → Rules 页面查看已加载的规则,并在 Alerts 页面查看告警状态