部署alertmanager
1 部署完成elk集群或者单节点
2 创建用户
# useradd prometheus
# chown -R prometheus:prometheus /usr/local/alertmanager
# vim /usr/lib/systemd/system/alertmanager.service
# systemd unit that runs the Alertmanager binary installed under
# /usr/local/alertmanager as the "prometheus" user.
[Unit]
Description=Alertmanager
# Order this unit after network.target.
After=network.target

[Service]
# Run as the unprivileged account that owns /usr/local/alertmanager.
User=prometheus
Type=simple
# Bring the process back up when it exits with a failure.
Restart=on-failure
ExecStart=/usr/local/alertmanager/alertmanager \
    --config.file=/usr/local/alertmanager/alertmanager.yml \
    --storage.path=/usr/local/alertmanager/data

[Install]
# Pulled in by the multi-user target once the unit is enabled.
WantedBy=multi-user.target
启动
# systemctl enable alertmanager.service
# systemctl start alertmanager.service
3 配置报警规则
编写报警规则:(举例)
将alertmanager的配置文件(/usr/local/alertmanager/alertmanager.yml)修改为如下格式:
# Alertmanager configuration: one catch-all route, one receiver with no
# notification integrations, and one inhibit rule.
global:
  # How long Alertmanager waits without updates before treating an alert
  # as resolved.
  resolve_timeout: 5m

route:
  # Alerts sharing the same alertname are batched into one notification group.
  group_by: ['alertname']
  # Initial wait before the first notification for a new group.
  group_wait: 10s
  # Wait before notifying about new alerts added to an existing group.
  group_interval: 10s
  # Wait before re-sending a notification that is still firing.
  repeat_interval: 1h
  # receiver: 'web.hook'
  receiver: 'default-receiver'

receivers:
  # Disabled example of a webhook receiver:
  # - name: 'web.hook'
  #   webhook_configs:
  #     - url: 'http://127.0.0.1:5001/'
  #
  # This receiver defines no integrations, so routed alerts are accepted
  # but no notification is sent anywhere.
  - name: 'default-receiver'

inhibit_rules:
  # Mute 'warning' alerts while a 'critical' alert with identical
  # alertname, dev and instance labels is firing.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
在prometheus的报警规则文件中按如下格式编写规则(该规则文件需要在prometheus.yml的rule_files中引用):
# Example Prometheus alerting rule group: fires when memory usage,
# computed from available vs. total memory, exceeds 10 percent.
groups:
  - name: memorytrule
    rules:
      - alert: "memoryalert"
        # Percentage of memory in use.
        # NOTE(review): newer node_exporter releases are believed to expose
        # these metrics with a "_bytes" suffix - confirm against the
        # deployed exporter before relying on this expression.
        expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 10
        # The condition must hold for this long before the alert fires.
        for: 1s
        labels:
          # Matches the target side of an inhibit rule keyed on severity.
          severity: warning
        annotations:
          summary: "service:{{$labels.alertname}}"
          description: "alert: {{ $value }}"
          value: "{{ $value }}"