1: 检查配置文件和告警规则(rules)编写是否正确
# Run promtool's config check against prometheus.yml (path is relative to the current directory).
./promtool check config prometheus.yml
2: 编写告警规则(rule)
cat > /usr/local/prometheus/rules/nodedown.yml << 'EOF'
# Alerting rules for hosts carrying the label env="operations".
# The heredoc delimiter is quoted ('EOF') so the shell leaves the
# {{ $labels.* }} / {{ $value }} templates untouched.
groups:
  - name: operations  # rule group name
    rules:
      - alert: node-down  # alert name shown in notifications
        # Matches every env="operations" target whose `up` sample is not 1.
        expr: up{env="operations"} != 1
        for: 1m  # condition must hold this long before the alert is sent
        labels:
          status: High  # alert severity
          team: operations
        annotations:
          # Human-readable description, templated from the alert's labels.
          description: "Environment: {{ $labels.env }} Instance: {{ $labels.instance }} is Down ! ! !"
          value: '{{ $value }}'
          # NOTE(review): text says "20 minutes" but `for` above is 1m - confirm which is intended.
          summary: "The host node was down 20 minutes ago"