[root@prometheus ~]# cd /usr/local/alertmanager/alert-config/
[root@prometheus alert-config]# ls
alert_rules prometheus.yml targets
[root@prometheus alert-config]# cat prometheus.yml
# my global config
# Author: MageEdu <mage@magedu.com>
# Repo: http://gitlab.magedu.com/MageEdu/prometheus-configs/
global:
  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  scrape_interval: 15s
  # Evaluate rules every 15 seconds. The default is every 1 minute.
  evaluation_interval: 15s
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration: Alertmanager instances are discovered from
# target files instead of being listed inline.
alerting:
  alertmanagers:
    - file_sd_configs:
        - files:
            - "targets/alertmanagers*.yaml"

# Load rules once and periodically evaluate them according to the global
# 'evaluation_interval'.
rule_files:
  - "rules/*.yaml"
  - "alert_rules/*.yaml"

# Scrape configurations. Every job below reads its targets from files
# matched by a glob under targets/.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries
  # scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    # No static targets are defined for this job; only file-based discovery
    # is used.
    file_sd_configs:
      - files:
          - "targets/prometheus-*.yaml"
        # How often the target files are re-read.
        refresh_interval: 2m

  # All nodes
  - job_name: 'nodes'
    file_sd_configs:
      - files:
          - "targets/nodes-*.yaml"
        refresh_interval: 2m

  # Alertmanager instances (the same target files are referenced by the
  # `alerting` section).
  - job_name: 'alertmanagers'
    file_sd_configs:
      - files:
          - "targets/alertmanagers*.yaml"
        refresh_interval: 2m
[root@prometheus alert-config]#
[root@prometheus alert-config]# ls
alert_rules prometheus.yml targets
[root@prometheus alert-config]# cd targets/
[root@prometheus targets]# cat alertmanagers.yaml
# File-SD target group for Alertmanager: one host:port entry plus the
# labels attached to it.
- targets:
    - '192.168.28.5:9093'
  labels:
    app: alertmanager
[root@prometheus targets]# cat nodes-linux.yaml
# File-SD target group for the Linux node exporters; the labels below are
# attached to every listed target.
- targets:
    - '192.168.28.10:9100'
    - '192.168.28.100:9100'
  labels:
    app: node-exporter
    # NOTE(review): this sets the `job` label to "node", while the scrape job
    # in prometheus.yml is named 'nodes' - confirm the override is intended.
    job: node
[root@prometheus targets]# cat prometheus-servers.yaml
# File-SD target group for the Prometheus server itself, with the labels
# attached to it.
- targets:
    - '192.168.28.5:9090'
  labels:
    app: prometheus
    job: prometheus
[root@prometheus targets]#
[root@prometheus targets]# cd ..
[root@prometheus alert-config]# cd alert_rules/
[root@prometheus alert_rules]# cat instance_down.yaml
# Alerting rule group: fires when any scraped target reports `up == 0`
# continuously for 20 seconds.
groups:
  - name: AllInstances
    rules:
      - alert: InstanceDown
        # Condition for alerting
        expr: up == 0
        # The condition must hold this long before the alert fires.
        for: 20s
        # Annotation - additional informational labels to store more information
        annotations:
          title: 'Instance down'
          description: 'Instance has been down for more than 20 seconds.'
        # Labels - additional labels to be attached to the alert
        labels:
          severity: 'critical'
[root@prometheus alert_rules]#