软件组件:
prometheus
alertmanager
prometheus-webhook-dingtalk
nginx-vts-exporter
nginx
(注:编译 nginx 时需添加 --add-module=../nginx-module-vts/ 以启用 vts 模块)
1、安装prometheus
2、安装alertmanager
3、安装nginx-vts-exporter
#cat docker-compose.yml
# Monitoring stack: Alertmanager, Prometheus and the nginx-vts exporter.
# Every service uses host networking, so each process binds its port directly
# on the host. Compose `ports:` entries do not apply in that mode (newer
# Compose versions reject the combination), so they are omitted; the
# listening port is noted per service instead.
version: '2'
services:
  alertmanager:
    image: docker.io/prom/alertmanager
    container_name: alertmanager
    # Listens on host port 9093.
    network_mode: host
    volumes:
      - /etc/prometheus/alertmanager.yml:/etc/prometheus/alertmanager.yml
    # The image's default config path is /etc/alertmanager/alertmanager.yml,
    # so point Alertmanager explicitly at the file mounted above. Overriding
    # the command drops the image defaults, hence --storage.path is restated.
    command:
      - '--config.file=/etc/prometheus/alertmanager.yml'
      - '--storage.path=/alertmanager'

  prometheus:
    image: prom/prometheus
    container_name: prometheus
    # Listens on host port 9090.
    network_mode: host
    volumes:
      - /etc/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - /etc/prometheus/rules.yml:/etc/prometheus/rules.yml
      # The image stores its TSDB under /prometheus; mount the host directory
      # there so metrics survive container re-creation. The host directory
      # must be writable by the container user (the image runs as `nobody`).
      - /data/prometheus-data:/prometheus

  nginx-vts-exporter:
    image: sophos/nginx-vts-exporter
    container_name: nginx-vts-exporter
    # Listens on host port 9913.
    network_mode: host
    environment:
      # JSON status endpoint of the nginx vts module that the exporter reads.
      - NGINX_STATUS=http://127.0.0.1:11111/vt-status/format/json
##cat /etc/prometheus/prometheus.yml
# Prometheus server configuration.
global:
  # Scrape targets every 15 seconds (the built-in default is 1 minute).
  scrape_interval: 15s
  # Evaluate rules every 15 seconds (the built-in default is 1 minute).
  evaluation_interval: 15s
  # scrape_timeout is left at the global default (10s).

# Alertmanager instances that receive alerts from this server.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '127.0.0.1:9093'

# Rule files are loaded once and then evaluated periodically according to
# the global `evaluation_interval`.
rule_files:
  - '/etc/prometheus/rules.yml'
  # - 'second_rules.yml'

# Scrape jobs. Each job name is attached as a `job=<job_name>` label to every
# time series scraped by that job.
scrape_configs:
  # Prometheus scraping itself.
  # metrics_path defaults to '/metrics' and scheme defaults to 'http'.
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - 'localhost:9090'

  - job_name: 'prometheus_nginx'
    static_configs:
      - targets:
          - '127.0.0.1:9913'

  - job_name: 'docker'
    static_configs:
      - targets:
          - '192.168.0.200:9999'
#cat /etc/prometheus/rules.yml
# Alerting rules evaluated by Prometheus.
groups:
  - name: test-rule
    rules:
      # Fires when usage of the device="rootfs" filesystem has stayed above
      # 80% for 2 minutes.
      # NOTE(review): node_exporter 0.16+ renamed these metrics to
      # node_filesystem_size_bytes / node_filesystem_free_bytes - confirm
      # which names the deployed exporter actually exposes.
      - alert: NodeFilesystemUsage
        expr: (node_filesystem_size{device="rootfs"} - node_filesystem_free{device="rootfs"}) / node_filesystem_size{device="rootfs"} * 100 > 80
        for: 2m
        labels:
          team: node
        annotations:
          summary: "{{$labels.instance}}: High Filesystem usage detected"
          description: "{{$labels.instance}}: Filesystem usage is above 80% (current value is: {{ $value }})"

      # Fires when nginx_server_bytes has stayed above 100 for 2 minutes.
      # NOTE(review): the alert name says "NodeMemoryUsage" but the expression
      # watches an nginx traffic metric; the name is kept so existing
      # references to this alert keep working - consider renaming.
      - alert: NodeMemoryUsage
        expr: nginx_server_bytes > 100
        for: 2m
        labels:
          team: node
        annotations:
          summary: "nginx_server_bytes is above 100"
          description: "{{$labels.instance}}: nginx_server_bytes (current value is: {{ $value }})"
#cat /etc/prometheus/alertmanager.yml
# Alertmanager configuration: every alert is routed to a single webhook
# receiver.
global:
  resolve_timeout: 5m

# Root route: alerts are grouped by alert name and sent to the webhook
# receiver defined below.
route:
  receiver: 'send_to_dingding_webhook1'
  group_by:
    - 'alertname'
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 5m

receivers:
  - name: 'send_to_dingding_webhook1'
    webhook_configs:
      # Resolved notifications are delivered to the same endpoint.
      - url: 'http://127.0.0.1:8060/dingtalk/webhook1/send'
        send_resolved: true

# Mute a 'warning' alert while a 'critical' alert with identical
# alertname / dev / instance labels is firing. This only has an effect for
# alerts that carry a `severity` label.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal:
      - 'alertname'
      - 'dev'
      - 'instance'
# Start the DingTalk webhook bridge in the background, listening on :8060.
# The robot access token is a secret and must not be written into this file:
# export DINGTALK_ACCESS_TOKEN before running the command. The `:?` expansion
# aborts with a message if the variable is unset or empty.
nohup ./prometheus-webhook-dingtalk \
  --web.listen-address=":8060" \
  --ding.profile="webhook1=https://oapi.dingtalk.com/robot/send?access_token=${DINGTALK_ACCESS_TOKEN:?export DINGTALK_ACCESS_TOKEN first}" &
# Start the containers defined in docker-compose.yml.
docker-compose up -d
4、安装nginx (略)
编译参数:./configure --user=nginx --group=nginx --prefix=/opt/app/nginx --with-http_v2_module --with-http_ssl_module --with-http_sub_module --with-http_stub_status_module --with-http_gzip_static_module --with-pcre --add-module=../nginx-module-vts/ --with-http_image_filter_module=dynamic
配置nginx.conf
http {
    # Enable the vts module's shared-memory zone and per-Host statistics.
    vhost_traffic_status_zone;
    vhost_traffic_status_filter_by_host on;
    # ... (other http-level settings omitted)

    # Status server on port 11111. A server block must live inside http {}.
    server {
        listen 11111;
        server_name 127.0.0.1;

        # Basic stub_status counters. This needs its own path: declaring
        # /vt-status twice makes nginx fail with a "duplicate location" error.
        location /nginx-status {
            stub_status on;
            access_log off;
        }

        # vts status page. The default output is HTML; the exporter requests
        # /vt-status/format/json to get the JSON form.
        location /vt-status {
            vhost_traffic_status_display;
            vhost_traffic_status_display_format html;
        }
    }
}