blackbox_exporter 极速操作
github地址
https://github.com/prometheus/blackbox_exporter/
1、配置文件
cat /usr/local/blackbox_exporter/blackbox.yml
# blackbox_exporter probe modules. Each key under `modules` is a module name
# that Prometheus selects through the `module` URL parameter of /probe.
modules:
  # HTTP probe using the prober's default settings.
  http_2xx:
    prober: http

  # HTTP probe that issues a POST request instead of the default method.
  http_post_2xx:
    prober: http
    http:
      method: POST

  # Plain TCP probe with no query/response exchange configured.
  tcp_connect:
    prober: tcp

  # TCP probe over TLS that expects the server banner to start with "+OK".
  pop3s_banner:
    prober: tcp
    tcp:
      query_response:
        # `+` is a regex quantifier, so it is escaped to match a literal plus.
        # Single quotes keep the backslash literal in YAML.
        - expect: '^\+OK'
      tls: true
      tls_config:
        # Certificate verification stays enabled.
        insecure_skip_verify: false

  # gRPC probe over TLS, preferring IPv4.
  grpc:
    prober: grpc
    grpc:
      tls: true
      preferred_ip_protocol: "ip4"

  # gRPC probe without TLS, querying the service named "service1".
  grpc_plain:
    prober: grpc
    grpc:
      tls: false
      service: "service1"

  # TCP probe that waits for an SSH-2.0 banner, then sends its own
  # identification string.
  ssh_banner:
    prober: tcp
    tcp:
      query_response:
        - expect: "^SSH-2.0-"
        - send: "SSH-2.0-blackbox-ssh-check"

  # TCP probe that performs a minimal IRC registration exchange.
  irc_banner:
    prober: tcp
    tcp:
      query_response:
        - send: "NICK prober"
        - send: "USER prober prober prober :prober"
        # Reply to the server PING with the token captured by the first group.
        - expect: "PING :([^ ]+)"
          send: "PONG ${1}"
        - expect: "^:[^ ]+ 001"

  # ICMP probe using the prober's default settings.
  icmp:
    prober: icmp

  # ICMP probe with a 5s timeout and a TTL of 5.
  icmp_ttl5:
    prober: icmp
    timeout: 5s
    icmp:
      ttl: 5
2、启动。如果需要开启 debug 日志,在命令最后加上 --log.level=debug
# Run blackbox_exporter detached on port 9115 with the host timezone and the
# config file mounted into the container; the container is removed on stop.
docker run --rm -d \
  -p 9115:9115 \
  --name blackbox_exporter \
  -v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro \
  -v /usr/local/blackbox_exporter/blackbox.yml:/config/blackbox.yml \
  quay.io/prometheus/blackbox-exporter:latest \
  --config.file=/config/blackbox.yml
3、在 prometheus.yml 的 scrape_configs 下增加
# Scrape jobs to append under `scrape_configs:` in prometheus.yml (indent the
# whole snippet to match the existing list items there).

# TCP port checks: targets come from process.yml, probed with `tcp_connect`.
- job_name: 'tcp_status'
  metrics_path: /probe
  params:
    module: [tcp_connect]
  file_sd_configs:
    - files:
        - /usr/local/prometheus/process.yml
      refresh_interval: 20s
  relabel_configs:
    # Pass the discovered address to the exporter as the `target` URL parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the probed target as the `instance` label.
    - source_labels: [__param_target]
      target_label: instance
    # Send the actual scrape to the blackbox exporter.
    - target_label: __address__
      replacement: '127.0.0.1:9115'

# HTTP checks: targets come from http_status.yml, probed with `http_2xx`.
- job_name: 'http_status'
  metrics_path: /probe
  params:
    module: [http_2xx]
  file_sd_configs:
    - files:
        - /usr/local/prometheus/http_status.yml
      refresh_interval: 20s
  relabel_configs:
    # Pass the discovered address to the exporter as the `target` URL parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the probed target as the `instance` label.
    - source_labels: [__param_target]
      target_label: instance
    # Send the actual scrape to the blackbox exporter.
    - target_label: __address__
      replacement: '127.0.0.1:9115'
4、TCP 端口检测的目标文件 process.yml
# file_sd target group for the `tcp_status` job: host:port pairs to probe,
# plus labels attached to every target in the group.
- targets: ['192.168.0.2:8088', '192.168.0.3:8089']
  labels:
    systemname: 'tomcat'
    env: 'test'
    name: 'xxx-backend'
5、域名响应 200 检测的目标文件 http_status.yml
# file_sd target group for the `http_status` job: URLs to probe, plus labels
# attached to every target in the group.
- targets:
    - https://baidu.com
  labels:
    systemname: 'baidu'
    env: 'test'
    name: 'baidu'
告警规则
process.yml(告警规则文件;与第 4 步的 file_sd 目标文件同名,需放在不同目录,避免互相覆盖)
# Alerting rule: fires when a `tcp_status` probe has failed continuously
# for 3 minutes.
groups:
  - name: Linux
    rules:
      - alert: "进程异常"
        expr: probe_success{job="tcp_status"} == 0
        for: 3m
        labels:
          severity: Error
        annotations:
          # The duration in the message matches the `for` clause above.
          value: "{{ $labels.instance }} 进程已经超过3分钟未响应"
          description: "进程异常"
http_code.yml
# Alerting rule: fires when an `http_status` probe has failed continuously
# for 1 minute.
groups:
  - name: Linux
    rules:
      - alert: "网站异常"
        expr: probe_success{job="http_status"} == 0
        for: 1m
        labels:
          severity: Error
        annotations:
          value: "{{ $labels.instance }} 域名异常"
          description: "访问域名返回200异常"