目录
2.prometheus-webhook-dingtalk.service
Prometheus配置
1.prometheus.yml
# my global config
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 127.0.0.1:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - "./rules/*.yml"

# Scrape jobs: two application jobs (test and prod) on port 8120 and one
# node_exporter job. Each target gets a static `group` label.
scrape_configs:
  # NOTE(review): Spring Boot 2.x actuator serves this endpoint at
  # /actuator/prometheus by default; '/prometheus' only answers if the
  # application overrides management.endpoints.web.base-path. Confirm.
  - job_name: 'server-test'
    metrics_path: '/prometheus'
    static_configs:
      - targets: ['10.255.11.123:8120']
        labels:
          group: test

  - job_name: 'server-prod'
    metrics_path: '/prometheus'
    static_configs:
      - targets: ['10.255.5.10:8120', '10.255.5.14:8120', '10.255.5.15:8120']
        labels:
          group: prod

  # Overrides the global 15s scrape interval for this job only.
  - job_name: 'node_exporter-test'
    scrape_interval: 5s
    static_configs:
      - targets: ['10.255.11.123:9100']
        labels:
          group: test
2.server_rule.yml
# Alerting rules; this file is picked up by the "./rules/*.yml" glob in prometheus.yml.
groups:
  - name: app-server
    rules:
      # Fires once a target's `up` series has been 0 for a full minute.
      # NOTE(review): `up` only carries target labels (job, instance and the
      # static `group` label from the scrape config); `app_profile` is not set
      # anywhere in the configuration shown here, so it may render as an empty
      # string in the annotations below. Confirm where it comes from.
      - alert: service not running
        expr: up == 0
        for: 1m
        labels:
          severity: critical
          team: server
        annotations:
          summary: "实例 {{$labels.app_profile}}#{{$labels.job}} 已停止"
          description: "服务实例 {{$labels.app_profile}}#{{$labels.job}}#{{$labels.instance}} 已经停止超过1分钟."
          value: "{{$labels.instance}}"
校验配置文件 ./promtool check config prometheus.yml
Checking prometheus.yml
SUCCESS: 1 rule files found
SUCCESS: prometheus.yml is valid prometheus config file syntax

Checking rules/server_rule.yml
SUCCESS: 1 rules found
3.prometheus.service
vi /usr/lib/systemd/system/prometheus.service
# systemd unit that runs the Prometheus server.
[Unit]
Description=Prometheus
# Start only after the network target has been reached.
After=network.target

[Service]
# Restart the process automatically when it exits with a failure.
Restart=on-failure
ExecStart=/usr/local/prometheus/prometheus-2.43.0.linux-amd64/prometheus --config.file=/usr/local/prometheus/prometheus-2.43.0.linux-amd64/prometheus.yml --storage.tsdb.path=/usr/local/prometheus/prometheus-2.43.0.linux-amd64/data

[Install]
# Pulled in by the normal multi-user boot target when the unit is enabled.
WantedBy=multi-user.target
# Make systemd re-read its unit files so the new prometheus.service is known.
systemctl daemon-reload
# Start the service and check that it is running.
systemctl start prometheus
systemctl status prometheus
# Start Prometheus automatically at boot.
systemctl enable prometheus
Alertmanager配置
1.alertmanager.yml
# Routing: every alert is delivered to the "ops" receiver, grouped by alert name.
route:
  receiver: ops
  group_wait: 30s
  group_interval: 1m
  repeat_interval: 24h
  group_by: [alertname]

# Receivers: who is notified and through which channel.
receivers:
  - name: ops
    webhook_configs:
      # URL of the "webhook1" target served by prometheus-webhook-dingtalk.
      - url: http://localhost:8060/dingtalk/webhook1/send
        # Also notify the receiver when an alert is resolved.
        send_resolved: true
        # html: '{{ template "ops.html" . }}'  # send using a custom template

# Inhibition rules.
inhibit_rules:
  # While an alert matching source_match is firing, alerts matching
  # target_match are suppressed, provided both carry identical values for
  # every label listed under `equal`.
  # NOTE(review): these matchers use a `status` label, whereas the alert rule
  # in this document labels alerts with `severity`; as written the rule will
  # not match that alert. Confirm which label name is intended.
  - source_match:
      status: 'critical'
    target_match:
      status: 'Warning'
    equal: ['alertname', 'dev', 'instance']
校验配置文件 ./amtool check-config alertmanager.yml
Checking 'alertmanager.yml' SUCCESS
Found:
- global config
- route
- 1 inhibit rules
- 1 receivers
- 0 templates
2.alertmanager.service
vi /usr/lib/systemd/system/alertmanager.service
# systemd unit that runs Alertmanager.
[Unit]
Description=Alertmanager
# Start only after the network target has been reached.
After=network.target

[Service]
# Restart the process automatically when it exits with a failure.
Restart=on-failure
ExecStart=/usr/local/prometheus/alertmanager/alertmanager --config.file=/usr/local/prometheus/alertmanager/alertmanager.yml

[Install]
# Pulled in by the normal multi-user boot target when the unit is enabled.
WantedBy=multi-user.target
# Make systemd re-read its unit files so the new alertmanager.service is known
# (same first step as for prometheus.service).
systemctl daemon-reload
# Start the service and check that it is running.
systemctl start alertmanager
systemctl status alertmanager
# Start Alertmanager automatically at boot.
systemctl enable alertmanager
# Restart Prometheus after Alertmanager is up.
systemctl restart prometheus
prometheus-webhook-dingtalk
1.config.yml
## Request timeout
# timeout: 5s

## Uncomment following line in order to write template from scratch (be careful!)
# no_builtin_template: true

## Customizable templates path
templates:
  # - contrib/templates/legacy/template.tmpl
  - /usr/local/prometheus/alertmanager/dingding.tmpl

## You can also override default template using `default_message`
## The following example to use the 'legacy' template from v0.3.0
# default_message:
#   title: '{{ template "legacy.title" . }}'
#   text: '{{ template "legacy.content" . }}'

## Targets, previously was known as "profiles"
targets:
  # "webhook1" is the target name used in the Alertmanager webhook URL
  # (/dingtalk/webhook1/send).
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxx
    # secret for signature
    secret: xxx
    # Render the message with the "ops.title" / "ops.html" templates.
    message:
      title: '{{ template "ops.title" . }}'
      text: '{{ template "ops.html" . }}'
2.prometheus-webhook-dingtalk.service
vi /usr/lib/systemd/system/prometheus-webhook-dingtalk.service
# systemd unit that runs the prometheus-webhook-dingtalk bridge.
[Unit]
Description=prometheus-webhook-dingtalk
# Start only after the network target has been reached.
After=network.target

[Service]
# Restart the process automatically when it exits with a failure.
Restart=on-failure
ExecStart=/usr/local/prometheus/prometheus-webhook-dingtalk-2.1.0.linux-amd64/prometheus-webhook-dingtalk --config.file=/usr/local/prometheus/prometheus-webhook-dingtalk-2.1.0.linux-amd64/config.yml

[Install]
# Pulled in by the normal multi-user boot target when the unit is enabled.
WantedBy=multi-user.target
# Make systemd re-read its unit files so the new
# prometheus-webhook-dingtalk.service is known (same first step as for
# prometheus.service).
systemctl daemon-reload
# Start the service and check that it is running.
systemctl start prometheus-webhook-dingtalk
systemctl status prometheus-webhook-dingtalk
# Start the webhook bridge automatically at boot.
systemctl enable prometheus-webhook-dingtalk
# Restart Alertmanager and Prometheus after the webhook bridge is up.
systemctl restart alertmanager
systemctl restart prometheus
需保证服务器能够访问 https://oapi.dingtalk.com ,否则无法发送钉钉告警
SpringBoot配置
1.pom
<!-- Spring Boot Actuator starter -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- Micrometer registry for Prometheus -->
<dependency>
    <groupId>io.micrometer</groupId>
    <artifactId>micrometer-registry-prometheus</artifactId>
</dependency>
2.application.yml
# Actuator / metrics settings for the Spring Boot application.
management:
  metrics:
    export:
      prometheus:
        enabled: true
    # Tag every metric with the application name.
    tags:
      application: ${spring.application.name}
  endpoints:
    web:
      exposure:
        # Actuator endpoints exposed over HTTP.
        include: health,metrics,httptrace,prometheus
  endpoint:
    health:
      show-details: always

# Port 8120 is the port the 'server-test' and 'server-prod' scrape targets use.
server:
  port: 8120