让开发人员在 Spring Boot 项目中集成 Micrometer
参考链接:
Micrometer集成 Prometheus 监控 Java 应用性能 - 灰信网(软件开发博客聚合)
集成完成后,访问以下地址进行验证:
# curl http://ip:port/actuator/prometheus
如果返回大量指标数据,说明集成正常。
prometheus集成
# Scrape job for the Java services.
# This list item is meant to sit under `scrape_configs:` in the Prometheus
# configuration file.
- job_name: java
  scrape_interval: 10s
  # Path of the metrics endpoint checked with curl in the notes above.
  metrics_path: '/actuator/prometheus'
  static_configs:
    # Each target group attaches its own `service_name` label, which the
    # alert annotations use to identify the affected service.
    - targets:
        - '172.30.0.8:8986'
        - '172.30.0.11:8986'
        - '172.30.0.21:8986'
      labels:
        service_name: 'aaa'
    - targets:
        - '172.30.0.18:18278'
        - '172.30.0.25:18278'
        - '172.30.0.36:18278'
      labels:
        service_name: 'bbb'
Grafana 仪表盘 ID(Dashboard ID)
6756
12856
以 12856 为主,并在其中穿插 6756 的部分面板。
告警规则
# cat /data/prometheus_dir/rules/java-rules.yaml
# Alerting rules for the services scraped by the `java` job.
# All selectors are scoped with job="java"; the annotations rely on the
# `service_name` and `instance` labels of the matching series.
groups:
  - name: JAVA服务-监控告警
    rules:
      # Target down: the `up` series of a `java` target has been 0 for 1 minute.
      - alert: Java 服务停止告警
        expr: up{job="java"} == 0
        for: 1m
        labels:
          severity: warning
          status: 非常严重
        annotations:
          summary: "服务停止:{{$.Labels.service_name}}--{{$.Labels.instance}}"
          description: "服务停止:{{$.Labels.service_name}}--{{$.Labels.instance}},(当前:{{$value}})"

      # Mean request latency over the last minute (seconds sum / request
      # count), skipping actuator URIs and series whose `exception` label is
      # not "None". Fires when it stays above 3 seconds for 1 minute.
      # rate() is used instead of irate(): irate() only looks at the last two
      # samples, which makes alerts flap and keeps resetting the `for` timer.
      - alert: Java 接口延迟告警
        expr: |-
          rate(http_server_requests_seconds_sum{job="java",exception="None",uri!~".*actuator.*"}[1m])
            /
          rate(http_server_requests_seconds_count{job="java",exception="None",uri!~".*actuator.*"}[1m])
            > 3
        for: 1m
        labels:
          severity: warning
          status: 非常严重
        annotations:
          summary: "接口延迟:{{$.Labels.job}}"
          description: "接口延迟:{{$.Labels.service_name}}--{{$.Labels.instance}} > 3s,(当前:{{$value}})"

      # Non-200 responses. The request counter never decreases, so selecting
      # the raw series would keep this alert firing forever after the first
      # non-200 response; increase() restricts it to new responses seen in
      # the last minute.
      # NOTE(review): status!='200' also matches other 2xx/3xx codes - confirm
      # that this is intended.
      - alert: Java 接口状态码告警
        expr: increase(http_server_requests_seconds_count{job="java",uri!="/**",status!='200'}[1m]) > 0
        for: 1m
        labels:
          severity: warning
          status: 非常严重
        annotations:
          summary: "接口状态码异常:{{$.Labels.service_name}}--{{$.Labels.instance}}"
          description: "接口状态码异常:{{$.Labels.service_name}}--{{$.Labels.instance}}--{{$.Labels.method}}--{{$.Labels.uri}},(当前:{{$.Labels.status}})"

      # GC frequency, averaged over the last minute. No `for` clause, so it
      # fires on the first evaluation that exceeds the threshold.
      # NOTE(review): rate() is per second, so the threshold means more than
      # 5 GC pauses per second - confirm the intended unit.
      - alert: Java GC次数告警
        expr: rate(jvm_gc_pause_seconds_count{job="java"}[1m]) > 5
        labels:
          severity: warning
          status: 告警
        annotations:
          summary: "GC次数告警:{{$.Labels.service_name}}--{{$.Labels.instance}}"
          description: "1分钟平均GC次数告警:{{$.Labels.service_name}}--{{$.Labels.instance}}--{{$.Labels.cause}} > 5,(当前:{{$value}})"

      # Error-level log events, averaged over the last minute. Scoped to
      # job="java" like the other rules in this group.
      # NOTE(review): rate() is per second, so the threshold means more than
      # 50 error events per second - confirm the intended unit.
      - alert: Java error日志告警
        expr: rate(logback_events_total{job="java",level="error"}[1m]) > 50
        labels:
          severity: warning
          status: 告警
        annotations:
          summary: "error日志告警:{{$.Labels.service_name}}--{{$.Labels.instance}}"
          description: "1分钟平均error日志数量过多:{{$.Labels.service_name}}--{{$.Labels.instance}} > 50,(当前:{{$value}})"