Prometheus 主机 + MySQL + PostgreSQL + HTTP 告警规则

主机 (host) 告警规则

  # Memory pressure: used memory (total minus available) is above 90% of
  # total memory, sustained for 1 minute.
  - alert: hostMemUsageAlert
    expr: >-
      (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)/node_memory_MemTotal_bytes
      > 0.90
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} 内存使用率过高'
      description: '实例 {{ $labels.instance }} 内存使用率 90% (当前值为: {{ $value }})'

  # Low disk space: less than 10% of the filesystem mounted at "/rootfs" is
  # still available, sustained for 5 minutes.
  - alert: 主机磁盘空间不足
    expr: '(node_filesystem_avail_bytes{mountpoint="/rootfs"}  * 100) / node_filesystem_size_bytes{mountpoint="/rootfs"} < 10'
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: '主机磁盘空间不足 (instance {{ $labels.instance }})'
      # Literal block scalar; the two extra leading spaces on the VALUE and
      # LABELS lines are part of the rendered text.
      description: |-
        磁盘几乎满了 (< 10% left)
          VALUE = {{ $value }}
          LABELS: {{ $labels }}

  # Disk read latency: average time spent per completed read over the last
  # minute is above 100 ms, sustained for 5 minutes.
  # node_disk_read_time_seconds_total is measured in seconds, so the ratio is
  # seconds per read and the 100 ms threshold must be written as 0.1
  # (a threshold of 100 would mean 100 seconds per read).
  - alert: 主机异常磁盘读取延迟
    expr: rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "主机异常磁盘读取延迟 (instance {{ $labels.instance }})"
      description: "磁盘延迟正在增长 (read operations > 100ms)\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

  # High CPU usage: 100 minus the per-instance average idle percentage is
  # above 99, sustained for 5 minutes.
  - alert: 主机Cpu高负载
    expr: >-
      100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
      > 99
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: '主机Cpu高负载 (instance {{ $labels.instance }})'
      # Literal block scalar; the indented lines keep their two leading spaces.
      description: |-
        Cpu 负载 > 99%
          VALUE = {{ $value }}
          LABELS: {{ $labels }}

  # High outbound traffic: transmit byte rate summed per instance, divided by
  # 1024 twice (bytes -> MiB), is above 100, sustained for 5 minutes.
  - alert: 主机网络接口可能正在发送过多数据
    expr: >-
      sum by (instance) (irate(node_network_transmit_bytes_total[2m]))
      / 1024 / 1024 > 100
    for: 5m
    labels:
      severity: warning
    annotations:
      # The leading space inside the summary is part of the original text.
      summary: ' 主机 (instance {{ $labels.instance }}) 网络吞吐量较大'
      description: |-
        主机网络接口可能发送了太多的数据 (> 100 MB/s)
          VALUE = {{ $value }}
          LABELS: {{ $labels }}

MySQL 告警规则

# Rule group with availability, connection, thread and slow-query alerts for
# MySQL, all keyed on mysql_* metrics.
- name: MySQL_Alert
  rules:
  # MySQL is reported down (mysql_up == 0) for 5 seconds.
  - alert: MySQL status
    expr: mysql_up == 0
    for: 5s
    labels:
      severity: critical
    annotations:
      summary: "Instance {{ $labels.instance }} MySQL宕机"
      description: "MySQL 数据库宕机,需要立即采取行动!"

  # Peak connected threads over the last 5 minutes exceed 80% of
  # max_connections, sustained for 5 minutes.
  - alert: MySQL连接太多
    expr: avg by (instance) (max_over_time(mysql_global_status_threads_connected[5m])) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 80
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "{{ $labels.instance }} 实例的Mysql连接太多"
      description: "超过80%的MySQL连接在 {{ $labels.instance }}上\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

  # Peak running threads over the last 5 minutes exceed 60% of
  # max_connections, sustained for 5 minutes.
  - alert: MySQL高线程运行
    expr: avg by (instance) (max_over_time(mysql_global_status_threads_running[5m])) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 60
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "{{ $labels.instance }} 实例的Mysql高线程运行"
      description: "超过60%的MySQL连接连接处于运行状态 {{ $labels.instance }}\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

  # More than 3 new slow queries within the last 5 minutes, sustained for
  # 5 minutes.
  # mysql_global_status_slow_queries mirrors MySQL's cumulative Slow_queries
  # status counter, so comparing its raw value with 3 would keep firing
  # forever once more than three slow queries had ever been recorded.
  # increase() measures only the recent growth.
  - alert: MySQL查询速度慢
    expr: increase(mysql_global_status_slow_queries[5m]) > 3
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "{{ $labels.instance }} 实例的Mysql查询速度慢"
      description: "MySQL服务有一些慢查询.\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

PostgreSQL 告警规则

# Rule group with availability, restart, connection, deadlock and slow-query
# alerts for PostgreSQL, keyed on pg_* metrics.
- name: PostgreSQL_Alert
  rules:
  # PostgreSQL is reported down (pg_up == 0) for 5 minutes.
  - alert: PostgreSQL 数据库挂掉了
    expr: pg_up == 0
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "{{ $labels.instance }} 实例的 PostgreSQL 数据库挂掉了"
      description: "PostgreSQL宕机,需立即处理!"

  # The postmaster started less than 60 seconds ago.
  # The condition can only hold for at most one minute after a restart, so
  # any `for` longer than that would prevent the alert from ever firing;
  # fire immediately instead.
  - alert: PostgreSQL重新启动
    expr: time() - pg_postmaster_start_time_seconds < 60
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: "{{ $labels.instance }} 实例的PostgreSQL重启"
      description: "Postgresql 刚刚重新启动,不到一分钟前\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

  # Active connections (excluding template* and postgres databases) exceed
  # 90% of max_connections, sustained for 5 minutes.
  # Both sides are aggregated by `instance` so that they carry the same label
  # set and can be compared one-to-one; aggregating the left side by
  # `datname` alone drops `instance`, which leaves the comparison without
  # matching series and `{{ $labels.instance }}` empty in the summary.
  # NOTE(review): assumes both metrics carry an `instance` label — confirm
  # against the exporter in use.
  - alert: PostgreSQL的连接数不足
    expr: sum by (instance) (pg_stat_activity_count{datname!~"template.*|postgres"}) > min by (instance) (pg_settings_max_connections) * 0.9
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "Postgresql 的连接数不足10% {{ $labels.instance }}"
      description: "PostgreSQL instance has too many connections\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

  # Deadlock counter is increasing (excluding template* and postgres
  # databases), sustained for 5 minutes.
  - alert: PostgreSQL死锁
    expr: rate(pg_stat_database_deadlocks{datname!~"template.*|postgres"}[1m]) > 0
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "{{ $labels.instance }}实例的Postgresql死锁"
      description: "PostgreSQL已死锁\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

  # pg_slow_queries above 3 for 5 minutes.
  # NOTE(review): pg_slow_queries is not defined in this file — presumably it
  # comes from a custom exporter query; verify that it exists and is a gauge.
  - alert: PostgreSQL慢查询
    expr: pg_slow_queries > 3
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "{{ $labels.instance }} 实例的Postgresql慢速查询"
      description: "PostgreSQL执行查询缓慢\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

HTTP 告警规则

  # HTTP probe failure: the probed status code is 199 or lower, or 400 or
  # higher, sustained for 20 seconds.
  - alert: Web访问异常
    expr: 'probe_http_status_code <= 199 OR probe_http_status_code >= 400'
    for: 20s
    labels:
      severity: critical
    annotations:
      summary: '{{ $labels.instance }} HTTP请求失败'
      # Literal block scalar; the LABELS line keeps its two leading spaces.
      description: |-
        HTTP 状态码 {{ $value }}
          LABELS: {{ $labels }}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值