vmware告警规则
1.1、新建规则目录
mkdir -p /opt/prometheus/rules
1.2、在该目录下新建规则文件 vmware.yaml(文件名可自定义,但必须与 1.3 中 rule_files 引用的路径一致)
内容如下(按照自己需要配置)
# Prometheus alerting rules for VMware metrics (VMs, ESXi hosts, datastores).
# Indentation is significant: every rule must be nested under
# groups -> rules, and labels/annotations under their own rule.
# Thresholds and `for` durations are site-specific; tune them as needed.
---
groups:
  - name: vmware status
    rules:
      # --- Snapshots -------------------------------------------------------
      # More than 5 snapshots on a VM, sustained for 30 minutes.
      # NOTE(review): the alert name looks like a paste artifact (repeated
      # "快照", trailing "模板"); kept unchanged because renaming changes the
      # `alertname` label that routes and silences may match on.
      - alert: 虚拟机快照快照数量多模板
        expr: vmware_vm_snapshots > 5
        for: 30m
        labels:
          severity: warning
        annotations:
          summary: "虚拟机快照数量过多"
          description: "{{ $labels.instance }}中虚拟机={{ $labels.vm_name }}快照数量为:{{ $value }}个"

      # Snapshot age in days (now minus snapshot timestamp, divided by the
      # number of seconds in a day) is 90 or more.
      - alert: 存在90天以上的快照
        expr: (time() - vmware_vm_snapshot_timestamp_seconds) / (60 * 60 * 24) >= 90
        for: 30m
        labels:
          severity: warning
        annotations:
          summary: "虚拟机存在超过90天的快照"
          description: "{{ $labels.instance }}中虚拟机={{ $labels.vm_name }} 存在保留了{{ $value | printf \"%.0f\"}}天的快照"

      # --- VM memory -------------------------------------------------------
      # Warning band: 80 <= usage < 90. The upper bound keeps this rule from
      # overlapping with the 90% rule below.
      # NOTE(review): the `/ 100` presumably converts hundredths of a percent
      # to percent — confirm against the exporter's metric units.
      - alert: 虚拟机内存使用率超80%
        expr: vmware_vm_mem_usage_average / 100 >= 80 and vmware_vm_mem_usage_average / 100 < 90
        for: 30m
        labels:
          severity: warning
        annotations:
          summary: "虚拟机({{ $labels.vm_name }})内存使用率超80%"
          description: "{{ $labels.instance }}中虚拟机={{ $labels.vm_name }} 内存使用率为{{ $value | printf \"%.2f\"}}%"

      # Error level: usage >= 90, with a short 1-minute hold.
      - alert: 虚拟机内存使用率超90%
        expr: vmware_vm_mem_usage_average / 100 >= 90
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "虚拟机{{ $labels.vm_name }}内存使用率超90%"
          description: "{{ $labels.instance }}中虚拟机={{ $labels.vm_name }} 内存使用率为{{ $value | printf \"%.2f\" }}%"

      # --- VM CPU ----------------------------------------------------------
      # Warning band: 80 <= usage < 90 (same scheme as the memory rules).
      - alert: 虚拟机CPU使用率超80%
        expr: vmware_vm_cpu_usage_average / 100 >= 80 and vmware_vm_cpu_usage_average / 100 < 90
        for: 30m
        labels:
          severity: warning
        annotations:
          summary: "虚拟机({{ $labels.vm_name }})CPU使用率超80%"
          description: "{{ $labels.instance }}中虚拟机{{ $labels.vm_name }} CPU使用率为{{ $value | printf \"%.2f\" }}%"

      # Error level: usage >= 90, with a short 1-minute hold.
      - alert: 虚拟机CPU使用率超90%
        expr: vmware_vm_cpu_usage_average/ 100 >= 90
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "虚拟机({{ $labels.vm_name }})CPU使用率超90%"
          description: "{{ $labels.instance }}中虚拟机{{ $labels.vm_name }} CPU使用率为{{ $value | printf \"%.2f\" }}%"

      # --- ESXi hosts ------------------------------------------------------
      # Host memory: usage / max as a percentage, above 75.
      - alert: esxi主机内存使用率超75%
        expr: ((vmware_host_memory_usage / vmware_host_memory_max) * 100) > 75
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "({{ $labels.host_name }})内存使用率超过75%"
          description: "{{ $labels.instance }}的主机 {{ $labels.host_name }} 内存使用率为{{ $value | printf \"%.2f\" }}%"

      # Host CPU: usage / max as a percentage, above 75.
      - alert: esxi主机cpu使用率超75%
        expr: ((vmware_host_cpu_usage / vmware_host_cpu_max) * 100) > 75
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "({{ $labels.host_name }})CPU使用率超过75%"
          description: "{{ $labels.instance }} 的主机 {{ $labels.host_name }} CPU使用率为{{ $value | printf \"%.2f\" }}%"

      # --- Datastores ------------------------------------------------------
      # Used percentage = (1 - free / capacity) * 100, above 70.
      # NOTE(review): "datestore" is probably a typo for "datastore"; kept
      # unchanged because renaming changes the `alertname` label.
      - alert: datestore使用率超70%
        expr: ((1-(vmware_datastore_freespace_size / vmware_datastore_capacity_size) )*100)> 70
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "({{ $labels.ds_name }}) 存储使用率超过70%"
          description: "{{ $labels.instance }} 的 {{ $labels.ds_name }} 存储使用率为{{ $value | printf \"%.2f\" }}%"

      # --- VM guest disks --------------------------------------------------
      # Warning band: 80 <= used% < 90, per guest partition.
      - alert: 虚拟机磁盘使用率超80%
        expr: ((1-(vmware_vm_guest_disk_free / vmware_vm_guest_disk_capacity)) * 100) >= 80 and ((1-(vmware_vm_guest_disk_free / vmware_vm_guest_disk_capacity)) * 100) <90
        for: 30m
        labels:
          severity: warning
        annotations:
          summary: "虚拟机({{ $labels.vm_name }}) 磁盘使用率超过80%"
          description: "{{ $labels.instance }} 的 {{ $labels.vm_name }} 路径为 {{ $labels.partition}} 磁盘使用率 {{ $value | printf \"%.2f\" }}%"

      # Error level: used% >= 90.
      - alert: 虚拟机磁盘使用率超90%
        expr: ((1-(vmware_vm_guest_disk_free / vmware_vm_guest_disk_capacity)) * 100) >=90
        for: 5m
        labels:
          severity: error
        annotations:
          summary: "虚拟机({{ $labels.vm_name }}) 磁盘使用率超过90%"
          description: "{{ $labels.instance }} 的 {{ $labels.vm_name }} 路径为 {{ $labels.partition}} 磁盘使用率 {{ $value | printf \"%.2f\" }}%"
1.3、修改prometheus.yml
vim /opt/prometheus/prometheus.yml
添加告警规则:
# Rule files listed here are loaded once; their rules are then evaluated
# periodically, at the interval given by the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - '/opt/prometheus/rules/vmware.yaml'
1.4、重启prometheus
systemctl restart prometheus
systemctl status prometheus
1.5、效果展示
在浏览器中打开 http://<Prometheus服务器IP>:9090 ,进入 Alerts 页面即可看到已加载的告警规则。