Kube-prometheus: Deploying Prometheus

Deploying Prometheus

Download kube-prometheus

wget -c https://github.com/prometheus-operator/kube-prometheus/archive/refs/tags/v0.11.0.tar.gz
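
After downloading, extract the archive and run the remaining commands from the extracted directory (the GitHub tag tarball unpacks to kube-prometheus-0.11.0):

tar -zxvf v0.11.0.tar.gz
cd kube-prometheus-0.11.0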

List the images referenced by the manifests

grep -rn 'image: '

examples/example-app/example-app.yaml:36:        image: quay.io/fabxc/prometheus_demo_service
examples/thanos-sidecar.jsonnet:11:          image: 'quay.io/thanos/thanos:v0.19.0',
experimental/metrics-server/metrics-server-deployment.yaml:21:        image: gcr.io/google_containers/metrics-server-amd64:v0.2.0

manifests/alertmanager-alertmanager.yaml:13:  image: quay.io/prometheus/alertmanager:v0.24.0
manifests/blackboxExporter-deployment.yaml:33:        image: quay.io/prometheus/blackbox-exporter:v0.21.0
manifests/blackboxExporter-deployment.yaml:60:        image: jimmidyson/configmap-reload:v0.5.0
manifests/blackboxExporter-deployment.yaml:88:        image: quay.io/brancz/kube-rbac-proxy:v0.12.0
manifests/grafana-deployment.yaml:33:        image: grafana/grafana:8.5.5
manifests/kubeStateMetrics-deployment.yaml:35:        image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.5.0
manifests/kubeStateMetrics-deployment.yaml:56:        image: quay.io/brancz/kube-rbac-proxy:v0.12.0
manifests/kubeStateMetrics-deployment.yaml:82:        image: quay.io/brancz/kube-rbac-proxy:v0.12.0
manifests/nodeExporter-daemonset.yaml:38:        image: quay.io/prometheus/node-exporter:v1.3.1
manifests/nodeExporter-daemonset.yaml:74:        image: quay.io/brancz/kube-rbac-proxy:v0.12.0
manifests/prometheus-prometheus.yaml:21:  image: quay.io/prometheus/prometheus:v2.36.1
manifests/prometheusAdapter-deployment.yaml:40:        image: k8s.gcr.io/prometheus-adapter/prometheus-adapter:v0.9.1
manifests/prometheusOperator-deployment.yaml:33:        image: quay.io/prometheus-operator/prometheus-operator:v0.57.0
manifests/prometheusOperator-deployment.yaml:56:        image: quay.io/brancz/kube-rbac-proxy:v0.12.0

Re-tag the images and push them to Harbor

# docker pull gcr.io/google_containers/metrics-server-amd64:v0.2.0
# Error response from daemon: Get "https://gcr.io/v2/": dial tcp 74.125.203.82:443: connect: connection timed out
# gcr.io is unreachable, so pull from the Aliyun registry mirror instead
docker pull registry.aliyuncs.com/google_containers/metrics-server-amd64:v0.2.0

docker pull quay.io/prometheus/alertmanager:v0.24.0
docker pull quay.io/prometheus/blackbox-exporter:v0.21.0
docker pull jimmidyson/configmap-reload:v0.5.0
docker pull quay.io/brancz/kube-rbac-proxy:v0.12.0
docker pull grafana/grafana:8.5.5

# docker pull k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.5.0
# Aliyun mirror alternatives:
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-state-metrics/kube-state-metrics:v2.5.0
# docker pull registry.aliyuncs.com/google_containers/kube-state-metrics/kube-state-metrics:v2.5.0
# pull a mirrored copy from Docker Hub instead
docker pull landv1001/kube-state-metrics:v2.5.0

docker pull quay.io/prometheus/node-exporter:v1.3.1
docker pull quay.io/prometheus/prometheus:v2.36.1

# docker pull k8s.gcr.io/prometheus-adapter/prometheus-adapter:v0.9.1
# Aliyun mirror alternative:
# docker pull registry.aliyuncs.com/google_containers/prometheus-adapter/prometheus-adapter:v0.9.1
# pull a mirrored copy from Docker Hub instead
docker pull v5cn/prometheus-adapter:v0.9.1

docker pull quay.io/prometheus-operator/prometheus-operator:v0.57.0

docker tag registry.aliyuncs.com/google_containers/metrics-server-amd64:v0.2.0  10.83.195.8:1443/prometheus/metrics-server-amd64:v0.2.0
docker tag quay.io/prometheus/alertmanager:v0.24.0 10.83.195.8:1443/prometheus/alertmanager:v0.24.0
docker tag quay.io/prometheus/blackbox-exporter:v0.21.0 10.83.195.8:1443/prometheus/blackbox-exporter:v0.21.0
docker tag jimmidyson/configmap-reload:v0.5.0 10.83.195.8:1443/prometheus/configmap-reload:v0.5.0
docker tag quay.io/brancz/kube-rbac-proxy:v0.12.0 10.83.195.8:1443/prometheus/kube-rbac-proxy:v0.12.0
docker tag grafana/grafana:8.5.5 10.83.195.8:1443/prometheus/grafana:8.5.5
docker tag landv1001/kube-state-metrics:v2.5.0 10.83.195.8:1443/prometheus/kube-state-metrics:v2.5.0
docker tag quay.io/prometheus/node-exporter:v1.3.1 10.83.195.8:1443/prometheus/node-exporter:v1.3.1
docker tag quay.io/prometheus/prometheus:v2.36.1 10.83.195.8:1443/prometheus/prometheus:v2.36.1
docker tag v5cn/prometheus-adapter:v0.9.1 10.83.195.8:1443/prometheus/prometheus-adapter:v0.9.1
docker tag quay.io/prometheus-operator/prometheus-operator:v0.57.0 10.83.195.8:1443/prometheus/prometheus-operator:v0.57.0
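
Pushing to the private Harbor registry requires an authenticated Docker session (and the registry certificate must be trusted by, or listed as insecure in, the Docker daemon); a minimal sketch, assuming the prometheus project already exists in Harbor:

docker login 10.83.195.8:1443   # prompts for the Harbor username and password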

docker push 10.83.195.8:1443/prometheus/metrics-server-amd64:v0.2.0
docker push 10.83.195.8:1443/prometheus/alertmanager:v0.24.0
docker push 10.83.195.8:1443/prometheus/blackbox-exporter:v0.21.0
docker push 10.83.195.8:1443/prometheus/configmap-reload:v0.5.0
docker push 10.83.195.8:1443/prometheus/kube-rbac-proxy:v0.12.0
docker push 10.83.195.8:1443/prometheus/grafana:8.5.5
docker push 10.83.195.8:1443/prometheus/kube-state-metrics:v2.5.0
docker push 10.83.195.8:1443/prometheus/node-exporter:v1.3.1
docker push 10.83.195.8:1443/prometheus/prometheus:v2.36.1
docker push 10.83.195.8:1443/prometheus/prometheus-adapter:v0.9.1
docker push 10.83.195.8:1443/prometheus/prometheus-operator:v0.57.0

docker pull bitnami/pushgateway:1.8.0
docker tag bitnami/pushgateway:1.8.0 10.83.195.8:1443/prometheus/pushgateway:1.8.0
docker push 10.83.195.8:1443/prometheus/pushgateway:1.8.0

docker pull quay.io/prometheus-operator/prometheus-config-reloader:v0.57.0
docker tag quay.io/prometheus-operator/prometheus-config-reloader:v0.57.0 10.83.195.8:1443/prometheus/prometheus-config-reloader:v0.57.0
docker push 10.83.195.8:1443/prometheus/prometheus-config-reloader:v0.57.0

Replace the image references with the Harbor registry

# preview a substitution first, e.g.:
# sed 's@quay.io/prometheus@10.83.195.8:1443/prometheus@g' manifests/alertmanager-alertmanager.yaml | grep '10.83.195.8:1443/prometheus'
sed -i 's@quay.io/prometheus/alertmanager:v0.24.0@10.83.195.8:1443/prometheus/alertmanager:v0.24.0@g' manifests/alertmanager-alertmanager.yaml
sed -i 's@quay.io/prometheus/blackbox-exporter:v0.21.0@10.83.195.8:1443/prometheus/blackbox-exporter:v0.21.0@g' manifests/blackboxExporter-deployment.yaml
sed -i 's@jimmidyson/configmap-reload:v0.5.0@10.83.195.8:1443/prometheus/configmap-reload:v0.5.0@g' manifests/blackboxExporter-deployment.yaml
sed -i 's@quay.io/brancz/kube-rbac-proxy:v0.12.0@10.83.195.8:1443/prometheus/kube-rbac-proxy:v0.12.0@g' manifests/blackboxExporter-deployment.yaml
sed -i 's@grafana/grafana:8.5.5@10.83.195.8:1443/prometheus/grafana:8.5.5@g' manifests/grafana-deployment.yaml
sed -i 's@k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.5.0@10.83.195.8:1443/prometheus/kube-state-metrics:v2.5.0@g' manifests/kubeStateMetrics-deployment.yaml
sed -i 's@quay.io/brancz/kube-rbac-proxy:v0.12.0@10.83.195.8:1443/prometheus/kube-rbac-proxy:v0.12.0@g' manifests/kubeStateMetrics-deployment.yaml
sed -i 's@quay.io/prometheus/node-exporter:v1.3.1@10.83.195.8:1443/prometheus/node-exporter:v1.3.1@g' manifests/nodeExporter-daemonset.yaml
sed -i 's@quay.io/brancz/kube-rbac-proxy:v0.12.0@10.83.195.8:1443/prometheus/kube-rbac-proxy:v0.12.0@g' manifests/nodeExporter-daemonset.yaml
sed -i 's@quay.io/prometheus/prometheus:v2.36.1@10.83.195.8:1443/prometheus/prometheus:v2.36.1@g' manifests/prometheus-prometheus.yaml
sed -i 's@k8s.gcr.io/prometheus-adapter/prometheus-adapter:v0.9.1@10.83.195.8:1443/prometheus/prometheus-adapter:v0.9.1@g' manifests/prometheusAdapter-deployment.yaml
sed -i 's@quay.io/prometheus-operator/prometheus-operator:v0.57.0@10.83.195.8:1443/prometheus/prometheus-operator:v0.57.0@g' manifests/prometheusOperator-deployment.yaml
sed -i 's@quay.io/brancz/kube-rbac-proxy:v0.12.0@10.83.195.8:1443/prometheus/kube-rbac-proxy:v0.12.0@g' manifests/prometheusOperator-deployment.yaml
sed -i 's@quay.io/prometheus-operator/prometheus-config-reloader:v0.57.0@10.83.195.8:1443/prometheus/prometheus-config-reloader:v0.57.0@g' manifests/prometheusOperator-deployment.yaml
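
A quick sanity check that no manifest still points at an upstream registry (any output means a reference was missed):

grep -rn 'image: ' manifests/ | grep -v '10.83.195.8:1443'
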
vim manifests/grafana-config.yaml
# enable anonymous access and store Grafana data in MySQL
# the password is plain text here and does not need to be base64-encoded

  grafana.ini: |
    [security]
    allow_embedding = true
    [date_formats]
    default_timezone = UTC
    [auth.anonymous]
    enabled = true
    [database]
    type = mysql
    host = xx:3306
    name = grafana
    user = grafana
    password = xx
type: Opaque
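
If the stack is already deployed, reapply the Secret and restart Grafana so the new grafana.ini takes effect (a sketch):

kubectl -n monitoring apply -f manifests/grafana-config.yaml
kubectl -n monitoring rollout restart deployment grafana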

Configure Ingress


# vim manifests/alertmanager-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: alertmanager-ingress
  namespace: monitoring
  annotations:
    kubernetes.io/ingress.class: "nginx"
    prometheus.io/http_probe: "true"
spec:
  rules:
  - host: bigdata-dev-alertmanager.ky-tech.com.cn
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: alertmanager-main
            port:
              number: 9093
  tls:
  - hosts:
    - bigdata-dev-alertmanager.ky-tech.com.cn  # HTTPS hostname
    secretName: prometheus-secret


# vim manifests/grafana-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: grafana-ingress
  namespace: monitoring
  annotations:
    kubernetes.io/ingress.class: "nginx"
    prometheus.io/http_probe: "true"
spec:
  rules:
  - host: bigdata-dev-grafana.ky-tech.com.cn 
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: grafana
            port:
              number: 3000
  tls:
  - hosts:
    - bigdata-dev-grafana.ky-tech.com.cn  # HTTPS hostname
    secretName: prometheus-secret


# vim manifests/prometheus-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: prometheus-ingress
  namespace: monitoring
  annotations:
    kubernetes.io/ingress.class: "nginx"
    prometheus.io/http_probe: "true"
spec:
  rules:
  - host: bigdata-dev-prometheus.ky-tech.com.cn
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: prometheus-k8s
            port:
              number: 9090
  tls:
  - hosts:
    - bigdata-dev-prometheus.ky-tech.com.cn  # HTTPS hostname
    secretName: prometheus-secret

Create the monitoring namespace and the TLS secret referenced by the Ingress objects:

kubectl create ns monitoring

kubectl create secret tls prometheus-secret --key /data/harbor_helm/stl/ky-tech.com.cn_nginx/ky-tech.com.cn.key --cert /data/harbor_helm/stl/ky-tech.com.cn_nginx/ky-tech.com.cn_bundle.crt -n monitoring
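
Optionally confirm the secret exists and has type kubernetes.io/tls:

kubectl -n monitoring get secret prometheus-secret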

Prometheus normally uses shared storage (for example Ceph)

# create a StorageClass (not shown here; bigdata-cephfs is used below)

# create the PVC
# vim prometheus-pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-pvc
  namespace: monitoring
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 4Ti
  storageClassName: bigdata-cephfs
  volumeMode: Filesystem

# once created, the PVC should bind (STATUS: Bound) to the bigdata-cephfs StorageClass

# vim manifests/prometheus-prometheus.yaml

spec:
  retention: 90d
  storage:
    volumeClaimTemplate:
      spec:
        storageClassName: bigdata-cephfs
        resources:
          requests:
            storage: 4Ti
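
Before applying, check that the StorageClass referenced above actually exists:

kubectl get storageclass bigdata-cephfs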

Install

# deploy the CRDs and the monitoring namespace first, then the remaining manifests
kubectl apply --server-side -f manifests/setup

kubectl wait \
	--for condition=Established \
	--all CustomResourceDefinition \
	--namespace=monitoring
	
kubectl apply -f manifests/
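
Once applied, all components should come up in the monitoring namespace; a quick check:

kubectl -n monitoring get pods -o wide
kubectl -n monitoring get svc,ingress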


# uninstall
# kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup

# force-delete a namespace stuck in Terminating by clearing its finalizers
kubectl get namespace monitoring -o json \
| tr -d "\n" | sed "s/\"finalizers\": \[[^]]\+\]/\"finalizers\": []/" \
| kubectl replace --raw /api/v1/namespaces/monitoring/finalize -f -

# on errors, inspect pod events and container logs
kubectl describe po trino-dockerhub-coordinator-c4779585d-tdldt  -n monitoring 
kubectl logs node-exporter-fb6ql  -n monitoring  --all-containers

# (combined from similar events): MountVolume.SetUp failed for volume "grafana-dashboard-persistentvolumesusage" : failed to sync configmap cache: timed out waiting for the condition
# workaround used here: remove the NetworkPolicy objects in the monitoring namespace
kubectl delete networkpolicy --all -n monitoring

Access

https://bigdata-dev-alertmanager.ky-tech.com.cn 
https://bigdata-dev-grafana.ky-tech.com.cn    admin/admin@13
https://bigdata-dev-prometheus.ky-tech.com.cn

Integrating Pushgateway with kube-prometheus

The Pushgateway image pushed to Harbor earlier is deployed together with a Service and a ServiceMonitor so Prometheus scrapes it:

apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/name: pushgateway-sdp
  name: pushgateway-sdp
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: pushgateway-sdp
  template:
    metadata:
      labels:
        app.kubernetes.io/name: pushgateway-sdp
        app: pushgateway
    spec:
      nodeSelector:
        kubernetes.io/system: monitor
      containers:
      - args:
        - --web.enable-admin-api
        - --push.disable-consistency-check
        image: 10.83.195.8:1443/prometheus/pushgateway:1.8.0
        livenessProbe:
          failureThreshold: 10
          httpGet:
            path: /-/healthy
            port: 9091
            scheme: HTTP
          initialDelaySeconds: 10
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 30
        name: pushgateway-sdp
        ports:
        - containerPort: 9091
          name: http
        readinessProbe:
          failureThreshold: 15
          httpGet:
            path: /-/ready
            port: 9091
            scheme: HTTP
          initialDelaySeconds: 10
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 30
        resources:
          limits:
            cpu: 16
            memory: 32Gi
          requests:
            cpu: 4
            memory: 16Gi
      restartPolicy: Always
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/name: pushgateway-sdp
  name: pushgateway-sdp
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - name: http
    nodePort: 30091
    port: 9091
    protocol: TCP
    targetPort: http
  selector:
    app.kubernetes.io/name: pushgateway-sdp
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/name: pushgateway-sdp
  name: pushgateway-sdp
  namespace: monitoring
spec:
  endpoints:
  - interval: 60s
    scrapeTimeout: 50s
    path: /metrics
    port: http
    scheme: http
    relabelings:
    - sourceLabels:
      - pod 
      targetLabel: pod
      regex: "(.*)"
      replacement: pushgateway-sdp
    - sourceLabels:
      - instance
      targetLabel: instance
      regex: "(.*)"
      replacement: pushgateway-sdp
  selector:
    matchLabels:
      app.kubernetes.io/name: pushgateway-sdp
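
A quick end-to-end check is to push a test metric through the NodePort and read it back (a sketch; 10.83.195.8 stands in for any node IP reachable on port 30091):

echo 'demo_metric 42' | curl --data-binary @- http://10.83.195.8:30091/metrics/job/demo_job
curl -s http://10.83.195.8:30091/metrics | grep demo_metric
# after the next scrape interval the pushed series is also visible in Prometheus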

Monitoring nodes outside the K8s cluster

Headless Services with manually managed Endpoints point at the external node_exporter instances (port 9100), and a ServiceMonitor selects them by label:

---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/name: node-exporter
    cluster: hadoop-sdp
  name: hadoop-sdp
  namespace: monitoring
spec:
  endpoints:
  - interval: 60s
    port: metrics
    relabelings:
    - sourceLabels:
      - __meta_kubernetes_service_label_dn_flag
      action: replace
      targetLabel: dn_flag
      regex: (.*)
    - sourceLabels:
      - __meta_kubernetes_service_label_nm_flag
      action: replace
      targetLabel: nm_flag
      regex: (.*)
    - sourceLabels:
      - __meta_kubernetes_service_label_cluster
      targetLabel: cluster
    - sourceLabels:
      - __address__
      targetLabel: ip
      regex: "(.*):(.*)"
      replacement: $1
  selector:
    matchLabels:
      app.kubernetes.io/name: node-exporter
      cluster: hadoop-sdp
  namespaceSelector:
    matchNames:
    - monitoring
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-sdp-dn
  labels:
    app.kubernetes.io/name: node-exporter
    dn_flag: "1"
    cluster: hadoop-sdp
  namespace: monitoring
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: metrics
    port: 9100
    targetPort: 9100
    protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
  name: hadoop-sdp-dn
  labels:
    app.kubernetes.io/name: node-exporter
    dn_flag: "1"
    cluster: hadoop-sdp
  namespace: monitoring
subsets:
  - addresses:
    - ip: 10.83.192.9
    ports:
    - name: metrics
      port: 9100
      protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-sdp-nm
  labels:
    app.kubernetes.io/name: node-exporter
    nm_flag: "1"
    cluster: hadoop-sdp
  namespace: monitoring
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: metrics
    port: 9100
    targetPort: 9100
    protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
  name: hadoop-sdp-nm
  labels:
    app.kubernetes.io/name: node-exporter
    nm_flag: "1"
    cluster: hadoop-sdp
  namespace: monitoring
subsets:
  - addresses:
    - ip: 10.83.192.9
    ports:
    - name: metrics
      port: 9100
      protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-sdp-nn
  labels:
    app.kubernetes.io/name: node-exporter
    cluster: hadoop-sdp
  namespace: monitoring
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: metrics
    port: 9100
    targetPort: 9100
    protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
  name: hadoop-sdp-nn
  labels:
    app.kubernetes.io/name: node-exporter
    cluster: hadoop-sdp
  namespace: monitoring
subsets:
  - addresses:
    - ip: 10.83.192.6
    - ip: 10.83.192.7
    ports:
    - name: metrics
      port: 9100
      protocol: TCP
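
This assumes node_exporter is already running on the external hosts; reachability can be verified directly before expecting the targets to appear in Prometheus:

curl -s http://10.83.192.9:9100/metrics | head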

Integrating Alertmanager

A small Python service that receives Alertmanager webhook notifications:

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
@ File       : 0513.py
@ Time       : 2024-05-13 15:43
@ Author     : Shylin Zhang
@ version    : python 3.6
@ Description:
"""
import logging
from flask import Flask, request
import json
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
    filename="./alertmanager-webhook-run.log"
)

app = Flask(__name__)


@app.route("/send/", methods=["POST"])
def send():
    """Alertmanager webhook receiver: log the raw payload and each individual alert."""
    try:
        data = json.loads(request.data)
        logging.info(data)
        alerts = data['alerts']
        for i in alerts:
            logging.info('SEND SMS: ' + str(i))
    except Exception as e:
        logging.error(e)
    return 'ok'


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8088)
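
The webhook can be smoke-tested with a minimal hand-crafted payload in the shape Alertmanager sends (only the fields the receiver reads are included):

curl -s -X POST http://10.83.195.6:8088/send/ \
  -H 'Content-Type: application/json' \
  -d '{"alerts": [{"status": "firing", "labels": {"alertname": "TestAlert", "severity": "warning"}}]}'
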
# vim manifests/alertmanager-secret.yaml

apiVersion: v1
kind: Secret
metadata:
  labels:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 0.24.0
  name: alertmanager-main
  namespace: monitoring
stringData:
  alertmanager.yaml: |-
    "global":
      "resolve_timeout": "5m"
    "inhibit_rules":
    - "equal":
      - "namespace"
      - "alertname"
      "source_matchers":
      - "severity = critical"
      "target_matchers":
      - "severity =~ warning|info"
    - "equal":
      - "namespace"
      - "alertname"
      "source_matchers":
      - "severity = warning"
      "target_matchers":
      - "severity = info"
    - "equal":
      - "namespace"
      "source_matchers":
      - "alertname = InfoInhibitor"
      "target_matchers":
      - "severity = info"
    "receivers":
    - "name": "alert-webhook"
      "webhook_configs":
      - "url": "http://10.83.195.6:8088/send/"  # python接收alertmanager告警服务
        "send_resolved": true
        "max_alerts": 0
    - "name": "Default"
    - "name": "Watchdog"
    - "name": "Critical"
    - "name": "null"
    "route":
      "group_by":
      - "namespace"
      - "alertname"
      "group_interval": "5m"
      "group_wait": "30s"
      "receiver": "alert-webhook"
      "repeat_interval": "24h"
      "routes":
      - "matchers":
        - "severity = critical"
        "receiver": "alert-webhook"
      - "matchers":
        - "severity = warning"
        "receiver": "alert-webhook"
      - "matchers":
        - "alertname = Watchdog"
        "receiver": "Watchdog"
      - "matchers":
        - "alertname = InfoInhibitor"
        "receiver": "null"
type: Opaque
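
Apply the updated Secret and confirm Alertmanager reloaded the configuration:

kubectl -n monitoring apply -f manifests/alertmanager-secret.yaml
kubectl -n monitoring logs -l 'alertmanager=main' -c alertmanager --tail=20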

Configure alerting rules (PrometheusRule)

# vim manifests/rules/hadoop-server.rule

apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
  name: prometheus-hadoop-rules
  namespace: monitoring
spec:
  groups:
  - name: hadoop.rules
    rules:
    - alert: Server memory usage > 80%
      annotations:
        description: Cluster [ {{ $labels.cluster }} ] node [ {{ $labels.ip }} ] memory usage {{ printf "%.2f" $value }}%
      expr: |
        (1 - (node_memory_MemAvailable_bytes{cluster=~"hadoop-sdp"} / (node_memory_MemTotal_bytes{cluster=~"hadoop-sdp"}))) * 100 > 50
      for: 30s
      labels:
        severity: critical
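
Apply the rule and check that the operator picked it up:

kubectl -n monitoring apply -f manifests/rules/hadoop-server.rule
kubectl -n monitoring get prometheusrule prometheus-hadoop-rules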

Once the alert fires, the notification shows up in the Alertmanager logs (and in the Python receiver's log file):

kubectl -n monitoring logs -l 'alertmanager=main' -c alertmanager