Deploying Prometheus with Pods
node_exporter
Per-node metrics collector
daemonset ————> guarantees one collector on every node
prometheus ————> the main monitoring server
grafana ————> visualization
alertmanager ————> alerting module
[root@master01 ~]# kubectl create ns monitor-sa
namespace/monitor-sa created
[root@master01 opt]# mkdir prometheus
[root@master01 opt]# cd prometheus/
[root@master01 prometheus]# vim node_exporter.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitor-sa
  labels:
    name: node-exporter
spec:
  selector:
    matchLabels:
      name: node-exporter
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      hostPID: true
      hostIPC: true
      hostNetwork: true
      containers:
      - name: node-exporter
        image: prom/node-exporter:v1
        ports:
        - containerPort: 9100
        resources:
          limits:
            cpu: "0.5"
        securityContext:
          privileged: true
        args:
        - --path.procfs
        - /host/proc
        - --path.sysfs
        - /host/sys
        - --collector.filesystem.ignored-mount-points
        - '^/(sys|proc|dev|host|etc)($|/)'
        volumeMounts:
        - name: dev
          mountPath: /host/dev
        - name: proc
          mountPath: /host/proc
        - name: sys
          mountPath: /host/sys
        - name: rootfs
          mountPath: /rootfs
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: dev
        hostPath:
          path: /dev
      - name: sys
        hostPath:
          path: /sys
      - name: rootfs
        hostPath:
          path: /

[root@master01 prometheus]# kubectl apply -f node_exporter.yaml
daemonset.apps/node-exporter created
[root@master01 prometheus]# kubectl get pod -n monitor-sa -o wide
NAME                  READY   STATUS    RESTARTS   AGE   IP               NODE       NOMINATED NODE   READINESS GATES
node-exporter-99vhd   1/1     Running   0          15s   192.168.60.120   node01     <none>           <none>
node-exporter-c6md9   1/1     Running   0          15s   192.168.60.130   node02     <none>           <none>
node-exporter-f29fh   1/1     Running   0          15s   192.168.60.110   master01   <none>           <none>
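Because the DaemonSet runs with hostPID/hostIPC/hostNetwork enabled, each exporter binds port 9100 directly on its node. A quick sanity check against any node IP from the output above, using nothing beyond plain curl:

# node_cpu_seconds_total is a standard node_exporter metric
curl -s http://192.168.60.110:9100/metrics | grep node_cpu_seconds_total | head -5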
# Create a service account for monitoring and bind it to the cluster-admin role
[root@master01 prometheus]# kubectl create serviceaccount monitor -n monitor-sa
[root@master01 prometheus]# kubectl create clusterrolebinding monitor-clusterrolebinding -n monitor-sa --clusterrole=cluster-admin --serviceaccount=monitor-sa:monitor
[root@master01 prometheus]# rz -E
rz waiting to receive.
[root@master01 prometheus]# ls
node_exporter.yaml  prometheus-alertmanager-cfg.yaml
[root@master01 prometheus]# vim prometheus-alertmanager-cfg.yaml
......
      - targets: ['192.168.60.110:10251']
    - job_name: 'kubernetes-controller-manager'
      scrape_interval: 5s
      static_configs:
      - targets: ['192.168.60.110:10252']
    - job_name: 'kubernetes-kube-proxy'
      scrape_interval: 5s
      static_configs:
      - targets: ['192.168.60.110:10249','192.168.60.120:10249','192.168.60.130:10249']
    - job_name: 'kubernetes-etcd'
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/k8s-certs/etcd/ca.crt
        cert_file: /var/run/secrets/kubernetes.io/k8s-certs/etcd/server.crt
        key_file: /var/run/secrets/kubernetes.io/k8s-certs/etcd/server.key
      scrape_interval: 5s
      static_configs:
      - targets: ['192.168.60.110:2379']
......
    - alert: HighPodCpuUsage    # alert name, used as the mail subject
      expr: sum(rate(container_cpu_usage_seconds_total{namespace="default", pod=~".+"}[5m])) by (pod) > 0.9    # metric expression to evaluate
      for: 5m                   # fire only after CPU stays above 90% for 5 minutes
      labels:
        severity: warning
      annotations:              # alert message body
[root@master01 prometheus]# vim alter-mail.yaml
kind: ConfigMap
apiVersion: v1
metadata:
  name: alertmanager
  namespace: monitor-sa
data:
  alertmanager.yml: |-
    global:
      resolve_timeout: 1m
      smtp_smarthost: 'smtp.qq.com:25'
      smtp_from: '1647629457@qq.com'
      smtp_auth_username: '1647629457@qq.com'
      smtp_auth_password: 'mhmjbfjydbuhecea'
      smtp_require_tls: false
    route:
      group_by: [alertname]
      group_wait: 10s
      group_interval: 10s
      repeat_interval: 10m
      receiver: default-receiver
    receivers:
    - name: 'default-receiver'
      email_configs:
      - to: '1647629457@qq.com'
        send_resolved: true
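The mail config can be linted offline before it goes into the cluster. A minimal sketch, assuming the amtool binary from an Alertmanager release is available on the workstation and the alertmanager.yml body (just the ConfigMap data, not the Kubernetes wrapper) has been saved to a local file:

# amtool ships with the Alertmanager release tarball; check-config validates routes and receivers
amtool check-config alertmanager.yml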
[root@master01 prometheus]# vim prometheus-svc.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  type: NodePort
  ports:
  - port: 9090
    targetPort: 9090
    protocol: TCP
  selector:
    app: prometheus
    component: server
[root@master01 prometheus]# vim prometheus-alter.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    name: prometheus
    kubernetes.io/cluster-service: 'true'
  name: alertmanager
  namespace: monitor-sa
spec:
  ports:
  - name: alertmanager
    nodePort: 30066
    port: 9093
    protocol: TCP
    targetPort: 9093
  selector:
    app: prometheus
  sessionAffinity: None
  type: NodePort
[root@master01 prometheus]# vim prometheus-deploy.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'
    spec:
      serviceAccountName: monitor
      initContainers:
      - name: init-chmod
        image: busybox:latest
        command: ['sh','-c','chmod -R 777 /prometheus; chmod -R 777 /etc']
        volumeMounts:
        - mountPath: /prometheus
          name: prometheus-storage-volume
        - mountPath: /etc/localtime
          name: timezone
      containers:
      - name: prometheus
        image: prom/prometheus:v2.45.0
        command:
        - prometheus
        - --config.file=/etc/prometheus/prometheus.yml
        - --storage.tsdb.path=/prometheus
        - --storage.tsdb.retention=720h
        - --web.enable-lifecycle
        ports:
        - containerPort: 9090
        volumeMounts:
        - name: prometheus-config
          mountPath: /etc/prometheus/
        - name: prometheus-storage-volume
          mountPath: /prometheus/
        - name: timezone
          mountPath: /etc/localtime
        - name: k8s-certs
          mountPath: /var/run/secrets/kubernetes.io/k8s-certs/etcd/
      - name: alertmanager
        image: prom/alertmanager:v0.20.0
        args:
        - "--config.file=/etc/alertmanager/alertmanager.yml"
        - "--log.level=debug"
        ports:
        - containerPort: 9093
          protocol: TCP
          name: alertmanager
        volumeMounts:
        - name: alertmanager-config
          mountPath: /etc/alertmanager
        - name: alertmanager-storage
          mountPath: /alertmanager
        - name: localtime
          mountPath: /etc/localtime
      volumes:
      - name: prometheus-config
        configMap:
          name: prometheus-config
          defaultMode: 0777
      - name: prometheus-storage-volume
        hostPath:
          path: /data
          type: DirectoryOrCreate
      - name: k8s-certs
        secret:
          secretName: etcd-certs
      - name: timezone
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
      - name: alertmanager-config
        configMap:
          name: alertmanager
      - name: alertmanager-storage
        hostPath:
          path: /data/alertmanager
          type: DirectoryOrCreate
      - name: localtime
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai

# Apply the two ConfigMaps first; the deployment references prometheus-config and alertmanager
kubectl apply -f prometheus-alertmanager-cfg.yaml
kubectl apply -f alter-mail.yaml

# Create the etcd-certs secret from the existing etcd certificates (mounted by the deployment)
kubectl -n monitor-sa create secret generic etcd-certs \
  --from-file=/etc/kubernetes/pki/etcd/server.key \
  --from-file=/etc/kubernetes/pki/etcd/server.crt \
  --from-file=/etc/kubernetes/pki/etcd/ca.crt

kubectl apply -f prometheus-deploy.yaml
kubectl apply -f prometheus-svc.yaml
kubectl apply -f prometheus-alter.yaml

[root@master01 prometheus]# kubectl get pod -n monitor-sa -o wide
NAME                                 READY   STATUS    RESTARTS   AGE     IP               NODE       NOMINATED NODE   READINESS GATES
node-exporter-99vhd                  1/1     Running   0          140m    192.168.60.120   node01     <none>           <none>
node-exporter-c6md9                  1/1     Running   0          140m    192.168.60.130   node02     <none>           <none>
node-exporter-f29fh                  1/1     Running   0          140m    192.168.60.110   master01   <none>           <none>
prometheus-server-55d866cb44-wrrbx   2/2     Running   0          5m29s   10.244.2.34      node02     <none>           <none>
[root@master01 prometheus]# kubectl get svc -n monitor-sa -o wide
NAME           TYPE       CLUSTER-IP      EXTERNAL-IP   PORT(S)          AGE    SELECTOR
alertmanager   NodePort   10.96.153.49    <none>        9093:30066/TCP   6m     app=prometheus
prometheus     NodePort   10.96.215.253   <none>        9090:31758/TCP   6m1s   app=prometheus,component=server
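With the services up, the Prometheus UI is reachable on NodePort 31758 and Alertmanager on 30066 (any node IP works). Because the deployment passes --web.enable-lifecycle, the configuration can also be hot-reloaded over HTTP after editing the prometheus-config ConfigMap:

# liveness endpoint of the Prometheus server
curl -s http://192.168.60.110:31758/-/healthy
# trigger a config reload without restarting the pod
curl -X POST http://192.168.60.110:31758/-/reload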
[root@master01 prometheus]# vim pro-gra.yml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana
  namespace: kube-system
spec:
  accessModes:
  - ReadWriteMany
  storageClassName: nfs-client-storageclass
  resources:
    requests:
      storage: 2Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: monitoring-grafana
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      task: monitoring
      k8s-app: grafana
  template:
    metadata:
      labels:
        task: monitoring
        k8s-app: grafana
    spec:
      containers:
      - name: grafana
        image: grafana/grafana:7.5.11
        securityContext:
          runAsUser: 104
          runAsGroup: 107
        ports:
        - containerPort: 3000
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/ssl/certs
          name: ca-certificates
          readOnly: false
        - mountPath: /var
          name: grafana-storage
        - mountPath: /var/lib/grafana
          name: graf-test
        env:
        - name: INFLUXDB_HOST
          value: monitoring-influxdb
        - name: GF_SERVER_HTTP_PORT
          value: "3000"
        - name: GF_AUTH_BASIC_ENABLED
          value: "false"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ORG_ROLE
          value: Admin
        - name: GF_SERVER_ROOT_URL
          value: /
      volumes:
      - name: ca-certificates
        hostPath:
          path: /etc/ssl/certs
      - name: grafana-storage
        emptyDir: {}
      - name: graf-test
        persistentVolumeClaim:
          claimName: grafana
---
apiVersion: v1
kind: Service
metadata:
  name: monitoring-grafana
  namespace: kube-system
spec:
  ports:
  - port: 80
    targetPort: 3000
  selector:
    k8s-app: grafana
  type: NodePort
[root@master01 prometheus]# kubectl apply -f pro-gra.yml
[root@master01 prometheus]# kubectl get svc -n kube-system
NAME                 TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)                  AGE
kube-dns             ClusterIP   10.96.0.10      <none>        53/UDP,53/TCP,9153/TCP   16d
monitoring-grafana   NodePort    10.96.220.147   <none>        80:31771/TCP             12s
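Grafana is now reachable on NodePort 31771. When adding the Prometheus data source in the Grafana UI, the in-cluster service DNS name is the natural URL (standard <service>.<namespace>.svc naming, assuming the default cluster DNS setup):

# data source URL to enter in Grafana (points at the prometheus Service in monitor-sa)
http://prometheus.monitor-sa.svc.cluster.local:9090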
// Fix the kube-proxy scrape target
kubectl edit configmap kube-proxy -n kube-system
......
metricsBindAddress: "0.0.0.0:10249"
# kube-proxy binds its metrics port 10249 to 127.0.0.1 by default; change it to listen on the node address so Prometheus can scrape it

# Restart kube-proxy by deleting its pods
kubectl get pods -n kube-system | grep kube-proxy | awk '{print $1}' | xargs kubectl delete pods -n kube-system
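After the kube-proxy pods restart, confirm the metrics endpoint answers on the node address rather than only on 127.0.0.1:

curl -s http://192.168.60.120:10249/metrics | head -5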
Test:
[root@master01 prometheus]# vim ylce.yml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hpa-test
  labels:
    hpa: test
spec:
  replicas: 1
  selector:
    matchLabels:
      hpa: test
  template:
    metadata:
      labels:
        hpa: test
    spec:
      containers:
      - name: centos
        image: centos:7
        command: ["/bin/bash", "-c", "yum install -y stress --nogpgcheck && sleep 3600"]
        volumeMounts:
        - name: yum
          mountPath: /etc/yum.repos.d/
      volumes:
      - name: yum
        hostPath:
          path: /etc/yum.repos.d/
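To actually trip the HighPodCpuUsage rule (threshold 0.9 sustained for 5 minutes, default namespace), apply the deployment and run stress inside the pod. A minimal sketch, assuming the yum install of stress in the container command succeeded; --cpu 4 spawns four busy workers and --timeout 600 stops them after ten minutes:

kubectl apply -f ylce.yml
kubectl exec -it deploy/hpa-test -- stress --cpu 4 --timeout 600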