node_exporter:节点数据收集器
daemonset:保证每个节点都有一个收集器
prometheus:监控主程序
grafana:图形化
alertmanager:告警模块
部署
首先创建命名空间 monitor-sa,然后在该命名空间中依次部署各组件
首先创建目录prometheus再创建node_exporter
---
# DaemonSet: run one node_exporter pod on every node so Prometheus can
# scrape per-node hardware/OS metrics on port 9100.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitor-sa
  labels:
    name: node-exporter
spec:
  selector:
    matchLabels:
      name: node-exporter
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      # Share the host's PID/IPC/network namespaces so the exporter
      # reports real node metrics instead of container-local ones.
      hostPID: true
      hostIPC: true
      hostNetwork: true
      containers:
        - name: node-exporter
          # NOTE(review): untagged image pulls :latest — pin a version for
          # reproducible deployments.
          image: prom/node-exporter
          ports:
            - containerPort: 9100
          resources:
            limits:
              cpu: "0.5"
          securityContext:
            # privileged is required to read host /proc and /sys.
            privileged: true
          args:
            - --path.procfs
            - /host/proc
            - --path.sysfs
            - /host/sys
            # Fixed: the original nested double quotes inside single quotes
            # ('"^/...$"'), which passed literal " characters into the regex
            # so the exclusion never matched any mount point.
            - --collector.filesystem.ignored-mount-points
            - '^/(sys|proc|dev|host|etc)($|/)'
          volumeMounts:
            - name: dev
              mountPath: /host/dev
            - name: proc
              mountPath: /host/proc
            - name: sys
              mountPath: /host/sys
            - name: rootfs
              mountPath: /rootfs
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: dev
          hostPath:
            path: /dev
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /
如果拉不下镜像,编辑 Docker 配置文件,添加镜像加速器:
vim /etc/docker/daemon.json
查看
创建账号后绑定admin
kubectl create serviceaccount monitor -n monitor-sa
kubectl create clusterrolebinding monitor-clusterrolebinding -n monitor-sa --clusterrole=cluster-admin --serviceaccount=monitor-sa:monitor
配告警规则文件
配告警邮件
创建prometheus
---
# Service: expose the Prometheus server UI/API (9090) via NodePort.
# Selects pods labeled app=prometheus AND component=server.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  type: NodePort
  ports:
    - port: 9090
      targetPort: 9090
      protocol: TCP
  selector:
    app: prometheus
    component: server
---
# Service: expose Alertmanager (9093) on fixed NodePort 30066.
# Fixed: the original lacked the `---` document separator, so this manifest
# collided with the previous Service in the same file.
apiVersion: v1
kind: Service
metadata:
  labels:
    name: prometheus
    kubernetes.io/cluster-service: 'true'
  name: alertmanager
  namespace: monitor-sa
spec:
  ports:
    - name: alertmanager
      nodePort: 30066
      port: 9093
      protocol: TCP
      targetPort: 9093
  # Alertmanager runs as a sidecar in the prometheus-server pod, so the
  # selector matches app=prometheus.
  selector:
    app: prometheus
  sessionAffinity: None
  type: NodePort
---
# Deployment: Prometheus server with an Alertmanager sidecar.
# Fixed: the whole manifest had been collapsed onto a single line (invalid
# YAML); restored to block style with the same keys and values.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'
    spec:
      # ServiceAccount "monitor" (bound to cluster-admin earlier in these
      # notes) grants Prometheus API access for service discovery.
      serviceAccountName: monitor
      initContainers:
        # NOTE(review): chmod -R 777 is a blunt workaround for hostPath
        # permissions — consider fsGroup/securityContext instead.
        - name: init-chmod
          image: busybox:latest
          command: ['sh', '-c', 'chmod -R 777 /prometheus;chmod -R 777 /etc']
          volumeMounts:
            - mountPath: /prometheus
              name: prometheus-storage-volume
            - mountPath: /etc/localtime
              name: timezone
      containers:
        - name: prometheus
          image: prom/prometheus:v2.45.0
          command:
            - prometheus
            - --config.file=/etc/prometheus/prometheus.yml
            - --storage.tsdb.path=/prometheus
            - --storage.tsdb.retention=720h
            # Allows config reload via HTTP POST /-/reload.
            - --web.enable-lifecycle
          ports:
            - containerPort: 9090
          volumeMounts:
            - name: prometheus-config
              mountPath: /etc/prometheus/
            - mountPath: /prometheus/
              name: prometheus-storage-volume
            - name: timezone
              mountPath: /etc/localtime
            - name: k8s-certs
              mountPath: /var/run/secrets/kubernetes.io/k8s-certs/etcd/
        - name: alertmanager
          image: prom/alertmanager:v0.20.0
          args:
            - "--config.file=/etc/alertmanager/alertmanager.yml"
            - "--log.level=debug"
          ports:
            - containerPort: 9093
              protocol: TCP
              name: alertmanager
          volumeMounts:
            - name: alertmanager-config
              mountPath: /etc/alertmanager
            - name: alertmanager-storage
              mountPath: /alertmanager
            - name: localtime
              mountPath: /etc/localtime
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus-config
            defaultMode: 0777
        # hostPath storage ties data to one node; fine for a lab setup.
        - name: prometheus-storage-volume
          hostPath:
            path: /data
            type: DirectoryOrCreate
        # etcd client certs, created as secret "etcd-certs" (see notes below).
        - name: k8s-certs
          secret:
            secretName: etcd-certs
        - name: timezone
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: alertmanager-config
          configMap:
            name: alertmanager
        - name: alertmanager-storage
          hostPath:
            path: /data/alertmanager
            type: DirectoryOrCreate
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
创建 secret
部署grafana
---
# PVC: 2Gi of NFS-backed storage for Grafana's data directory.
# Fixed: the manifest was collapsed onto one line and split mid-mapping
# ("requests:" separated from "storage: 2Gi"); restored to block style.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana
  namespace: kube-system
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: nfs-client-storageclass
  resources:
    requests:
      storage: 2Gi
---
# Deployment: Grafana 7.5.11 with anonymous Admin access (lab setup only —
# do not use anonymous Admin in production).
# Fixed: the manifest was collapsed onto a single line; restored block style.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: monitoring-grafana
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      task: monitoring
      k8s-app: grafana
  template:
    metadata:
      labels:
        task: monitoring
        k8s-app: grafana
    spec:
      containers:
        - name: grafana
          image: grafana/grafana:7.5.11
          # Run as the grafana user/group of this image rather than root.
          securityContext:
            runAsUser: 104
            runAsGroup: 107
          ports:
            - containerPort: 3000
              protocol: TCP
          volumeMounts:
            - mountPath: /etc/ssl/certs
              name: ca-certificates
              readOnly: false
            - mountPath: /var
              name: grafana-storage
            # Persistent Grafana data (dashboards, sqlite DB) on the PVC.
            - mountPath: /var/lib/grafana
              name: graf-test
          env:
            - name: INFLUXDB_HOST
              value: monitoring-influxdb
            - name: GF_SERVER_HTTP_PORT
              value: "3000"
            - name: GF_AUTH_BASIC_ENABLED
              value: "false"
            - name: GF_AUTH_ANONYMOUS_ENABLED
              value: "true"
            - name: GF_AUTH_ANONYMOUS_ORG_ROLE
              value: Admin
            - name: GF_SERVER_ROOT_URL
              value: /
      volumes:
        - name: ca-certificates
          hostPath:
            path: /etc/ssl/certs
        - name: grafana-storage
          emptyDir: {}
        - name: graf-test
          persistentVolumeClaim:
            claimName: grafana
---
# Service: expose Grafana (3000) on port 80 via NodePort.
apiVersion: v1
kind: Service
metadata:
  labels:
    name: monitoring-grafana
  # NOTE(review): metadata.name was missing in the original one-line paste;
  # a Service requires it. Using the label value as the name — verify.
  name: monitoring-grafana
  namespace: kube-system
spec:
  ports:
    - port: 80
      targetPort: 3000
  selector:
    k8s-app: grafana
  type: NodePort
自动发现pod
# Deployment "hpa-test": runs a centos:7 pod that installs the `stress`
# tool and then sleeps, used as a load-generation target for HPA testing.
# NOTE(review): indentation appears to have been stripped from this paste —
# as written this is not valid Kubernetes YAML; restore nesting before use.
apiVersion: apps/v1
kind: Deployment
metadata:
name: hpa-test
labels:
hpa: test
spec:
replicas: 1
selector:
matchLabels:
hpa: test
template:
metadata:
labels:
hpa: test
spec:
containers:
- name: centos
# image and command were fused onto one line by the paste; the command
# installs stress (--nogpgcheck) then sleeps an hour to keep the pod alive.
image: centos:7 command: ["/bin/bash", "-c", "yum install -y stress --nogpgcheck && sleep 3600"]
volumeMounts:
- name: yum
mountPath: /etc/yum.repos.d/
volumes:
# NOTE(review): this volume entry is truncated — its source (likely a
# hostPath to the node's yum repo dir) is missing past this excerpt; verify.
- name: yum