Nightingale-k8s组件监控实践
夜莺官网监控k8s的链接:Kubernetes监控专栏
使用prometheus-agent进行指标采集
创建认证授权信息RBAC
创建一个namespace来做夜莺监控采集指标
[root@ak8s-01 ~]# kubectl create ns flashcat
创建一个sa名为categraf,绑定resources的verbs权限,让categraf有足够的权限来获取k8s的各个组件的指标
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: categraf
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/metrics
- nodes/stats
- nodes/proxy
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups:
- extensions
- networking.k8s.io
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: categraf
namespace: flashcat
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: categraf
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: categraf
subjects:
- kind: ServiceAccount
name: categraf
namespace: flashcat
配置Prometheus的配置文件
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-agent-conf
labels:
name: prometheus-agent-conf
namespace: flashcat
data:
prometheus.yml: |-
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
# 监控 APIserver
- job_name: 'apiserver'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
insecure_skip_verify: true
authorization:
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
# 监控 controller-manager
- job_name: 'controller-manager'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
insecure_skip_verify: true
authorization:
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kube-system;kube-controller-manager;https-metrics
# 监控 scheduler
- job_name: 'scheduler'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
insecure_skip_verify: true
authorization:
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kube-system;kube-scheduler;https
# 监控 etcd
- job_name: 'etcd'
kubernetes_sd_configs:
- role: endpoints
scheme: http
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kube-system;etcd;http
# 监控 kubelet
- job_name: 'kubelet'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
insecure_skip_verify: true
authorization:
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kube-system;kube-kubelet;https
# 监控 kube-proxy
- job_name: 'kube-proxy'
kubernetes_sd_configs:
- role: endpoints
scheme: http
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kube-system;kube-proxy;http
remote_write:
- url: 'http://192.168.100.16:17000/prometheus/v1/write' # 夜莺的地址
部署Prometehus Agent
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-agent
namespace: flashcat
labels:
app: prometheus-agent
spec:
replicas: 1
selector:
matchLabels:
app: prometheus-agent
template:
metadata:
labels:
app: prometheus-agent
spec:
serviceAccountName: categraf
containers:
- name: prometheus
image: prom/prometheus
args:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--web.enable-lifecycle"
- "--enable-feature=agent" # 表示启动的是 agent 模式。
ports:
- containerPort: 9090
resources:
requests:
cpu: 500m
memory: 500M
limits:
cpu: 1
memory: 1Gi
volumeMounts:
- name: prometheus-config-volume
mountPath: /etc/prometheus/
- name: prometheus-storage-volume
mountPath: /prometheus/
volumes:
- name: prometheus-config-volume
configMap:
defaultMode: 420
name: prometheus-agent-conf
- name: prometheus-storage-volume
emptyDir: {}
更改的bind-address k8s是kubeadm安装的,要去manifests文件把bind-address更改为0.0.0.0)
[root@ak8s-01]# vim /etc/kubernetes/manifests/kube-controller-manager.yaml
....
- --bind-address=0.0.0.0 ##找到bind-address 把127.0.0.1 改为 0.0.0.0
[root@ak8s-01]# vim /etc/kubernetes/manifests/kube-scheduler.yaml
......
- --bind-address=0.0.0.0 # 更改为0.0.0.0即可
[root@ak8s-01]# vim /etc/kubernetes/manifests/etcd.yaml
······
- --listen-metrics-urls=http://0.0.0.0:2381 # 改为0.0.0.0
[root@ak8s-01]# kubectl edit cm -n kube-system kube-proxy
······
metricsBindAddress: "0.0.0.0"
创建service
# vim controller-manager-service.yaml
apiVersion: v1
kind: Service
metadata:
annotations:
labels:
k8s-app: kube-controller-manager
name: kube-controller-manager
namespace: kube-system
spec:
clusterIP: None
ports:
- name: https-metrics
port: 10257
protocol: TCP
targetPort: 10257
selector:
component: kube-controller-manager
sessionAffinity: None
type: ClusterIP
# vim scheduler-service.yaml
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: kube-scheduler
name: kube-scheduler
namespace: kube-system
spec:
clusterIP: None
ports:
- name: https
port: 10259
protocol: TCP
targetPort: 10259
selector:
component: kube-scheduler
sessionAffinity: None
type: ClusterIP
# vim etcd-service.yaml
apiVersion: v1
kind: Service
metadata:
namespace: kube-system
name: etcd
labels:
k8s-app: etcd
spec:
selector:
component: etcd
type: ClusterIP
clusterIP: None
ports:
- name: http
port: 2381
targetPort: 2381
protocol: TCP
# vim kubelet-service.yaml
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: kubelet
name: kube-kubelet
namespace: kube-system
spec:
clusterIP: None
ports:
- name: https
port: 10250
protocol: TCP
targetPort: 10250
sessionAffinity: None
type: ClusterIP
---
apiVersion: v1
kind: Endpoints
metadata:
labels:
k8s-app: kubelet
name: kube-kubelet
namespace: kube-system
subsets:
- addresses:
- ip: 192.168.100.10
- ip: 192.168.100.11
ports:
- name: https
port: 10250
protocol: TCP
# vim kube-proxy-service.yaml
kind: Service
metadata:
labels:
k8s-app: proxy
name: kube-proxy
namespace: kube-system
spec:
clusterIP: None
selector:
k8s-app: kube-proxy
ports:
- name: http
port: 10249
protocol: TCP
targetPort: 10249
sessionAffinity: None
type: ClusterIP
监控大盘json文件链接
apiserver: https://github.com/flashcatcloud/categraf/blob/main/k8s/apiserver-dash.json
ControllerManager :https://github.com/flashcatcloud/categraf/blob/main/k8s/cm-dash.json
Scheduler:https://github.com/flashcatcloud/categraf/blob/main/k8s/scheduler-dash.json
etcd:https://github.com/flashcatcloud/categraf/blob/main/k8s/etcd-dash.json
kubelet:https://github.com/flashcatcloud/categraf/blob/main/inputs/kubelet/dashboard-by-ident.json
kube-proxy:https://github.com/flashcatcloud/categraf/blob/main/inputs/kube_proxy/dashboard-by-ident.json