下载 prometheus
https://github.com/prometheus/prometheus/releases
把程序放到 /usr/bin/ 下即可
systemd 启动脚本(保存为 /etc/systemd/system/prometheus.service)
启动脚本将数据存储在 /var/lib/prometheus/data,配置文件为 /etc/prometheus/prometheus.yml。服务以 nobody 用户运行,启动前需要先创建数据目录,并保证 nobody 用户对该目录有写权限。
# systemd unit that runs Prometheus as a supervised, long-lived service.
[Unit]
Description=Prometheus service
# Pull in network.target and start only after it has been reached.
Wants=network.target
After=network.target

[Service]
# Run as the unprivileged nobody user/group.
User=nobody
Group=nobody
WorkingDirectory=/home/s/apps/prometheus
# Flags: log warnings and above only, enable the admin HTTP API, keep TSDB
# data in /var/lib/prometheus/data, load /etc/prometheus/prometheus.yml.
ExecStart=/usr/bin/prometheus --log.level=warn \
    --web.enable-admin-api \
    --storage.tsdb.path=/var/lib/prometheus/data \
    --config.file=/etc/prometheus/prometheus.yml
# `systemctl reload prometheus` sends SIGHUP to the main process, which
# Prometheus handles as a configuration reload.
ExecReload=/usr/bin/kill -HUP $MAINPID
#PIDFile=/var/run/prometheus.pid
# Always restart after the process exits, waiting 20 seconds between attempts;
# allow up to 300 seconds for start-up and shutdown.
Restart=always
RestartSec=20
TimeoutSec=300
# Raise the open-file limit for the service.
LimitNOFILE=655360
# Send stdout and stderr to the journal.
StandardOutput=journal
StandardError=journal

[Install]
# Start with the normal multi-user boot target once the unit is enabled.
WantedBy=multi-user.target
监控 K8S
准备
k8s证书
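k8s 的证书位于 /etc/kubernetes/pki/ 下。需要把 CA 证书(ca.crt)以及 apiserver 的 client 证书(apiserver-kubelet-client.{crt,key})拷贝到 /etc/prometheus/ssl/cluster/ 下。配置文件中使用的是相对路径(如 ssl/cluster/ca.crt),Prometheus 会相对于配置文件所在目录 /etc/prometheus/ 解析这些路径。由于服务以 nobody 用户运行,这些文件需要对该用户可读。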
k8s管理员token
创建管理员用户
# cat admin-user.yaml
# ServiceAccount whose token is written to /etc/prometheus/ssl/cluster/token
# and used by Prometheus to authenticate against the API server.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kube-system
---
# Bind the ServiceAccount to the cluster-admin ClusterRole.
# NOTE(review): cluster-admin is far broader than monitoring needs; consider a
# read-only ClusterRole instead.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kube-system
# kubectl apply -f admin-user.yaml
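获取 token(注意:Kubernetes 1.24 及之后的版本不再自动为 ServiceAccount 创建 token Secret,需要先手动创建一个 kubernetes.io/service-account-token 类型的 Secret,否则下面的命令取不到 token)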
# token=$(kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | awk '/admin-user/ {print $1}') | awk '/token:/ {print $2}')
将token写入到文件中
# printf '%s' "$token" | sudo tee /etc/prometheus/ssl/cluster/token >/dev/null  # no trailing newline; do not echo the token to the terminal
准备配置文件(/etc/prometheus/prometheus.yml)
# Settings that apply to every scrape job unless the job overrides them.
global:
  # Scrape targets every 30 seconds. Default is every 1 minute.
  scrape_interval: 30s
  # Give up on a scrape after 28 seconds (the default would be 10s).
  # Must not exceed scrape_interval.
  scrape_timeout: 28s
  # Evaluate rules every 30 seconds. The default is every 1 minute.
  evaluation_interval: 30s
  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'
scrape_configs:
# Prometheus scraping its own metrics endpoint.
# The job name is added as a label `job=<job_name>` to any timeseries scraped
# from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  static_configs:
  - targets: ['localhost:9090']
# Node metrics, scraped through the API server's node proxy.
- job_name: 'kubernetes-cluster'
  scheme: https
  # Client certificate presented when scraping; paths are relative to the
  # directory that contains this config file.
  tls_config:
    ca_file: ssl/cluster/ca.crt
    cert_file: ssl/cluster/apiserver-kubelet-client.crt
    key_file: ssl/cluster/apiserver-kubelet-client.key
  # Discover one target per node, authenticating with the bearer token.
  kubernetes_sd_configs:
  - api_server: https://10.252.16.24:6443  # API server address; see kubeconfig
    bearer_token_file: ssl/cluster/token
    tls_config:
      ca_file: ssl/cluster/ca.crt
    role: node
  relabel_configs:
  # Send every scrape to the API server instead of the node itself.
  - target_label: __address__
    replacement: 10.252.16.24:6443  # API server address; see kubeconfig
  # Request the node's metrics via the API server proxy path.
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics
  # Tag every series from this job with env="cluster".
  - target_label: env
    replacement: cluster
# Container metrics: each discovered pod target is rewritten to point at
# <node_name>:10250/metrics/cadvisor on the node that runs the pod.
- job_name: 'kubernetes-cluster-pods'
  scheme: https
  # Client certificate presented when scraping; paths are relative to the
  # directory that contains this config file.
  tls_config:
    ca_file: ssl/cluster/ca.crt
    cert_file: ssl/cluster/apiserver-kubelet-client.crt
    key_file: ssl/cluster/apiserver-kubelet-client.key
    # Do not verify the certificate served by the scraped endpoint.
    insecure_skip_verify: true
  # Discover pods, authenticating to the API server with the client certificate.
  kubernetes_sd_configs:
  - api_server: https://10.252.16.24:6443  # API server address; see kubeconfig
    role: pod
    tls_config:
      ca_file: ssl/cluster/ca.crt
      cert_file: ssl/cluster/apiserver-kubelet-client.crt
      key_file: ssl/cluster/apiserver-kubelet-client.key
  relabel_configs:
  # Scrape the cAdvisor metrics path.
  - target_label: __metrics_path__
    replacement: /metrics/cadvisor
  # Replace the pod address with port 10250 on the pod's node.
  - source_labels: [__meta_kubernetes_pod_node_name]
    target_label: __address__
    regex: (.+)
    replacement: ${1}:10250
  # Use the node name as the instance label.
  - source_labels: [__meta_kubernetes_pod_node_name]
    target_label: instance
  # Tag every series from this job with env="cluster".
  - target_label: env
    replacement: cluster
启动 prometheus
# systemctl daemon-reload
# systemctl start prometheus