1. Install Git
yum -y install git
2. Clone kube-prometheus
git clone https://github.com/prometheus-operator/kube-prometheus.git -b release-0.11
### Kubernetes version used here: kubectl version reports "v1.23.6" (per the upstream compatibility matrix, release-0.11 targets Kubernetes 1.23/1.24)
### After the clone finishes, everything is in the kube-prometheus directory under the current path
cd kube-prometheus/
### Note: NetworkPolicy manifests were added in release-0.11.
By default they only allow traffic between the stack's own components. If you are familiar with NetworkPolicy, you can adjust the default rules (list the files with ls *networkPolicy*); if you leave them unchanged, the web UIs remain unreachable from outside even after you switch the Services to NodePort.
If you are not familiar with NetworkPolicy, you can simply move the files aside as a backup:
cd manifests
ls *networkPolicy*
mv *networkPolicy.yaml /tmp   # move them to /tmp as a backup
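If you would rather keep the stock policies than remove them, you can loosen a single policy after the stack is deployed in step 4. Below is a minimal sketch that overwrites the prometheus-k8s policy with one allowing ingress from anywhere; the policy name and pod labels are assumed to match the kube-prometheus release-0.11 defaults:
cat <<'EOF' | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: prometheus-k8s            # same name as the stock policy, so this replaces it
  namespace: monitoring
spec:
  podSelector:
    matchLabels:                  # assumed default labels on the Prometheus pods
      app.kubernetes.io/name: prometheus
  policyTypes:
  - Ingress
  ingress:
  - {}                            # an empty rule matches all sources: allow all ingress
EOF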
3. Check the namespace
cd setup
cat namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
### The whole stack is deployed into the monitoring namespace
4. Install and deploy
### Run from the repository root (kube-prometheus/):
kubectl apply --server-side -f manifests/setup                                        # CRDs and the monitoring namespace
until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done  # wait until the ServiceMonitor CRD is registered
kubectl apply -f manifests/                                                           # the monitoring stack itself
### [root@master kube-prometheus]# kubectl apply --server-side -f manifests/setup
customresourcedefinition.apiextensions.k8s.io/alertmanagerconfigs.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/probes.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/prometheuses.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/prometheusrules.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/servicemonitors.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/thanosrulers.monitoring.coreos.com serverside-applied
namespace/monitoring serverside-applied
### [root@master kube-prometheus]# until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
No resources found
### ("No resources found" is the success case here: the CRD exists but no ServiceMonitor objects have been created yet, so the loop exits immediately.)
### [root@master kube-prometheus]# kubectl apply -f manifests/
alertmanager.monitoring.coreos.com/main created
networkpolicy.networking.k8s.io/alertmanager-main created
poddisruptionbudget.policy/alertmanager-main created
prometheusrule.monitoring.coreos.com/alertmanager-main-rules created
secret/alertmanager-main created
service/alertmanager-main created
serviceaccount/alertmanager-main created
servicemonitor.monitoring.coreos.com/alertmanager-main created
clusterrole.rbac.authorization.k8s.io/blackbox-exporter created
clusterrolebinding.rbac.authorization.k8s.io/blackbox-exporter created
configmap/blackbox-exporter-configuration created
deployment.apps/blackbox-exporter created
networkpolicy.networking.k8s.io/blackbox-exporter created
service/blackbox-exporter created
serviceaccount/blackbox-exporter created
servicemonitor.monitoring.coreos.com/blackbox-exporter created
secret/grafana-config created
secret/grafana-datasources created
configmap/grafana-dashboard-alertmanager-overview created
configmap/grafana-dashboard-apiserver created
configmap/grafana-dashboard-cluster-total created
configmap/grafana-dashboard-controller-manager created
configmap/grafana-dashboard-grafana-overview created
configmap/grafana-dashboard-k8s-resources-cluster created
configmap/grafana-dashboard-k8s-resources-namespace created
configmap/grafana-dashboard-k8s-resources-node created
configmap/grafana-dashboard-k8s-resources-pod created
configmap/grafana-dashboard-k8s-resources-workload created
configmap/grafana-dashboard-k8s-resources-workloads-namespace created
configmap/grafana-dashboard-kubelet created
configmap/grafana-dashboard-namespace-by-pod created
configmap/grafana-dashboard-namespace-by-workload created
configmap/grafana-dashboard-node-cluster-rsrc-use created
configmap/grafana-dashboard-node-rsrc-use created
configmap/grafana-dashboard-nodes created
configmap/grafana-dashboard-persistentvolumesusage created
configmap/grafana-dashboard-pod-total created
configmap/grafana-dashboard-prometheus-remote-write created
configmap/grafana-dashboard-prometheus created
configmap/grafana-dashboard-proxy created
configmap/grafana-dashboard-scheduler created
configmap/grafana-dashboard-workload-total created
configmap/grafana-dashboards created
deployment.apps/grafana created
networkpolicy.networking.k8s.io/grafana created
prometheusrule.monitoring.coreos.com/grafana-rules created
service/grafana created
serviceaccount/grafana created
servicemonitor.monitoring.coreos.com/grafana created
prometheusrule.monitoring.coreos.com/kube-prometheus-rules created
clusterrole.rbac.authorization.k8s.io/kube-state-metrics created
clusterrolebinding.rbac.authorization.k8s.io/kube-state-metrics created
deployment.apps/kube-state-metrics created
networkpolicy.networking.k8s.io/kube-state-metrics created
prometheusrule.monitoring.coreos.com/kube-state-metrics-rules created
service/kube-state-metrics created
serviceaccount/kube-state-metrics created
servicemonitor.monitoring.coreos.com/kube-state-metrics created
prometheusrule.monitoring.coreos.com/kubernetes-monitoring-rules created
servicemonitor.monitoring.coreos.com/kube-apiserver created
servicemonitor.monitoring.coreos.com/coredns created
servicemonitor.monitoring.coreos.com/kube-controller-manager created
servicemonitor.monitoring.coreos.com/kube-scheduler created
servicemonitor.monitoring.coreos.com/kubelet created
clusterrole.rbac.authorization.k8s.io/node-exporter created
clusterrolebinding.rbac.authorization.k8s.io/node-exporter created
daemonset.apps/node-exporter created
networkpolicy.networking.k8s.io/node-exporter created
prometheusrule.monitoring.coreos.com/node-exporter-rules created
service/node-exporter created
serviceaccount/node-exporter created
servicemonitor.monitoring.coreos.com/node-exporter created
clusterrole.rbac.authorization.k8s.io/prometheus-k8s created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-k8s created
networkpolicy.networking.k8s.io/prometheus-k8s created
poddisruptionbudget.policy/prometheus-k8s created
prometheus.monitoring.coreos.com/k8s created
prometheusrule.monitoring.coreos.com/prometheus-k8s-prometheus-rules created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s-config created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s-config created
role.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s created
service/prometheus-k8s created
serviceaccount/prometheus-k8s created
servicemonitor.monitoring.coreos.com/prometheus-k8s created
apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io created
clusterrole.rbac.authorization.k8s.io/prometheus-adapter created
clusterrole.rbac.authorization.k8s.io/system:aggregated-metrics-reader created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-adapter created
clusterrolebinding.rbac.authorization.k8s.io/resource-metrics:system:auth-delegator created
clusterrole.rbac.authorization.k8s.io/resource-metrics-server-resources created
configmap/adapter-config created
deployment.apps/prometheus-adapter created
networkpolicy.networking.k8s.io/prometheus-adapter created
poddisruptionbudget.policy/prometheus-adapter created
rolebinding.rbac.authorization.k8s.io/resource-metrics-auth-reader created
service/prometheus-adapter created
serviceaccount/prometheus-adapter created
servicemonitor.monitoring.coreos.com/prometheus-adapter created
clusterrole.rbac.authorization.k8s.io/prometheus-operator created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-operator created
deployment.apps/prometheus-operator created
networkpolicy.networking.k8s.io/prometheus-operator created
prometheusrule.monitoring.coreos.com/prometheus-operator-rules created
service/prometheus-operator created
serviceaccount/prometheus-operator created
servicemonitor.monitoring.coreos.com/prometheus-operator created
5. Check that every pod is Running:
[root@master manifests]# kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-main-0 2/2 Running 0 22m
alertmanager-main-1 2/2 Running 0 22m
alertmanager-main-2 2/2 Running 0 22m
blackbox-exporter-746c64fd88-dk4qt 3/3 Running 0 23m
grafana-5fc7f9f55d-n7szn 1/1 Running 0 23m
kube-state-metrics-6c8846558c-d8j62 2/3 ImagePullBackOff 0 23m
node-exporter-h7f27 2/2 Running 0 23m
node-exporter-j8lhq 2/2 Running 0 23m
node-exporter-w65kk 2/2 Running 0 23m
prometheus-adapter-6455646bdc-8c26s 0/1 ImagePullBackOff 0 23m
prometheus-adapter-6455646bdc-nwnsl 0/1 ImagePullBackOff 0 23m
prometheus-k8s-0 2/2 Running 0 22m
prometheus-k8s-1 2/2 Running 0 22m
prometheus-operator-f59c8b954-fvgxr 2/2 Running 0 23m
6. Fix the failed image pulls. Based on the failing image's name, find the matching file (ending in -deployment.yaml) under kube-prometheus/manifests; the stock manifests reference k8s.gcr.io images, which may be unreachable from some networks, so swap in mirror images.
[root@master manifests]# vim kubeStateMetrics-deployment.yaml
image: bitnami/kube-state-metrics:2.5.0    # mirror of the stock k8s.gcr.io kube-state-metrics v2.5.0 image
[root@master manifests]# kubectl apply -f kubeStateMetrics-deployment.yaml
[root@master manifests]# vim prometheusAdapter-deployment.yaml
image: willdockerhub/prometheus-adapter:v0.9.1    # mirror of the stock k8s.gcr.io prometheus-adapter v0.9.1 image
[root@master manifests]# kubectl apply -f prometheusAdapter-deployment.yaml
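The same swap can be done without editing the files, using kubectl set image; a sketch assuming the container names inside the deployments are kube-state-metrics and prometheus-adapter (the defaults in this release). Note that a later kubectl apply -f manifests/ would revert the live objects back to the on-disk images:
kubectl -n monitoring set image deployment/kube-state-metrics kube-state-metrics=bitnami/kube-state-metrics:2.5.0
kubectl -n monitoring set image deployment/prometheus-adapter prometheus-adapter=willdockerhub/prometheus-adapter:v0.9.1
kubectl -n monitoring rollout status deployment/prometheus-adapter   # wait for the new pods to become Ready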
[root@master manifests]# kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-main-0 2/2 Running 0 83m
alertmanager-main-1 2/2 Running 0 83m
alertmanager-main-2 2/2 Running 0 83m
blackbox-exporter-746c64fd88-dk4qt 3/3 Running 0 85m
grafana-5fc7f9f55d-n7szn 1/1 Running 0 85m
kube-state-metrics-6d7746678c-stq6g 3/3 Running 0 2m34s
node-exporter-h7f27 2/2 Running 0 85m
node-exporter-j8lhq 2/2 Running 0 85m
node-exporter-w65kk 2/2 Running 0 85m
prometheus-adapter-c76fb84d7-6fq47 1/1 Running 0 60s
prometheus-adapter-c76fb84d7-dqs2k 0/1 Running 0 60s
prometheus-k8s-0 2/2 Running 0 83m
prometheus-k8s-1 2/2 Running 0 83m
prometheus-operator-f59c8b954-fvgxr 2/2 Running 0 85m
7. Check the Services (svc)
[root@master manifests]# kubectl get service -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager-main ClusterIP 10.97.204.202 <none> 9093/TCP,8080/TCP 93m
alertmanager-operated ClusterIP None <none> 9093/TCP,9094/TCP,9094/UDP 92m
blackbox-exporter ClusterIP 10.97.242.103 <none> 9115/TCP,19115/TCP 93m
grafana ClusterIP 10.100.15.208 <none> 3000/TCP 93m
kube-state-metrics ClusterIP None <none> 8443/TCP,9443/TCP 93m
node-exporter ClusterIP None <none> 9100/TCP 93m
prometheus-adapter ClusterIP 10.107.77.167 <none> 443/TCP 93m
prometheus-k8s ClusterIP 10.96.2.27 <none> 9090/TCP,8080/TCP 93m
prometheus-operated ClusterIP None <none> 9090/TCP 92m
prometheus-operator ClusterIP None <none> 8443/TCP 93m
8. Edit prometheus-service.yaml: under spec, add type and nodePort
[root@master manifests]# vim prometheus-service.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 2.36.1
  name: prometheus-k8s
  namespace: monitoring
spec:
  type: NodePort                 ### added: must sit at the same level as ports and selector
  ports:
  - name: web
    port: 9090
    targetPort: web
    nodePort: 30100              ### pin the node port; if omitted, one is assigned automatically
  #- name: reloader-web
  #  port: 8080
  #  targetPort: reloader-web
  selector:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
  sessionAffinity: ClientIP
[root@master manifests]# kubectl apply -f prometheus-service.yaml
service/prometheus-k8s configured
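If an auto-assigned node port is acceptable, the same change can be made without editing the file; this sketch patches only the Service type (Kubernetes then picks a port in the 30000-32767 range), and the identical pattern works for the grafana and alertmanager-main Services in steps 9 and 10:
kubectl -n monitoring patch svc prometheus-k8s -p '{"spec": {"type": "NodePort"}}'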
9. Edit grafana-service.yaml: under spec, add type and nodePort
[root@master manifests]# vim grafana-service.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 8.5.5
  name: grafana
  namespace: monitoring
spec:
  type: NodePort                 ### added: must sit at the same level as ports and selector
  ports:
  - name: http
    port: 3000
    targetPort: http
    nodePort: 30200              ### pin the node port; if omitted, one is assigned automatically
  selector:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
[root@master manifests]# kubectl apply -f grafana-service.yaml
service/grafana configured
10. Edit alertmanager-service.yaml: under spec, add type and nodePort
[root@master manifests]# vim alertmanager-service.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 0.24.0
  name: alertmanager-main
  namespace: monitoring
spec:
  type: NodePort                 ### added: must sit at the same level as ports and selector
  ports:
  - name: web
    port: 9093
    targetPort: web
    nodePort: 30300              ### pin the node port; if omitted, one is assigned automatically
  # - name: reloader-web
  #   port: 8080
  #   targetPort: reloader-web
  selector:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
  sessionAffinity: ClientIP
[root@master manifests]# kubectl apply -f alertmanager-service.yaml
service/alertmanager-main configured
11. Check the Services again; the externally reachable NodePort ports now appear
[root@master manifests]# kubectl get service -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager-main NodePort 10.97.204.202 <none> 9093:30300/TCP,8080:31672/TCP 117m
alertmanager-operated ClusterIP None <none> 9093/TCP,9094/TCP,9094/UDP 116m
blackbox-exporter ClusterIP 10.97.242.103 <none> 9115/TCP,19115/TCP 117m
grafana NodePort 10.100.15.208 <none> 3000:30200/TCP 117m
kube-state-metrics ClusterIP None <none> 8443/TCP,9443/TCP 117m
node-exporter ClusterIP None <none> 9100/TCP 117m
prometheus-adapter ClusterIP 10.107.77.167 <none> 443/TCP 117m
prometheus-k8s NodePort 10.96.2.27 <none> 9090:30100/TCP,8080:30543/TCP 117m
prometheus-operated ClusterIP None <none> 9090/TCP 116m
prometheus-operator ClusterIP None <none> 8443/TCP 117m
12. Access the UIs using any node's address (192.168.2.x:30100 / 30200 / 30300)
[root@master manifests]# kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
master Ready control-plane,master 34d v1.23.6 192.168.2.6 <none> CentOS Linux 7 (Core) 3.10.0-1160.el7.x86_64 docker://20.10.17
node1 Ready <none> 34d v1.23.6 192.168.2.3 <none> CentOS Linux 7 (Core) 3.10.0-1160.el7.x86_64 docker://20.10.17
node2 Ready <none> 34d v1.23.6 192.168.2.7 <none> CentOS Linux 7 (Core) 3.10.0-1160.el7.x86_64 docker://20.10.17
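A quick reachability check, using the master node's INTERNAL-IP from the listing above (any node IP works, since kube-proxy opens the NodePort on every node):
curl -sI http://192.168.2.6:30100   # Prometheus    - expect an HTTP 200/302 response
curl -sI http://192.168.2.6:30200   # Grafana       - expect a 302 redirect to /login
curl -sI http://192.168.2.6:30300   # Alertmanager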
13. Grafana first-login username and password
admin
admin
(On first login Grafana asks you to set a new password.)
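If you only need temporary access from a workstation with kubectl configured, port-forwarding is an alternative to the NodePort changes; a sketch for Grafana (the same works for svc/prometheus-k8s on port 9090):
kubectl -n monitoring port-forward svc/grafana 3000:3000   # then browse to http://localhost:3000 ; Ctrl-C stops it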