1.问题现状
通过命令行工具 kubectl 查找状态异常的 Pod（过滤掉处于 Running 状态的 Pod）：
[root@master ~]# kubectl get pods -n kube-system | grep -v Running
NAME READY STATUS RESTARTS AGE
prometheus-tim-3864503240-rwpq5 0/1 CrashLoopBackOff 2516 8d
2.查找问题
[root@master ~]# kubectl describe pod prometheus-tim-3864503240-rwpq5 -n kube-system
...
spec.containers{prometheus} Warning BackOff Back-off restarting failed container
8d 4s 59160 kubelet, test-95 Warning FailedSync Error syncing pod
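然后查看异常 Pod 的日志，即可定位问题。从下面日志最后一行的 level=error 可以看出：配置文件 /etc/prometheus/prometheus.yaml 不存在（no such file or directory），Prometheus 加载配置失败，容器因此反复重启（CrashLoopBackOff）；需要确认该配置文件是否已正确挂载到容器内的这个路径。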
[root@master-64 ~]# kubectl logs prometheus-tim-3864503240-rwpq5 -n kube-system
level=info ts=2018-01-29T03:11:57.888337607Z caller=main.go:215 msg="Starting Prometheus" version="(version=2.0.0, branch=HEAD, revision=0a74f98628a0463dddc90528220c94de5032d1a0)"
level=info ts=2018-01-29T03:11:57.888422278Z caller=main.go:216 build_context="(go=go1.9.2, user=root@615b82cb36b6, date=20171108-07:11:59)"
level=info ts=2018-01-29T03:11:57.888440579Z caller=main.go:217 host_details="(Linux 3.10.0-514.el7.x86_64 #1 SMP Tue Nov 22 16:42:41 UTC 2016 x86_64 prometheus-tim-3864503240-rwpq5 (none))"
level=info ts=2018-01-29T03:11:57.890277632Z caller=web.go:380 component=web msg="Start listening for connections" address=0.0.0.0:9090
level=info ts=2018-01-29T03:11:57.890325752Z caller=targetmanager.go:71 component="target manager" msg="Starting target manager..."
level=info ts=2018-01-29T03:11:57.8908479Z caller=main.go:314 msg="Starting TSDB"
level=info ts=2018-01-29T03:11:57.903340116Z caller=main.go:326 msg="TSDB started"
level=info ts=2018-01-29T03:11:57.9034449Z caller=main.go:394 msg="Loading configuration file" filename=/etc/prometheus/prometheus.yaml
level=error ts=2018-01-29T03:11:57.903503914Z caller=main.go:356 msg="Error loading config" err="couldn't load configuration (--config.file=/etc/prometheus/prometheus.yaml): open /etc/prometheus/prometheus.yaml: no such file or directory"