研发反馈他们那边一套集群里有一台 master 因文件系统损坏无法开机。这三台机器都是 OpenStack 上的虚机,这次文件系统损坏是虚拟化宿主机故障导致的。三台机器都同时承担 master 和 node 两种角色。我指导他修复文件系统后成功开机,修复过程和我之前那篇 openSUSE 救援文章里的步骤一样。
机器起来后我登录上去查看:因为集群做了 HA,所以只有这一个 node 有问题,集群整体没有受到影响。
[root@k8s-m1 ~]# kubectl get node -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
10.252.146.104 NotReady <none> 30d v1.16.9 10.252.146.104 <none> CentOS Linux 8 (Core) 4.18.0-193.6.3.el8_2.x86_64 docker://19.3.11
10.252.146.105 Ready <none> 30d v1.16.9 10.252.146.105 <none> CentOS Linux 8 (Core) 4.18.0-193.6.3.el8_2.x86_64 docker://19.3.11
10.252.146.106 Ready <none> 30d v1.16.9 10.252.146.106 <none> CentOS Linux 8 (Core) 4.18.0-193.6.3.el8_2.x86_64 docker://19.3.11
先尝试启动 docker:
[root@k8s-m1 ~]# systemctl start docker
Job for docker.service canceled.
docker 无法启动,查看一下有哪些服务启动失败:
[root@k8s-m1 ~]# systemctl --failed
UNIT LOAD ACTIVE SUB DESCRIPTION
● containerd.service loaded failed failed containerd container runtime
查看一下 containerd 的日志:
[root@k8s-m1 ~]# journalctl -xe -u containerd
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.481459735+08:00" level=info msg="loading plugin "io.containerd.service.v1.snapshots-service"..." type=io.containerd.service.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.481472223+08:00" level=info msg="loading plugin "io.containerd.runtime.v1.linux"..." type=io.containerd.runtime.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.481517630+08:00" level=info msg="loading plugin "io.containerd.runtime.v2.task"..." type=io.containerd.runtime.v2
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.481562176+08:00" level=info msg="loading plugin "io.containerd.monitor.v1.cgroups"..." type=io.containerd.monitor.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.481964349+08:00" level=info msg="loading plugin "io.containerd.service.v1.tasks-service"..." type=io.containerd.service.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.481996158+08:00" level=info msg="loading plugin "io.containerd.internal.v1.restart"..." type=io.containerd.internal.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482048208+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.containers"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482081110+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.content"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482096598+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.diff"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482112263+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.events"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482123307+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.healthcheck"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482133477+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.images"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482142943+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.leases"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482151644+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.namespaces"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482160741+08:00" level=info msg="loading plugin "io.containerd.internal.v1.opt"..." type=io.containerd.internal.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482184201+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.snapshots"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482194643+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.tasks"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482206871+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.version"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482215454+08:00" level=info msg="loading plugin "io.containerd.grpc.v1.introspection"..." type=io.containerd.grpc.v1
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482365838+08:00" level=info msg=serving... address="/run/containerd/containerd.sock"
Jul 23 11:20:11 k8s-m1 containerd[9186]: time="2020-07-23T11:20:11.482404139+08:00" level=info msg="containerd successfully booted in 0.003611s"
Jul 23 11:20:11 k8s-m1 containerd[9186]: panic: runtime error: invalid memory address or nil pointer dereference
Jul 23 11:20:11 k8s-m1 containerd[9186]: [signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x5626b983c259]
Jul 23 11:20:11 k8s-m1 containerd[9186]: goroutine 55 [running]:
Jul 23 11:20:11 k8s-m1 containerd[9186]: github.com/containerd/containerd/vendor/go.etcd.io/bbolt.(*Bucket).Cursor(...)
Jul 23 11:20:11 k8s-m1 containerd[9186]: /go/src/github.com/containerd/containerd/vendor/go.etcd.io/bbolt/bucket.go:84
Jul 23 11:20:11 k8s-m1 containerd[9186]: github.com/containerd/containerd/vendor/go.etcd.io/bbolt.(*Bucket).Get(0x0, 0x5626bb7e3f10, 0xb, 0xb, 0x0, 0x2, 0x4)
Jul 23 11:20:11 k8s-m1 containerd[9186]: /go/src/github.com/containerd/containerd/vendor/go.etcd.io/bbolt/bucket.go:260 +0x39
Jul 23 11:20:11 k8s-m1 containerd[9186]: github.com/containerd/containerd/metadata.scanRoots.func6(0x7fe557c63020, 0x2, 0x2, 0x0, 0x0, 0x0, 0x0, 0x5626b95eec72)
Jul 23 11:20:11 k8s-m1 containerd[9186]: /go/src/github.com/containerd/containerd/metadata/gc.go:222 +0xcb
Jul 23 11:20:11 k8s-m1 containerd[9186]: github.com/containerd/containerd/vendor/go.etcd.io/bbolt.(*Bucket).ForEach(0xc0003d1780, 0xc00057b640, 0xa, 0xa)
Jul 23 11:20:11