Summary: a stray bridge NIC appeared out of nowhere and left the Calico pod stuck in CrashLoopBackOff.
[root@koonsuenmaster01 network-scripts]# kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-6949477b58-qg98n 1/1 Running 1 123m
kube-system calico-node-8jhqr 0/1 CrashLoopBackOff 16 59m
kube-system calico-node-bgjtg 1/1 Running 0 61m
kube-system coredns-7f89b7bc75-7cvp4 1/1 Running 1 125m
kube-system coredns-7f89b7bc75-fxxv2 1/1 Running 1 125m
kube-system etcd-koonsuenmaster01 1/1 Running 2 17h
kube-system kube-apiserver-koonsuenmaster01 1/1 Running 3 17h
kube-system kube-controller-manager-koonsuenmaster01 1/1 Running 2 17h
kube-system kube-proxy-2p8gw 1/1 Running 2 17h
kube-system kube-proxy-zlxnp 1/1 Running 0 59m
kube-system kube-scheduler-koonsuenmaster01 1/1 Running 2 17h
A brief rundown of the troubleshooting: removing the nodes from the cluster and rejoining them made no difference to the calico-node pod, so I went back to the logs.
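For reference, a rough sketch of those two steps (the worker node name, token, and CA hash are placeholders; the API server address 192.168.8.11 and the pod name come from this cluster):
# on the master: remove the worker node
kubectl drain <worker-node> --ignore-daemonsets
kubectl delete node <worker-node>
# on the worker: reset and rejoin
kubeadm reset -f
kubeadm join 192.168.8.11:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash>
# inspect the crashing calico-node pod
kubectl -n kube-system logs calico-node-8jhqr -c calico-node
kubectl -n kube-system describe pod calico-node-8jhqr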
The logs of the crashing pod pointed to the IP 192.168.9.1 already being in use. Checking the IPs on each machine showed that all three hosts had a bridge NIC with the address 192.168.9.1 (the first interface in the output below), probably left over from an earlier experiment.
[root@koonsuenmaster01 ~]# ifconfig
br-b52b1f407530: flags=4099<UP,BROADCAST,MULTICAST> mtu 1500
inet 192.168.9.1 netmask 255.255.255.0 broadcast 192.168.9.255
ether 02:42:a1:51:74:6a txqueuelen 0 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
cali4e931556073: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1480
inet6 fe80::ecee:eeff:feee:eeee prefixlen 64 scopeid 0x20<link>
ether ee:ee:ee:ee:ee:ee txqueuelen 0 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
cali593602a6e73: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1480
inet6 fe80::ecee:eeff:feee:eeee prefixlen 64 scopeid 0x20<link>
ether ee:ee:ee:ee:ee:ee txqueuelen 0 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
calibc8e666b15a: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1480
inet6 fe80::ecee:eeff:feee:eeee prefixlen 64 scopeid 0x20<link>
ether ee:ee:ee:ee:ee:ee txqueuelen 0 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
docker0: flags=4099<UP,BROADCAST,MULTICAST> mtu 1500
inet 172.17.0.1 netmask 255.255.0.0 broadcast 172.17.255.255
ether 02:42:b9:6b:b0:ca txqueuelen 0 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.8.11 netmask 255.255.255.0 broadcast 192.168.8.255
inet6 fe80::20c:29ff:fe5a:e933 prefixlen 64 scopeid 0x20<link>
ether 00:0c:29:5a:e9:33 txqueuelen 1000 (Ethernet)
RX packets 2054 bytes 309148 (301.9 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 2625 bytes 2436877 (2.3 MiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
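A quicker way to confirm the conflict on every host than reading the full ifconfig output (a shortcut sketch, not from the original run; 192.168.9.1 is the address reported in the calico-node log):
# run on each node; prints any interface carrying the conflicting address
ip -o -4 addr show | grep '192.168.9.1'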
Delete the bridge NIC br-b52b1f407530 from the command line (note that brctl refuses to delete a bridge that is still up, so bring it down first):
[root@koonsuenmaster01 ~]# brctl show
bridge name bridge id STP enabled interfaces
br-b52b1f407530 8000.0242a151746a no
docker0 8000.0242b96bb0ca no
virbr0 8000.5254006b8e0d yes virbr0-nic
[root@koonsuenmaster01 network-scripts]# brctl delbr br-b52b1f407530
bridge br-b52b1f407530 is still up; can't delete it
[root@koonsuenmaster01 network-scripts]# ifconfig br-b52b1f407530 down
[root@koonsuenmaster01 network-scripts]# brctl delbr br-b52b1f407530
[root@koonsuenmaster01 network-scripts]# brctl show
bridge name bridge id STP enabled interfaces
docker0 8000.0242b96bb0ca no
virbr0 8000.5254006b8e0d yes virbr0-nic
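A side note: a bridge named br-<12 hex digits> is normally created by a user-defined Docker network (for example by docker-compose), so brctl delbr only removes the kernel device; if that network definition still exists, Docker can recreate the bridge the next time the project starts. Assuming the network really is a leftover, a more thorough cleanup would be:
docker network ls                 # the network ID should match b52b1f407530
docker network rm b52b1f407530    # or: docker network prune  (removes all unused networks)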
With the bridge deleted, the nodes were rejoined to the cluster. The result is shown below; everything is back to normal.
[root@koonsuenmaster01 ~]# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-6949477b58-qg98n 1/1 Running 1 134m
calico-node-bgjtg 1/1 Running 0 72m
calico-node-f4n6j 1/1 Running 0 3m22s
calico-node-hv9x9 1/1 Running 0 44s
coredns-7f89b7bc75-7cvp4 1/1 Running 1 136m
coredns-7f89b7bc75-fxxv2 1/1 Running 1 136m
etcd-koonsuenmaster01 1/1 Running 2 17h
kube-apiserver-koonsuenmaster01 1/1 Running 3 17h
kube-controller-manager-koonsuenmaster01 1/1 Running 2 17h
kube-proxy-2p8gw 1/1 Running 2 17h
kube-proxy-6w9pc 1/1 Running 0 3m22s
kube-proxy-sk6zs 1/1 Running 0 44s
kube-scheduler-koonsuenmaster01 1/1 Running 2 17h
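As a preventive follow-up (a suggestion, not part of the fix above): Calico autodetects the node IP from the host's interfaces, which is why a stray bridge address can break it. Pinning the detection to the real uplink avoids this class of problem:
kubectl -n kube-system set env daemonset/calico-node IP_AUTODETECTION_METHOD=interface=ens33
With IP_AUTODETECTION_METHOD=interface=ens33, calico-node takes its address only from ens33 (the 192.168.8.x NIC here) and ignores docker0, virbr0, and any leftover br-* bridges.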