master节点:master
node节点:node1
由于本机是ubuntu系统,配置时参考了两篇博客:
《安装vmware搭建k8s集群(亲试无坑)》-CSDN博客。该博客基于centos系统,与ubuntu稍有区别,所以需要结合另一篇博客一起参考。
# 启动chronyd服务
systemctl start chronyd
#设置chronyd服务开机自启
systemctl enable chronyd
# chronyd服务启动稍等几秒钟,就可以使用date命令验证时间了
date
可能报错:
root@hcss-ecs-ed4e:/# systemctl enable chronyd
Failed to enable unit: Refusing to operate on alias name or linked unit file: chronyd.service
解决步骤:
#先找其具体位置
root@hcss-ecs-ed4e:/# find /etc/systemd/system -name chronyd.service
/etc/systemd/system/chronyd.service
#尝试按路径直接启用单元文件(这里用的是/usr/lib/systemd/system/下的路径) 仍报错:该路径下不存在此文件 说明chronyd.service可能只是一个别名或指向另一个单元文件的链接
root@hcss-ecs-ed4e:/# systemctl enable /usr/lib/systemd/system/chronyd.service
Failed to enable unit: Unit file /usr/lib/systemd/system/chronyd.service does not exist.
#使用 file 命令查看文件实际类型 可以看到是一个链接
root@hcss-ecs-ed4e:/# file /etc/systemd/system/chronyd.service
/etc/systemd/system/chronyd.service: symbolic link to /lib/systemd/system/chrony.service
#改为直接启用该链接指向的实际单元文件 成功 未报错
root@hcss-ecs-ed4e:/# systemctl enable /lib/systemd/system/chrony.service
禁用交换分区和防火墙
root@hcss-ecs-ed4e:/# swapoff -a
root@hcss-ecs-ed4e:/# swapon --show
root@hcss-ecs-ed4e:/# systemctl stop ufw
root@hcss-ecs-ed4e:/# systemctl disable ufw
Synchronizing state of ufw.service with SysV service script with /lib/systemd/systemd-sysv-install.
Executing: /lib/systemd/systemd-sysv-install disable ufw
Removed /etc/systemd/system/multi-user.target.wants/ufw.service.
关于报错 Port 10250 is in use 的解决办法(排查发现该端口被kubelet占用,执行kubeadm reset重置后再重试):
[init] Using Kubernetes version: v1.28.2
[preflight] Running pre-flight checks
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR Port-10250]: Port 10250 is in use
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher
root@hcss-ecs-ed4e:/# systemctl restart kubelet
root@hcss-ecs-ed4e:/# netstat -ntpl | grep 10250
tcp6 0 0 :::10250 :::* LISTEN 718166/kubelet
root@hcss-ecs-ed4e:/# kubeadm reset
root@ubt:/# sudo systemctl restart kubelet
root@ubt:/newdir2/opt/hertzbeat/config# journalctl -f -u kubelet.service
几个排查错误命令
1.查看集群中所有命名空间下的pod
root@hcss-ecs-ed4e:/# kubectl get pod --all-namespaces -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
default my-nginx-5ckf8 1/1 Running 0 16h 10.244.2.7 ubt <none> <none>
default my-nginx-69drn 1/1 Running 0 16h 10.244.2.5 ubt <none> <none>
default my-nginx-6r5gz 1/1 Running 0 16h 10.244.2.9 ubt <none> <none>
default my-nginx-7sjc8 1/1 Running 0 16h 10.244.2.6 ubt <none> <none>
default my-nginx-r2wdm 1/1 Running 0 16h 10.244.2.4 ubt <none> <none>
default my-nginx-swm2p 1/1 Running 0 16h 10.244.2.8 ubt <none> <none>
kube-flannel kube-flannel-ds-ptvqn 1/1 Running 0 15h 192.168.7.218 hcss-ecs-ed4e <none> <none>
kube-flannel kube-flannel-ds-z8mxj 1/1 Running 0 15h 192.168.5.86 ubt <none> <none>
kube-system coredns-66f779496c-478fx 1/1 Running 0 23h 10.244.0.5 hcss-ecs-ed4e <none> <none>
kube-system coredns-66f779496c-dvggf 1/1 Running 0 23h 10.244.0.4 hcss-ecs-ed4e <none> <none>
kube-system etcd-hcss-ecs-ed4e 1/1 Running 3 23h 192.168.7.218 hcss-ecs-ed4e <none> <none>
kube-system kube-apiserver-hcss-ecs-ed4e 1/1 Running 0 16h 192.168.7.218 hcss-ecs-ed4e <none> <none>
kube-system kube-controller-manager-hcss-ecs-ed4e 1/1 Running 5 (16h ago) 23h 192.168.7.218 hcss-ecs-ed4e <none> <none>
kube-system kube-proxy-k4p7x 1/1 Running 0 23h 192.168.7.218 hcss-ecs-ed4e <none> <none>
kube-system kube-proxy-wvvvs 1/1 Running 1 22h 192.168.5.86 ubt <none> <none>
kube-system kube-scheduler-hcss-ecs-ed4e 1/1 Running 5 (16h ago) 23h 192.168.7.218 hcss-ecs-ed4e <none> <none>
kubernetes-dashboard dashboard-metrics-scraper-5b86c8dbf8-qdhh6 1/1 Running 0 16h 10.244.2.3 ubt <none> <none>
kubernetes-dashboard kubernetes-dashboard-77db789568-wrhbs 1/1 Running 11 (15h ago) 16h 10.244.2.2 ubt <none> <none>
root@hcss-ecs-ed4e:/# kubectl get pod -owide --namespace kubernetes-dashboard
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
dashboard-metrics-scraper-5b86c8dbf8-qdhh6 1/1 Running 0 17h 10.244.2.3 ubt <none> <none>
kubernetes-dashboard-77db789568-wrhbs 1/1 Running 11 (15h ago) 17h 10.244.2.2 ubt <none> <none>
root@hcss-ecs-ed4e:/# kubectl get svc -A |grep kubernetes-dashboard
kubernetes-dashboard dashboard-metrics-scraper ClusterIP 10.111.211.15 <none> 8000/TCP 18h
kubernetes-dashboard kubernetes-dashboard NodePort 10.102.46.49 <none> 443:31996/TCP 18h
2.查看日志
root@hcss-ecs-ed4e:/# kubectl logs kubernetes-dashboard-77db789568-wrhbs -n kubernetes-dashboard
Error from server: Get "https://192.168.5.86:10250/containerLogs/kubernetes-dashboard/kubernetes-dashboard-77db789568-wrhbs/kubernetes-dashboard": dial tcp 192.168.5.86:10250: connect: no route to host
3.重新加入集群
#master重新获取token
root@hcss-ecs-ed4e:/# kubeadm token create --print-join-command
#node节点先重置 再重新加入集群
root@ubt:/# kubeadm reset
#scp将网络相关文件从master复制到node
root@ubt:/# scp root@master_ip:/etc/cni/net.d/* /etc/cni/net.d/
#使用从master获取的token加入集群(即master上kubeadm token create --print-join-command输出的命令)
root@ubt:/# kubeadm join ***** --token br1pv5.1m7l627il41efn2n --discovery-token-ca-cert-hash sha256:358dde62ac2155b0da48d74d62698fbdb25498a77436e93568c28a76e236c730