【Kubernetes】centos安装Kubernetes集群
集群安装完后【go-zero】api与rpc使用k8s服务发现和部署
1、环境准备
系统centos7
配置yum源参考文章 Centos系统换yum源
yum -y update
步骤1-3是所有主机都要配置,主机名和hosts配置完后可以使用工具命令同步
1.1 主机
一主二从
主机名 | ip |
---|---|
k8smaster | 192.168.59.148 |
k8snode1 | 192.168.59.149 |
k8snode2 | 192.168.59.150 |
分别设置主机名并添加hosts映射
hostnamectl set-hostname k8smaster
vim /etc/hosts
192.168.59.148 k8smaster
192.168.59.149 k8snode1
192.168.59.150 k8snode2
配置参考,127.0.0.1 也要加上当前主机名
测试
1.2 关闭selinux和firewalld
systemctl stop firewalld
systemctl disable firewalld
sed -i 's/enforcing/disabled/' /etc/selinux/config
setenforce 0
1.3 禁止swap分区
swapoff -a
注意:swapoff -a 只是临时关闭,重启后会失效;要永久关闭,还需要把 /etc/fstab 中的 swap 挂载行注释掉
1.4 将桥接的IPv4流量传递到iptables的链
cat > /etc/sysctl.d/k8s.conf << EOF
net.ipv4.ip_forward = 1
net.ipv4.tcp_tw_recycle = 0
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
2、安装部署docker
安装推荐文章 Linux环境下docker安装
简单的docker安装
yum install ca-certificates curl -y
yum install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y
配置参考
vim /etc/docker/daemon.json
{
"registry-mirrors": ["https://hub-mirror.c.163.com","https://registry.aliyuncs.com","https://registry.docker-cn.com","https://docker.mirrors.ustc.edu.cn"],
"data-root": "/data/docker",
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": { "max-size": "300m","max-file": "3" },
"live-restore": true
}
#查看是否启动
service docker status
service docker start #启动
#设置开机自启
systemctl enable docker && systemctl restart docker && systemctl status docker
#基本信息
docker info
docker-compose安装 docker-compose版本要自己去github看
containerd 配置文件参考
vim /etc/containerd/config.toml
disabled_plugins = []
imports = []
oom_score = 0
plugin_dir = ""
required_plugins = []
root = "/var/lib/containerd"
state = "/run/containerd"
temp = ""
version = 2
[cgroup]
path = ""
[debug]
address = ""
format = ""
gid = 0
level = ""
uid = 0
[grpc]
address = "/run/containerd/containerd.sock"
gid = 0
max_recv_message_size = 16777216
max_send_message_size = 16777216
tcp_address = ""
tcp_tls_ca = ""
tcp_tls_cert = ""
tcp_tls_key = ""
uid = 0
[metrics]
address = ""
grpc_histogram = false
[plugins]
[plugins."io.containerd.gc.v1.scheduler"]
deletion_threshold = 0
mutation_threshold = 100
pause_threshold = 0.02
schedule_delay = "0s"
startup_delay = "100ms"
[plugins."io.containerd.grpc.v1.cri"]
device_ownership_from_security_context = false
disable_apparmor = false
disable_cgroup = false
disable_hugetlb_controller = true
disable_proc_mount = false
disable_tcp_service = true
enable_selinux = false
enable_tls_streaming = false
enable_unprivileged_icmp = false
enable_unprivileged_ports = false
ignore_image_defined_volumes = false
max_concurrent_downloads = 3
max_container_log_line_size = 16384
netns_mounts_under_state_dir = false
restrict_oom_score_adj = false
sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.6"
selinux_category_range = 1024
stats_collect_period = 10
stream_idle_timeout = "4h0m0s"
stream_server_address = "127.0.0.1"
stream_server_port = "0"
systemd_cgroup = false
tolerate_missing_hugetlb_controller = true
unset_seccomp_profile = ""
[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "/opt/cni/bin"
conf_dir = "/etc/cni/net.d"
conf_template = ""
ip_pref = ""
max_conf_num = 1
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "runc"
disable_snapshot_annotations = true
discard_unpacked_layers = false
ignore_rdt_not_enabled_errors = false
no_pivot = false
snapshotter = "overlayfs"
[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = ""
[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = ""
CriuImagePath = ""
CriuPath = ""
CriuWorkPath = ""
IoGid = 0
IoUid = 0
NoNewKeyring = false
NoPivotRoot = false
Root = ""
ShimCgroup = ""
SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = ""
[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime.options]
[plugins."io.containerd.grpc.v1.cri".image_decryption]
key_model = "node"
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = ""
[plugins."io.containerd.grpc.v1.cri".registry.auths]
[plugins."io.containerd.grpc.v1.cri".registry.configs]
[plugins."io.containerd.grpc.v1.cri".registry.configs."k8smaster:5000".tls]
insecure_skip_verify = true
[plugins."io.containerd.grpc.v1.cri".registry.headers]
[plugins."io.containerd.grpc.v1.cri".registry.mirrors]
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."k8smaster:5000"]
endpoint = ["http://k8smaster:5000"]
[plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
tls_cert_file = ""
tls_key_file = ""
[plugins."io.containerd.internal.v1.opt"]
path = "/opt/containerd"
[plugins."io.containerd.internal.v1.restart"]
interval = "10s"
[plugins."io.containerd.internal.v1.tracing"]
sampling_ratio = 1.0
service_name = "containerd"
[plugins."io.containerd.metadata.v1.bolt"]
content_sharing_policy = "shared"
[plugins."io.containerd.monitor.v1.cgroups"]
no_prometheus = false
[plugins."io.containerd.runtime.v1.linux"]
no_shim = false
runtime = "runc"
runtime_root = ""
shim = "containerd-shim"
shim_debug = false
[plugins."io.containerd.runtime.v2.task"]
platforms = ["linux/amd64"]
sched_core = false
[plugins."io.containerd.service.v1.diff-service"]
default = ["walking"]
[plugins."io.containerd.service.v1.tasks-service"]
rdt_config_file = ""
[plugins."io.containerd.snapshotter.v1.aufs"]
root_path = ""
[plugins."io.containerd.snapshotter.v1.btrfs"]
root_path = ""
[plugins."io.containerd.snapshotter.v1.devmapper"]
async_remove = false
base_image_size = ""
discard_blocks = false
fs_options = ""
fs_type = ""
pool_name = ""
root_path = ""
[plugins."io.containerd.snapshotter.v1.native"]
root_path = ""
[plugins."io.containerd.snapshotter.v1.overlayfs"]
root_path = ""
upperdir_label = false
[plugins."io.containerd.snapshotter.v1.zfs"]
root_path = ""
[plugins."io.containerd.tracing.processor.v1.otlp"]
endpoint = ""
insecure = false
protocol = ""
[proxy_plugins]
[stream_processors]
[stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]
accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]
args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
path = "ctd-decoder"
returns = "application/vnd.oci.image.layer.v1.tar"
[stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]
accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]
args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
path = "ctd-decoder"
returns = "application/vnd.oci.image.layer.v1.tar+gzip"
[timeouts]
"io.containerd.timeout.bolt.open" = "0s"
"io.containerd.timeout.shim.cleanup" = "5s"
"io.containerd.timeout.shim.load" = "5s"
"io.containerd.timeout.shim.shutdown" = "3s"
"io.containerd.timeout.task.state" = "2s"
[ttrpc]
address = ""
gid = 0
uid = 0
3、部署k8s基础命令
3.1 添加k8s阿里云的yum源
cat > /etc/yum.repos.d/kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
或者用vim
vim /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
3.2 查看最新可安装的软件
yum --disablerepo="*" --enablerepo="kubernetes" list available
3.3 安装kubeadm、kubectl、kubelet
我这里装的版本是1.28.2
yum install -y kubelet-1.28.2 kubeadm-1.28.2 kubectl-1.28.2
systemctl start kubelet
systemctl enable kubelet
#查看错误日志
journalctl -u kubelet
4、部署集群
查询各个组件的版本
kubeadm config images list
4.1 初始化master
只要在主节点上执行
kubeadm init --kubernetes-version=1.28.13 \
--apiserver-advertise-address=192.168.59.148 \
--image-repository registry.aliyuncs.com/google_containers \
--service-cidr=10.140.0.0/16 \
--pod-network-cidr=10.244.0.0/16
参数注释:
--apiserver-advertise-address
指明用Master的哪个interface与Cluster的其他节点通信。如果Master有多个interface,建议明确指定;如果不指定,kubeadm会自动选择有默认网关的interface。
--pod-network-cidr
选择一个Pod网络插件,并检查它是否需要在初始化Master时指定一些参数,它的值取决于你在下一步选择的哪个网络插件,这里选择Flannel的网络插件参数为 10.244.0.0/16。Calico网络为192.168.0.0/16。参考:Installing a pod network add-on
--service-cidr
选择service网络
--image-repository
使用kubeadm config images pull来预先拉取初始化需要用到的镜像,用来检查是否能连接到Kubernetes的Registries。Kubernetes默认Registries地址是k8s.gcr.io,很明显,在国内并不能访问gcr.io,因此在kubeadm v1.13之前的版本,安装起来非常麻烦,但是在1.13版本中终于解决了国内的痛点,其增加了一个--image-repository参数,默认值是k8s.gcr.io,我们将其指定为国内镜像地址:registry.aliyuncs.com/google_containers。
--kubernetes-version
默认值是stable-1,会导致从https://dl.k8s.io/release/stable-1.txt下载最新的版本号,我们可以将其指定为固定版本来跳过网络请求。
4.2 报错以及问题处理
查看报错命令
journalctl -xeu kubelet
问题一
node节点也要注释掉
[init] Using Kubernetes version: v1.28.13
[preflight] Running pre-flight checks
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR CRI]: container runtime is not running: output: time="2024-09-12T14:01:03+08:00" level=fatal msg="validate service connection: CRI v1 runtime API is not implemented for endpoint \"unix:///var/run/containerd/containerd.sock\": rpc error: code = Unimplemented desc = unknown service runtime.v1.RuntimeService"
, error: exit status 1
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher
查看版本没问题,看看有没有启动
[root@localhost home]# containerd -v
containerd containerd.io 1.6.33 d2d58213f83a351ca8f528a95fbd145f5654e957
[root@localhost home]# docker -v
Docker version 26.1.4, build 5650f9b
编辑以下文件,将下面一行内容注释掉
vim /etc/containerd/config.toml
#disabled_plugins = ["cri"]
原因:containerd安装的默认禁用(重点)
使用安装包安装的containerd会默认禁用作为容器运行时的功能,即安装包安装containerd后默认禁用containerd作为容器运行时
这个时候使用k8s就会报错了,因为没有容器运行时可以用
开启方法就是修改/etc/containerd/config.toml文件,让disabled_plugins的列表中不再包含cri(直接注释掉这一行即可)
修改后重启containerd才会生效
systemctl restart containerd
问题二
如果kubernetes初始化失败,第二次再执行初始化命令时会报错,这时需要先进行重置
[root@localhost home]# kubeadm init --kubernetes-version=1.28.13 --apiserver-advertise-address=192.168.59.148 --image-repository registry.aliyuncs.com/google_containers --service-cidr=10.140.0.0/16 --pod-network-cidr=10.244.0.0/16
[init] Using Kubernetes version: v1.28.13
[preflight] Running pre-flight checks
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR Port-6443]: Port 6443 is in use
[ERROR Port-10259]: Port 10259 is in use
[ERROR Port-10257]: Port 10257 is in use
[ERROR FileAvailable--etc-kubernetes-manifests-kube-apiserver.yaml]: /etc/kubernetes/manifests/kube-apiserver.yaml already exists
[ERROR FileAvailable--etc-kubernetes-manifests-kube-controller-manager.yaml]: /etc/kubernetes/manifests/kube-controller-manager.yaml already exists
[ERROR FileAvailable--etc-kubernetes-manifests-kube-scheduler.yaml]: /etc/kubernetes/manifests/kube-scheduler.yaml already exists
[ERROR FileAvailable--etc-kubernetes-manifests-etcd.yaml]: /etc/kubernetes/manifests/etcd.yaml already exists
[ERROR Port-10250]: Port 10250 is in use
[ERROR Port-2379]: Port 2379 is in use
[ERROR Port-2380]: Port 2380 is in use
[ERROR DirAvailable--var-lib-etcd]: /var/lib/etcd is not empty
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher
解决方法
kubeadm reset
问题三
驱动加载 这个问题我没遇到
执行下面这两个命令
modprobe br_netfilter
modprobe bridge
问题四
Unfortunately, an error has occurred:
timed out waiting for the condition
This error is likely caused by:
- The kubelet is not running
- The kubelet is unhealthy due to a misconfiguration of the node in some way (required cgroups disabled)
If you are on a systemd-powered system, you can try to troubleshoot the error with the following commands:
- 'systemctl status kubelet'
- 'journalctl -xeu kubelet'
Additionally, a control plane component may have crashed or exited when started by the container runtime.
To troubleshoot, list all containers using your preferred container runtimes CLI.
Here is one example how you may list all running Kubernetes containers by using crictl:
- 'crictl --runtime-endpoint unix:///var/run/containerd/containerd.sock ps -a | grep kube | grep -v pause'
Once you have found the failing container, you can inspect its logs with:
- 'crictl --runtime-endpoint unix:///var/run/containerd/containerd.sock logs CONTAINERID'
error execution phase wait-control-plane: couldn't initialize a Kubernetes cluster
To see the stack trace of this error execute with --v=5 or higher
使用 journalctl -xeu kubelet 查看报错
failed to resolve reference \"registry.k8s.io/pause:3.6
解决方法:
#生成 containerd 的默认配置文件
containerd config default > /etc/containerd/config.toml
#查看 sandbox 的默认镜像仓库在文件中的第几行
cat /etc/containerd/config.toml | grep -n "sandbox_image"
#使用 vim 编辑器 定位到 sandbox_image,将 仓库地址修改成 registry.aliyuncs.com/google_containers/pause:3.6
vim /etc/containerd/config.toml
sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.6"
#重启 containerd 服务
systemctl daemon-reload
systemctl restart containerd.service
记得要
kubeadm reset
4.3执行成功
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.59.148:6443 --token 3otopj.v2r7x7gcpa4j1tv3 \
--discovery-token-ca-cert-hash sha256:b881ce5117a2ed28cb4f86963b462cc77976194c33c9314dbf4647f011354dc1
初始化完成后会生成一串命令用于node节点的加入
4.4关于token
token一般24小时后就会过期
查看当前token
[root@localhost home]# kubeadm token list
TOKEN TTL EXPIRES USAGES DESCRIPTION EXTRA GROUPS
3otopj.v2r7x7gcpa4j1tv3 23h 2024-09-13T06:41:42Z authentication,signing The default bootstrap token generated by 'kubeadm init'. system:bootstrappers:kubeadm:default-node-token
查看本机sha256值
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
重新生成token
kubeadm token create
重新生成token并打印出join命令
kubeadm token create --print-join-command
如果要加入master节点,需要先生成certificate-key(1.16版本前参数为--experimental-upload-certs,1.16及以后版本为--upload-certs)
kubeadm init phase upload-certs --upload-certs
结合上面join和certs的输出(同样,1.16版本前参数为--experimental-control-plane --certificate-key,1.16及以后版本为--control-plane --certificate-key)
kubeadm join 192.168.59.148:6443 --token fpjwdf.p9bnbqf7cpvf1amc --discovery-token-ca-cert-hash sha256:dd3cb5208a4ca032e85a5a30b9b02f963aff2fece13045cf8c74d7b9ed7f6098 --control-plane --certificate-key 820908fa5d83b9a7314a58147b80d0dc81b4f7469c9c8f72fb49b4fba2652c29
4.5配置kubectl
执行上面返回的命令
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
root用户执行永久生效
echo "export KUBECONFIG=/etc/kubernetes/admin.conf" > /etc/profile.d/kubeconfig.sh
source /etc/profile.d/kubeconfig.sh
不然就临时生效
export KUBECONFIG=/etc/kubernetes/admin.conf
将admin.conf拷贝到其他需要使用kubectl命令的node节点
scp /etc/kubernetes/admin.conf root@192.168.59.149:/etc/kubernetes/
scp /etc/kubernetes/admin.conf root@192.168.59.150:/etc/kubernetes/
一样执行生效
export KUBECONFIG=/etc/kubernetes/admin.conf
或者
echo "export KUBECONFIG=/etc/kubernetes/admin.conf" > /etc/profile.d/kubeconfig.sh
source /etc/profile.d/kubeconfig.sh
4.6加入节点
在除master外其他node节点执行上面的join命令,加入k8s集群
kubeadm join 192.168.59.148:6443 --token 3otopj.v2r7x7gcpa4j1tv3 --discovery-token-ca-cert-hash sha256:b881ce5117a2ed28cb4f86963b462cc77976194c33c9314dbf4647f011354dc1
加入成功
[root@localhost home]# kubeadm join 192.168.59.148:6443 --token 3otopj.v2r7x7gcpa4j1tv3 --discovery-token-ca-cert-hash sha256:b881ce5117a2ed28cb4f86963b462cc77976194c33c9314dbf4647f011354dc1
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
查看节点
[root@localhost home]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8smaster NotReady control-plane 32m v1.28.2
k8snode1 NotReady <none> 13s v1.28.2
k8snode2 NotReady <none> 5s v1.28.2
4.7移除节点node
不移除的可以直接下一步
[root@localhost flanneld]# kubectl drain k8snode2 --delete-local-data --force --ignore-daemonsets
Flag --delete-local-data has been deprecated, This option is deprecated and will be deleted. Use --delete-emptydir-data.
node/k8snode2 cordoned
Warning: ignoring DaemonSet-managed Pods: kube-system/kube-proxy-p8cxh
evicting pod tigera-operator/tigera-operator-748c69cf45-9clh2
pod/tigera-operator-748c69cf45-9clh2 evicted
node/k8snode2 drained
[root@localhost flanneld]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8smaster Ready control-plane 3h13m v1.28.2
k8snode1 NotReady <none> 161m v1.28.2
k8snode2 NotReady,SchedulingDisabled <none> 161m v1.28.2
[root@localhost flanneld]#
[root@localhost flanneld]# kubectl delete node k8snode2
node "k8snode2" deleted
[root@localhost flanneld]# pwd
/data/flanneld
[root@localhost flanneld]# cd /etc/kubernetes/
[root@localhost kubernetes]# ll
总用量 32
-rw-------. 1 root root 5650 9月 12 14:41 admin.conf
-rw-------. 1 root root 5682 9月 12 14:41 controller-manager.conf
-rw-------. 1 root root 1982 9月 12 14:41 kubelet.conf
drwxr-xr-x. 2 root root 113 9月 12 14:41 manifests
drwxr-xr-x. 3 root root 4096 9月 12 14:41 pki
-rw-------. 1 root root 5626 9月 12 14:41 scheduler.conf
[root@localhost kubernetes]# kubeadm reset -f
[reset] Reading configuration from the cluster...
[reset] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[preflight] Running pre-flight checks
[reset] Deleted contents of the etcd data directory: /var/lib/etcd
[reset] Stopping the kubelet service
[reset] Unmounting mounted directories in "/var/lib/kubelet"
[reset] Deleting contents of directories: [/etc/kubernetes/manifests /var/lib/kubelet /etc/kubernetes/pki]
[reset] Deleting files: [/etc/kubernetes/admin.conf /etc/kubernetes/kubelet.conf /etc/kubernetes/bootstrap-kubelet.conf /etc/kubernetes/controller-manager.conf /etc/kubernetes/scheduler.conf]
The reset process does not clean CNI configuration. To do so, you must remove /etc/cni/net.d
The reset process does not reset or clean up iptables rules or IPVS tables.
If you wish to reset iptables, you must do so manually by using the "iptables" command.
If your cluster was setup to utilize IPVS, run ipvsadm --clear (or similar)
to reset your system's IPVS tables.
The reset process does not clean your kubeconfig files and you must remove them manually.
Please, check the contents of the $HOME/.kube/config file.
[root@localhost kubernetes]# ls
manifests pki
# 重新加入
上面的 kubeadm join
5、安装CNI网络插件
master上执行,安装flannel 网络插件
下载yaml文件,网络会有波动,可以多wget几次
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
注意,net-conf.json的Network配置 要是上面init pod-network-cidr的网段地址
vim kube-flannel.yml
安装插件
kubectl apply -f kube-flannel.yml
kubectl get pods -n kube-flannel
kubectl get nodes
出现问题
网络实际没连上
k8s flannel网络插件国内镜像docker拉取不到 docker.io/flannel/flannel:v0.25.6
解决方案:手动到github下载,然后docker构建
下载这两个 根据kube-flannel.yml里面的版本去找
flannel:v0.25.6
flannel-cni-plugin:v1.5.1-flannel2
[root@localhost flanneld]# docker import flannel-v0.25.6-linux-amd64.tar.gz flannel/flannel:v0.25.6
sha256:5c76b00ff15dfc6d452f1dcce31d7508e13363c9ab9beeddd90dd1a6204fcab8
[root@localhost flanneld]# docker import cni-plugin-flannel-linux-amd64-v1.5.1-flannel2.tgz flannel/flannel-cni-plugin:v1.5.1-flannel2
sha256:fd42d9ebb5885a5889bb0211e560b04b18dab401e3b63e777d4d1f358a847df6
构建成功会有两个包
将这两个打成tar包
[root@localhost flanneld]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
flannel/flannel-cni-plugin v1.5.1-flannel2 fd42d9ebb588 12 minutes ago 2.54MB
flannel/flannel v0.25.6 5c76b00ff15d 12 minutes ago 42.8MB
[root@localhost flanneld]# docker save flannel/flannel:v0.25.6
cowardly refusing to save to a terminal. Use the -o flag or redirect
[root@localhost flanneld]# docker save flannel/flannel:v0.25.6 -o flannel-v0.25.6.tar
[root@localhost flanneld]# ll
总用量 55832
-rw-r--r--. 1 root root 1080975 9月 12 16:30 cni-plugin-flannel-linux-amd64-v1.5.1-flannel2.tgz
-rw-r--r--. 1 root root 13305488 9月 12 16:15 flannel-v0.25.6-linux-amd64.tar.gz
-rw-------. 1 root root 42772992 9月 12 16:55 flannel-v0.25.6.tar
-rw-r--r--. 1 root root 4345 9月 12 15:41 kube-flannel.yml
[root@localhost flanneld]# docker save flannel/flannel-cni-plugin:v1.5.1-flannel2 -o cni-plugin-flannel-linux-amd64-v1.5.1-flannel2.tar
[root@localhost flanneld]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
flannel/flannel-cni-plugin v1.5.1-flannel2 fd42d9ebb588 14 minutes ago 2.54MB
flannel/flannel v0.25.6 5c76b00ff15d 15 minutes ago 42.8MB
[root@localhost flanneld]# ll
总用量 58336
-rw-------. 1 root root 2560512 9月 12 16:56 cni-plugin-flannel-linux-amd64-v1.5.1-flannel2.tar
-rw-r--r--. 1 root root 1080975 9月 12 16:30 cni-plugin-flannel-linux-amd64-v1.5.1-flannel2.tgz
-rw-r--r--. 1 root root 13305488 9月 12 16:15 flannel-v0.25.6-linux-amd64.tar.gz
-rw-------. 1 root root 42772992 9月 12 16:55 flannel-v0.25.6.tar
-rw-r--r--. 1 root root 4345 9月 12 15:41 kube-flannel.yml
[root@localhost flanneld]#
将tar镜像压缩包,导入到containerd的k8s.io命名空间中
[root@localhost flanneld]# ll
总用量 58336
-rw-------. 1 root root 2560512 9月 12 16:56 cni-plugin-flannel-linux-amd64-v1.5.1-flannel2.tar
-rw-r--r--. 1 root root 1080975 9月 12 16:30 cni-plugin-flannel-linux-amd64-v1.5.1-flannel2.tgz
-rw-r--r--. 1 root root 13305488 9月 12 16:15 flannel-v0.25.6-linux-amd64.tar.gz
-rw-------. 1 root root 42772992 9月 12 16:55 flannel-v0.25.6.tar
-rw-r--r--. 1 root root 4345 9月 12 15:41 kube-flannel.yml
[root@localhost flanneld]# sudo ctr -n k8s.io images import cni-plugin-flannel-linux-amd64-v1.5.1-flannel2.tar
unpacking docker.io/flannel/flannel-cni-plugin:v1.5.1-flannel2 (sha256:2e67e1ceda143a11deca57c0bd3145c9a1998d78d1084e3028c26ae6ceea233f)...done
[root@localhost flanneld]# sudo ctr -n k8s.io images import flannel-v0.25.6.tar
unpacking docker.io/flannel/flannel:v0.25.6 (sha256:7dcf8fbbc9e9acbe2e5e3e7321b74aa357a5f4246152f6539da903370fc3f999)...done
[root@localhost flanneld]#
检查是否成功
sudo ctr -n k8s.io i check | grep flannel
然后修改 kube-flannel.yml 文件
---
kind: Namespace
apiVersion: v1
metadata:
  name: kube-flannel
  labels:
    k8s-app: flannel
    pod-security.kubernetes.io/enforce: privileged
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: flannel
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: flannel
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: flannel
  name: flannel
  namespace: kube-flannel
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-flannel
  labels:
    tier: node
    k8s-app: flannel
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "EnableNFTables": false,
      "Backend": {
        "Type": "vxlan"
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
    k8s-app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni-plugin
        image: docker.io/flannel/flannel-cni-plugin:v1.5.1-flannel2
        imagePullPolicy: Never
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      - name: install-cni
        image: docker.io/flannel/flannel:v0.25.6
        imagePullPolicy: Never
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: docker.io/flannel/flannel:v0.25.6
        imagePullPolicy: Never
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: xtables-lock
          mountPath: /run/xtables.lock
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
先删除
kubectl delete -f kube-flannel.yml
再构建
kubectl apply -f kube-flannel.yml
还是失败了,最后找了个大佬的github
编辑kube-flannel.yml
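给文件中所有 image 的镜像地址加上 m.daocloud.io/ 前缀,例如 docker.io/flannel/flannel:v0.25.6 改为 m.daocloud.io/docker.io/flannel/flannel:v0.25.6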
[root@k8smaster flanneld]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8smaster Ready control-plane 19h v1.28.2
[root@k8smaster flanneld]# kubectl get pods -n kube-flannel
NAME READY STATUS RESTARTS AGE
kube-flannel-ds-g8mng 1/1 Running 0 8m52s
卸载命令 kubectl delete -f kube-flannel.yml
安装calico
我这里直接 kubeadm reset 了,重新来一遍,master和node节点都要reset,init的时候指定--pod-network-cidr=192.168.0.0/16,calico默认网段是192.168.0.0/16
kubectl create -f https://raw.gitmirror.com/projectcalico/calico/v3.27.2/manifests/tigera-operator.yaml
wget https://raw.gitmirror.com/projectcalico/calico/v3.27.2/manifests/custom-resources.yaml
vim custom-resources.yaml
#把里边cidr:ip 更换,根据你pod-network-cidr的参数更换
cidr: 10.244.0.0/16
构建
kubectl create -f custom-resources.yaml
查看
kubectl get pod -A
还是不行,启动不了,也是网络问题
最后还是开科技了
要等一会儿
ok了
6、集群测试
创建完nginx要等一会儿,状态为Running就是ok,kubectl get svc查看端口
[root@k8smaster calico]# kubectl create deployment nginx --image=nginx
deployment.apps/nginx created
[root@k8smaster calico]# kubectl get pods
NAME READY STATUS RESTARTS AGE
nginx-7854ff8877-h45w7 0/1 ContainerCreating 0 6s
[root@k8smaster calico]# kubectl get pods
NAME READY STATUS RESTARTS AGE
nginx-7854ff8877-h45w7 0/1 ContainerCreating 0 11s
[root@k8smaster calico]# kubectl get pods
NAME READY STATUS RESTARTS AGE
nginx-7854ff8877-h45w7 1/1 Running 0 37s
[root@k8smaster calico]# kubectl expose deployment nginx --port=80 --type=NodePort
service/nginx exposed
[root@k8smaster calico]# kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.140.0.1 <none> 443/TCP 14m
nginx NodePort 10.140.132.100 <none> 80:30090/TCP 9s
访问测试
查看另外两个节点
[root@k8snode1 data]# kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.140.0.1 <none> 443/TCP 17m
nginx NodePort 10.140.132.100 <none> 80:30090/TCP 3m23s
ok 没问题了
7、k8s常用命令
查看pod,service,endpoints,secret等等的状态
kubectl get 组件名 # 例如kubectl get pod 查看详细信息可以加上-o wide 其他namespace的指定 -n namespace名
创建,变更一个yaml文件内资源,也可以是目录,目录内包含一组yaml文件(实际使用中都是以yaml文件为主,直接使用命令创建pod的很少,推荐多使用yaml文件)
kubectl apply -f xxx.yaml # 例如kubectl apply -f nginx.yaml 这里是如果没有则创建,如果有则变更,比create好用
删除一个yaml文件内资源,也可以是目录,目录内包含一组yaml文件
kubectl delete -f xxx.yaml # 例如kubectl delete -f nginx.yaml
查看资源状态,比如有一组deployment内的pod没起来,一般用于pod调度过程出现的问题排查
kubectl describe pod pod名 # 先用kubectl get pod查看 有异常的复制pod名使用这个命令
查看pod日志,用于pod状态未就绪的故障排查
kubectl logs pod名 # 先用kubectl get pod查看 有异常的复制pod名使用这个命令
查看node节点或者是pod资源(cpu,内存资源)使用情况
kubectl top 组件名 # 例如kubectl top node kubectl top pod
进入pod内部
kubectl exec -ti pod名 -- /bin/bash # 先用kubectl get pod查看 有需要的复制pod名使用这个命令