1. 节点disable
# Mark the node unschedulable so no new pods are placed on it while the
# container runtime is being replaced. Replace node-host-name with the
# real node name.
kubectl cordon node-host-name
2. 操作系统
开启cgroupv2
# Load the kernel modules containerd needs, now and on every boot.
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter

# Set the required sysctl parameters; these persist across reboots.
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables  = 1
net.ipv4.ip_forward                 = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF

# Apply the sysctl parameters without rebooting.
sudo sysctl --system

# Regenerate the GRUB configuration.
# NOTE(review): nothing above changes the kernel command line. Enabling
# cgroup v2 normally requires adding systemd.unified_cgroup_hierarchy=1 to
# GRUB_CMDLINE_LINUX in /etc/default/grub before this step, followed by a
# reboot -- confirm for the target distribution.
sudo update-grub
containerd安装
# Remove Docker and the packaged containerd.
sudo apt remove docker docker-ce docker-ce-cli containerd.io

# Download and unpack the containerd 1.5.7 release binaries.
wget https://github.com/containerd/containerd/releases/download/v1.5.7/containerd-1.5.7-linux-amd64.tar.gz
tar zxvf containerd-1.5.7-linux-amd64.tar.gz
sudo cp bin/* /usr/local/bin

# Generate the default configuration. The directory is created first
# because tee fails if /etc/containerd does not exist.
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml
配置订正
containerd配置
# /etc/containerd/config.toml (excerpt; other settings omitted)
# ...
[plugins."io.containerd.grpc.v1.cri".registry]
  # Directory that containerd searches for per-registry host configuration.
  config_path = "/etc/containerd"
镜像registry配置【自己的私有镜像】
containerd systemd配置
# /lib/systemd/system/containerd.service
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target

[Service]
# The leading "-" lets the unit start even if modprobe fails.
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd

Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
LimitNOFILE=1048576
# Comment TasksMax if your systemd version does not support it.
# Only systemd 226 and above support this version.
TasksMax=infinity
OOMScoreAdjust=-999

[Install]
WantedBy=multi-user.target
TODO
增加nvidia runtime支持
+++ /etc/containerd/config.toml 2020-12-18 18:23:38.137796223 +0000
@@ -94,6 +94,15 @@
           privileged_without_host_devices = false
           base_runtime_spec = ""
           [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+            SystemdCgroup = true
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
+          privileged_without_host_devices = false
+          runtime_engine = ""
+          runtime_root = ""
+          runtime_type = "io.containerd.runc.v1"
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
+            BinaryName = "/usr/bin/nvidia-container-runtime"
+            SystemdCgroup = true
     [plugins."io.containerd.grpc.v1.cri".cni]
       bin_dir = "/opt/cni/bin"
       conf_dir = "/etc/cni/net.d"
3. 观测containerd是否正常
4. 订正kubelet配置
# Add the following to /var/lib/kubelet/kubeadm-flags.env so the kubelet
# talks to containerd over its CRI socket.
# NOTE(review): --container-runtime was removed from the kubelet in
# Kubernetes 1.27; on such versions keep only --container-runtime-endpoint
# -- confirm against the cluster version.
KUBELET_EXTRA_ARGS="--container-runtime=remote --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
5. 重启kubelet和containerd(例如:sudo systemctl daemon-reload && sudo systemctl restart containerd kubelet)
6. 观测node 是否ready
参考资料:
Installation Guide — NVIDIA Cloud Native Technologies documentation
容器运行时 | Kubernetes
https://gvisor.dev/docs/user_guide/containerd/quick_start/