一、环境
1、修改hostname
hostnamectl set-hostname master
hostnamectl set-hostname node
2、修改hosts文件
cat > /etc/hosts <<EOF
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.1.1 master
192.168.1.2 node
EOF
3、关闭SELinux和交换分区
#关闭SELinux
setenforce 0
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config
#关闭交换分区
sed -ri 's/.*swap.*/#&/' /etc/fstab
swapoff -a && sysctl -w vm.swappiness=0
cat /etc/fstab
# /dev/mapper/centos-swap swap swap defaults 0 0
4、centos7.6建议升级内核到4.18版本以上
#添加启用源
yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm -y
sed -i "s@mirrorlist@#mirrorlist@g" /etc/yum.repos.d/elrepo.repo
# 如果不是阿里云服务器选前两个,是阿里云服务器可以用内网源
sed -i "s@elrepo.org/linux@mirrors.tuna.tsinghua.edu.cn/elrepo@g" /etc/yum.repos.d/elrepo.repo  # 清华内核源
sed -i "s@elrepo.org/linux@mirrors.aliyun.com/elrepo@g" /etc/yum.repos.d/elrepo.repo  # 阿里内核源
sed -i "s@elrepo.org/linux@mirrors.cloud.aliyuncs.com/elrepo@g" /etc/yum.repos.d/elrepo.repo  # 阿里内网内核源
yum --disablerepo="*" --enablerepo="elrepo-kernel" list available
# 安装最新的内核
# kernel-ml 为主线最新版本,kernel-lt 为长期维护的稳定版本
yum -y --enablerepo=elrepo-kernel install kernel-ml
# 查看已安装那些内核
rpm -qa | grep kernel
# 查看默认内核
grubby --default-kernel
# 若不是最新的使用命令设置
grubby --set-default $(ls /boot/vmlinuz-* | grep elrepo)
# 重启生效
reboot
5、安装ipvsadm
yum install ipvsadm ipset sysstat conntrack libseccomp -y
cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF
systemctl restart systemd-modules-load.service
lsmod | grep -e ip_vs -e nf_conntrack
ip_vs_sh 16384 0
ip_vs_wrr 16384 0
ip_vs_rr 16384 0
ip_vs 200704 6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack 188416 1 ip_vs
nf_defrag_ipv6 24576 2 nf_conntrack,ip_vs
nf_defrag_ipv4 16384 1 nf_conntrack
libcrc32c 16384 2 nf_conntrack,ip_vs
二、部署容器运行时Containerd
1、下载安装包
#1.Containerd二进制包
#github二进制包下载地址:https://github.com/containerd/containerd/releases
wget https://proxy.zyun.vip/https://github.com/containerd/containerd/releases/download/v1.7.0/cri-containerd-cni-1.7.0-linux-amd64.tar.gz
#2.下载runc二进制
#github下载地址:https://github.com/opencontainers/runc
wget https://proxy.zyun.vip/https://github.com/opencontainers/runc/releases/download/v1.1.5/runc.amd64
如https://proxy.zyun.vip无法解析,可以换其他的加速站
https://gh.api.99988866.xyz
https://gh.con.sh
https://gh.ddlc.top
https://gh2.yanqishui.work
https://ghdl.feizhuqwq.cf
https://ghproxy.com
https://ghps.cc
https://git.xfj0.cn
https://github.91chi.fun
2、安装Containerd作为Runtime
[root@master ~]# tar -zxvf cri-containerd-cni-1.7.0-linux-amd64.tar.gz -C /
cri-containerd.DEPRECATED.txt
etc/
etc/systemd/
etc/systemd/system/
etc/systemd/system/containerd.service
etc/crictl.yaml
etc/cni/
etc/cni/net.d/
etc/cni/net.d/10-containerd-net.conflist
usr/
usr/local/
usr/local/sbin/
usr/local/sbin/runc
usr/local/bin/
usr/local/bin/crictl
usr/local/bin/containerd-shim-runc-v1
usr/local/bin/critest
usr/local/bin/containerd
usr/local/bin/containerd-shim-runc-v2
usr/local/bin/containerd-shim
usr/local/bin/ctd-decoder
usr/local/bin/ctr
usr/local/bin/containerd-stress
opt/
opt/containerd/
opt/containerd/cluster/
opt/containerd/cluster/version
opt/containerd/cluster/gce/
opt/containerd/cluster/gce/cloud-init/
opt/containerd/cluster/gce/cloud-init/master.yaml
opt/containerd/cluster/gce/cloud-init/node.yaml
opt/containerd/cluster/gce/cni.template
opt/containerd/cluster/gce/env
opt/containerd/cluster/gce/configure.sh
opt/cni/
opt/cni/bin/
opt/cni/bin/tuning
opt/cni/bin/static
opt/cni/bin/macvlan
opt/cni/bin/vrf
opt/cni/bin/sbr
opt/cni/bin/bridge
opt/cni/bin/firewall
opt/cni/bin/host-device
opt/cni/bin/host-local
opt/cni/bin/bandwidth
opt/cni/bin/loopback
opt/cni/bin/vlan
opt/cni/bin/dhcp
opt/cni/bin/ptp
opt/cni/bin/dummy
opt/cni/bin/portmap
opt/cni/bin/ipvlan
3、配置Containerd所需的模块
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
systemctl restart systemd-modules-load.service
4、配置Containerd所需的内核
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
# 加载内核
sysctl --system
5、修改Containerd的配置文件
# 创建默认配置文件
mkdir -p /etc/containerd
containerd config default | tee /etc/containerd/config.toml
# 修改Containerd的配置文件
sed -i "s#SystemdCgroup\ \=\ false#SystemdCgroup\ \=\ true#g" /etc/containerd/config.toml
cat /etc/containerd/config.toml | grep SystemdCgroup
sed -i "s#registry.k8s.io#registry.aliyuncs.com/google_containers#g" /etc/containerd/config.toml
cat /etc/containerd/config.toml | grep sandbox_image
6、启动并设置为开机启动
systemctl enable --now containerd
systemctl restart containerd
# 打印出版本信息
[root@master ~]# crictl version
Version: 0.1.0
RuntimeName: containerd
RuntimeVersion: v1.7.0
RuntimeApiVersion: v1
7、 安装runc
install -m 755 runc.amd64 /usr/local/sbin/runc
cp -p /usr/local/sbin/runc /usr/local/bin/runc
cp -p /usr/local/sbin/runc /usr/bin/runc
#下载并安装2.4以上版本的libseccomp包
yum -y install http://rpmfind.net/linux/centos/8-stream/BaseOS/x86_64/os/Packages/libseccomp-2.5.1-1.el8.x86_64.rpm
#查看当前版本
[root@master ~]# rpm -qa | grep libseccomp
libseccomp-2.5.1-1.el8.x86_64
三、使用kubeadm部署Kubernetes
1、配置源
注:如下为阿里内网源,外网源将http://mirrors.cloud.aliyuncs.com改为https://mirrors.aliyun.com
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.cloud.aliyuncs.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=0
gpgkey=http://mirrors.cloud.aliyuncs.com/kubernetes/yum/doc/yum-key.gpg
http://mirrors.cloud.aliyuncs.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
2、安装kubeadm、kubelet、kubectl
#将软件包在本地索引缓存
yum makecache fast
#查看当前库支持k8s版本
yum list kubeadm --showduplicates | sort -r
#安装对应的版本
yum install kubelet-1.26.0-0 kubeadm-1.26.0-0 kubectl-1.26.0-0 -y
3、使用kubeadm init初始化集群
#启动kubelet
systemctl enable kubelet.service
#打印集群初始化默认的使用的配置
kubeadm config print init-defaults
#例:(用第一个就行)
kubeadm config print init-defaults --component-configs KubeletConfiguration > kubeadm.yaml
kubeadm config print init-defaults --component-configs KubeProxyConfiguration > kubeadm.yaml
#打印 kubeadm 列出拉取镜像列表
kubeadm config images list
kubeadm config images pull
--image-repository string 默认值:"registry.k8s.io" #选择用于拉取控制平面镜像的容器仓库
kubeadm config images pull --image-repository registry.aliyuncs.com/google_containers
#还原kubeadm配置(非必须执行,出错再执行)
kubeadm reset
#使用kubeadm初始化集群
kubeadm init --config kubeadm.yaml
#配置使用kubectl访问集群
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
#检查是否成功
[root@master ~]# kubectl get cs,node
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
componentstatus/scheduler Healthy ok
componentstatus/controller-manager Healthy ok
componentstatus/etcd-0 Healthy {"health":"true","reason":""}
NAME STATUS ROLES AGE VERSION
node/master Ready control-plane 2m40s v1.26.0
四、node加入集群
#初始化完成后生成的加入token记录
kubeadm join 1.2.3.4:6443 --token abcdef.012345678xxxxxx \
--discovery-token-ca-cert-hash sha256:da64abf2351fe608aa6b5d4a91a60da7e805338581039e9a917ac6f626xxxxx
#加入集群所需的命令和凭证
kubeadm token create --print-join-command
五、安装组件
1、安装网络插件
下载地址:
curl -LJO https://github.com/projectcalico/calico/raw/release-v3.25/manifests/calico.yaml
wget https://docs.projectcalico.org/manifests/calico.yaml -O calico.yaml
curl -LO https://github.com/projectcalico/calico/raw/v3.25.1/manifests/calico.yaml
#查看需要安装的镜像
[root@master ~]# cat calico.yaml | grep image: | sort | uniq
image: docker.io/calico/cni:v3.25.0
image: docker.io/calico/kube-controllers:v3.25.0
image: docker.io/calico/node:v3.25.0
#可以手动拉取对应镜像
crictl pull docker.io/calico/cni:v3.25.0
crictl pull docker.io/calico/node:v3.25.0
crictl pull docker.io/calico/kube-controllers:v3.25.0
#批量拉取镜像
cat > calico_images.txt << EOF
docker.io/calico/cni:v3.25.0
docker.io/calico/node:v3.25.0
docker.io/calico/kube-controllers:v3.25.0
EOF
while read -r line; do crictl pull "$line"; done < calico_images.txt
#上述命令将会循环读取 calico_images.txt 文件中的每一行,然后使用 crictl pull 命令拉取对应的镜像
[root@master ~]# while read line; do crictl pull $line; done < calico_images.txt
Image is up to date for sha256:d70a5947d57e5ab3340d126a38e6ae51bd9e8e0b342daa2012e78d8868bed5b7
Image is up to date for sha256:08616d26b8e74867402274687491e5978ba4a6ded94e9f5ecc3e364024e5683e
Image is up to date for sha256:5e785d005ccc1ab22527a783835cf2741f6f5f385a8956144c661f8c23ae9d78
[root@master ~]# crictl images | grep calico
docker.io/calico/cni v3.25.0 d70a5947d57e5 88MB
docker.io/calico/kube-controllers v3.25.0 5e785d005ccc1 31.3MB
docker.io/calico/node v3.25.0 08616d26b8e74 87.2MB
#安装calico.yaml
[root@master ~]# kubectl create -f calico.yaml
[root@master ~]# kubectl -nkube-system get po | grep calico
calico-kube-controllers-57b57c56f-rrk2q 1/1 Running 0 48s
calico-node-lr745 1/1 Running 0 48s
2、安装监控软件
下载地址:
wget https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
#查看需要安装的镜像
[root@master ~]# cat components.yaml | grep image:
image: registry.k8s.io/metrics-server/metrics-server:v0.6.3
#拉取镜像(建议用境外服务器拉取后打包scp传输)
注:此处是境外服务器命令,非master命令
docker save -o registry.k8s.io_metrics-server_v0_6_3.tar registry.k8s.io/metrics-server/metrics-server:v0.6.3
#此处需要配置免密登录
scp registry.k8s.io_metrics-server_v0_6_3.tar root@master:/root/
#导入镜像
ctr -n=k8s.io image import registry.k8s.io_metrics-server_v0_6_3.tar
#查看镜像
[root@master ~]# crictl images | grep metrics
registry.k8s.io/metrics-server/metrics-server v0.6.3 817bbe3f2e517 70.3MB
注:此处直接用官方yaml安装会存在两个报错,需要修改下yaml文件。
如下为两种解决方案:
报错1:master存在污点,如果没加入node节点,会导致metrics-server无法调度
#查看安装的服务
[root@master ~]# kubectl -nkube-system get po | grep metrics
metrics-server-6f6cdbf67d-bbntc 0/1 Pending 0 12m
#查看报错事件
[root@master ~]# kubectl -nkube-system describe po metrics-server-6f6cdbf67d-bbntc | sed -n '/^Events:/,$p'
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 2m17s (x3 over 12m) default-scheduler 0/1 nodes are available: 1 node(s) had untolerated taint {node-role.kubernetes.io/control-plane: }. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling..
# master存在污点,需要在deployment增加容忍
kubectl -nkube-system patch deployment metrics-server -p '{"spec":{"template":{"spec":{"tolerations":[{"key":"node-role.kubernetes.io/control-plane", "operator":"Exists", "effect":"NoSchedule"}]}}}}'
报错2:Kubelet证书需要由群集证书颁发机构签名(或者通过对Metrics Server配置参数--kubelet-insecure-tls禁用证书验证,不安全)
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 50s default-scheduler Successfully assigned kube-system/metrics-server-5fd88cf44c-qtvvq to master
Normal Pulled 49s kubelet Container image "registry.k8s.io/metrics-server/metrics-server:v0.6.3" already present on machine
Normal Created 49s kubelet Created container metrics-server
Normal Started 49s kubelet Started container metrics-server
Warning Unhealthy 10s (x2 over 20s) kubelet Readiness probe failed: HTTP probe failed with statuscode: 500
在 kube-system 命名空间中编辑 metrics-server 的部署,使用以下命令:
kubectl -n kube-system edit deployment metrics-server
打开 metrics-server 部署的配置文件。在其中找到 spec.template.spec.containers[0].args 字段,并在其中添加 --kubelet-insecure-tls 参数。应该得到类似以下这样的一段代码:
spec:
template:
spec:
containers:
- name: metrics-server
image: k8s.gcr.io/metrics-server/metrics-server:v0.5.0
args:
- --kubelet-insecure-tls <--- 新增的参数
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --metric-resolution=30s
如下为修改后完整的metrics-server.yaml文件
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
spec:
selector:
matchLabels:
k8s-app: metrics-server
strategy:
rollingUpdate:
maxUnavailable: 0
template:
metadata:
labels:
k8s-app: metrics-server
spec:
containers:
- args:
- --cert-dir=/tmp
- --secure-port=4443
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls
image: registry.k8s.io/metrics-server/metrics-server:v0.6.3
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 3
httpGet:
path: /livez
port: https
scheme: HTTPS
periodSeconds: 10
name: metrics-server
ports:
- containerPort: 4443
name: https
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /readyz
port: https
scheme: HTTPS
initialDelaySeconds: 20
periodSeconds: 10
resources:
requests:
cpu: 100m
memory: 200Mi
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
volumeMounts:
- mountPath: /tmp
name: tmp-dir
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
serviceAccountName: metrics-server
volumes:
- emptyDir: {}
name: tmp-dir
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
operator: Exists
查看输出结果:
[root@master ~]# kubectl get po -nkube-system | grep metrics
metrics-server-6b7f7bf5cb-ht5hh 1/1 Running 0 100s
[root@master ~]# kubectl top node
NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
master 116m 2% 1728Mi 11%