I. Environment Preparation
1. System configuration (all machines)
1) Disable the firewall:
systemctl stop firewalld
systemctl disable firewalld
2) Disable SELinux at runtime:
setenforce 0
3) Edit /etc/selinux/config and set SELINUX to disabled so the change survives a reboot:
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
SELINUX=disabled
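A quick check that both changes took effect (getenforce reports the runtime mode; the grep shows what applies after the next reboot):
getenforce    # Permissive after setenforce 0
grep ^SELINUX= /etc/selinux/config    # SELINUX=disabled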
2. Disable swap
Kubernetes has required swap to be off since v1.8; with the default configuration, kubelet will not start while swap is enabled. Option one: lift the restriction with the kubelet flag --fail-swap-on=false. Option two (used here): disable swap entirely.
swapoff -a
Edit /etc/fstab to comment out the swap mount so it stays off across reboots, then confirm with free -m that swap reads 0.
Comment out the swap line:
sed -i 's/.*swap.*/#&/' /etc/fstab
free -m
              total        used        free      shared  buff/cache   available
Mem:           3935         144        3415           8         375        3518
Swap:             0           0           0
3. Install Docker (run on all three machines)
yum remove docker \
docker-client \
docker-client-latest \
docker-common \
docker-latest \
docker-latest-logrotate \
docker-logrotate \
docker-selinux \
docker-engine-selinux \
docker-engine
yum install -y yum-utils device-mapper-persistent-data lvm2 git
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install docker-ce -y
vim /etc/docker/daemon.json
{
"exec-opts":["native.cgroupdriver=systemd"]
}
Restart Docker:
systemctl restart docker
# Note: from v1.20 on, Kubernetes switched the default cgroup driver from cgroupfs to systemd for container resource management, so Docker's driver must be set to systemd to match (all nodes).
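After the restart, confirm Docker picked up the new driver:
docker info | grep -i 'cgroup driver'    # expect: Cgroup Driver: systemd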
4. Pull the images from the Aliyun registry
vim pull.sh
#!/usr/bin/bash
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.22.2
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.22.2
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.22.2
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.22.2
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:1.8.4
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.0-0
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.5
vim tag.sh
#!/usr/bin/bash
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.22.2 k8s.gcr.io/kube-controller-manager:v1.22.2
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.22.2 k8s.gcr.io/kube-proxy:v1.22.2
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.22.2 k8s.gcr.io/kube-apiserver:v1.22.2
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.22.2 k8s.gcr.io/kube-scheduler:v1.22.2
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:1.8.4 k8s.gcr.io/coredns/coredns:v1.8.4
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.0-0 k8s.gcr.io/etcd:3.5.0-0
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.5 k8s.gcr.io/pause:3.5
Keep the version numbers unchanged; run bash pull.sh and then bash tag.sh on every machine.
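If you prefer, the two scripts collapse into a single loop; a minimal sketch using the same image list and versions as above (coredns is the one image whose k8s.gcr.io path differs, so it is handled separately):
#!/usr/bin/bash
# pull-and-tag.sh: pull from the Aliyun mirror, then re-tag as k8s.gcr.io
MIRROR=registry.cn-hangzhou.aliyuncs.com/google_containers
for img in kube-controller-manager:v1.22.2 kube-proxy:v1.22.2 \
           kube-apiserver:v1.22.2 kube-scheduler:v1.22.2 \
           etcd:3.5.0-0 pause:3.5; do
    docker pull $MIRROR/$img
    docker tag $MIRROR/$img k8s.gcr.io/$img
done
docker pull $MIRROR/coredns:1.8.4
docker tag $MIRROR/coredns:1.8.4 k8s.gcr.io/coredns/coredns:v1.8.4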
5. Configure the Kubernetes yum repository
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
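To see which versions the new repo provides before pinning one in the next step:
yum makecache fast
yum list kubeadm --showduplicates | sort -r | head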
II. Installation
1. Install kubeadm, kubelet, and kubectl
# Install a pinned version
yum install -y kubelet-1.22.2-0.x86_64 kubeadm-1.22.2-0.x86_64 kubectl-1.22.2-0.x86_64 ipvsadm
# Or install the latest version
yum makecache fast
yum install -y kubelet kubeadm kubectl ipvsadm
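Verify the installed versions agree on every machine:
kubeadm version -o short    # e.g. v1.22.2
kubelet --version           # e.g. Kubernetes v1.22.2
kubectl version --client --short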
2. Load the IPVS kernel modules
Add the modprobe commands below to /etc/rc.local so they run at every boot:
vim /etc/rc.local
modprobe ip_vs
modprobe ip_vs_rr
modprobe ip_vs_wrr
modprobe ip_vs_sh
modprobe nf_conntrack_ipv4
chmod +x /etc/rc.local
# Run the modprobe commands above once by hand (or reboot) so the modules are loaded immediately.
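Note: on kernels 4.19 and newer, nf_conntrack_ipv4 was merged into nf_conntrack, so modprobe nf_conntrack_ipv4 fails there and nf_conntrack should be loaded instead. As an alternative to rc.local, a systemd modules-load.d drop-in persists the modules the same way:
cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack_ipv4
EOF
# systemd-modules-load reads this file at every boot; no chmod needed.
# On kernel >= 4.19 replace the last line with: nf_conntrack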
3. Configure kernel parameters
Configure the forwarding-related parameters; without them later steps may fail:
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
# ip_forward is required by the kubeadm preflight checks and by flannel (see also section VI)
net.ipv4.ip_forward = 1
vm.swappiness=0
EOF
sysctl --system    # apply the settings
4. If net.bridge.bridge-nf-call-iptables errors out, load the br_netfilter module:
modprobe br_netfilter
sysctl -p /etc/sysctl.d/k8s.conf
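To make br_netfilter load on every boot as well (same modules-load.d mechanism as above), and to confirm it is loaded now:
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
lsmod | grep br_netfilter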
5. Check that the modules loaded:
lsmod | grep ip_vs
ip_vs_sh 12688 0
ip_vs_wrr 12697 0
ip_vs_rr 12600 0
ip_vs 141092 6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack 133387 2 ip_vs,nf_conntrack_ipv4
libcrc32c 12644 3 xfs,ip_vs,nf_conntrack
III. Configure and start kubelet (all nodes)
1) Check which cgroup driver Docker is using:
DOCKER_CGROUPS=`docker info |grep 'Cgroup' | awk ' NR==1 {print $3}'`
echo $DOCKER_CGROUPS
systemd
2) Configure kubelet's cgroup driver and pause image. Either reference the Aliyun mirror image directly (note: for v1.22 the pause image version is 3.5, not the 3.1 seen in older guides):
cat >/etc/sysconfig/kubelet<<EOF
KUBELET_EXTRA_ARGS="--cgroup-driver=$DOCKER_CGROUPS --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.5"
EOF
3) Or, since the images were re-tagged to k8s.gcr.io in section I.4, reference the canonical name (pick one of the two; the later write simply overwrites the earlier):
cat >/etc/sysconfig/kubelet<<EOF
KUBELET_EXTRA_ARGS="--cgroup-driver=$DOCKER_CGROUPS --pod-infra-container-image=k8s.gcr.io/pause:3.5"
EOF
4) Start kubelet:
systemctl daemon-reload
systemctl enable kubelet && systemctl restart kubelet
If you run systemctl status kubelet at this point, it reports a failure:
Oct 11 00:26:43 node1 systemd[1]: kubelet.service: main process exited, code=exited, status=255/n/a
Oct 11 00:26:43 node1 systemd[1]: Unit kubelet.service entered failed state.
Oct 11 00:26:43 node1 systemd[1]: kubelet.service failed.
Running journalctl -xefu kubelet to read the systemd journal reveals the real error:
unable to load client CA file /etc/kubernetes/pki/ca.crt: open /etc/kubernetes/pki/ca.crt: no such file or directory
# This error resolves itself once kubeadm init generates the CA certificate, so it can be ignored for now.
# In short, kubelet keeps restarting until kubeadm init has been run.
IV. Configure the master node
kubeadm init --kubernetes-version=v1.22.2 --pod-network-cidr=10.244.0.0/16 --apiserver-advertise-address=192.168.75.25 --ignore-preflight-errors=Swap
Notes:
--apiserver-advertise-address=192.168.75.25    # the master's IP address
--kubernetes-version=v1.22.2    # match the exact version you installed
Double-check beforehand that the swap partition is disabled.
If kubeadm rejects the version, a newer release exists; adjust the flag accordingly.
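As an alternative to the pull/tag scripts in section I.4, kubeadm itself can list and pre-pull exactly the images it needs, straight from an Aliyun mirror (--image-repository and the kubeadm config images subcommands are standard kubeadm features):
kubeadm config images list --kubernetes-version v1.22.2
kubeadm config images pull --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.22.2
A successful init prints output like the following (this transcript was captured from a v1.16.1 run; the phases are identical in v1.22):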
[init] Using Kubernetes version: v1.16.1
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[WARNING SystemVerification]: this Docker version is not on the list of validated versions: 18.03.0-ce. Latest validated version: 18.09
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Activating the kubelet service
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kub-k8s-master kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.246.166]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [kub-k8s-master localhost] and IPs [192.168.246.166 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [kub-k8s-master localhost] and IPs [192.168.246.166 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 24.575209 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.16" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node kub-k8s-master as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node kub-k8s-master as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: 93erio.hbn2ti6z50he0lqs
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.75.25:6443 --token 93erio.hbn2ti6z50he0lqs \
--discovery-token-ca-cert-hash sha256:3bc60f06a19bd09f38f3e05e5cff4299011b7110ca3281796668f4edb29a56d9    # save this command
=======================================================================================
The log above records the complete init output; it shows the key steps required to bootstrap a Kubernetes cluster. The important items:
[kubelet-start] writes kubelet's configuration file "/var/lib/kubelet/config.yaml"
[certs] generates the various certificates
[kubeconfig] generates the kubeconfig files
[bootstrap-token] generates the token; record it, since kubeadm join uses it later when adding nodes to the cluster
Configure kubectl
Run the following on the master node:
[root@kub-k8s-master ~]# rm -rf $HOME/.kube
[root@kub-k8s-master ~]# mkdir -p $HOME/.kube
[root@kub-k8s-master ~]# cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@kub-k8s-master ~]# chown $(id -u):$(id -g) $HOME/.kube/config
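A quick sanity check that kubectl can reach the API server (the master will report NotReady until the network plugin is deployed in the next section):
[root@kub-k8s-master ~]# kubectl cluster-info
[root@kub-k8s-master ~]# kubectl get nodes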
V. Deploy the network plugin (flannel)
Run on the master node.
Download the configuration:
# cd ~ && mkdir flannel && cd flannel
raw.githubusercontent.com is blocked in some regions, so add a hosts entry before downloading:
[root@k8s-master flannel]# vim /etc/hosts
199.232.68.133 raw.githubusercontent.com
[root@k8s-master flannel]# curl -O https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
Edit kube-flannel.yml:
The Network CIDR here must match the --pod-network-cidr passed to kubeadm init; the defaults already agree, so no change is needed:
net-conf.json: |
  {
    "Network": "10.244.0.0/16",
    "Backend": {
      "Type": "vxlan"
    }
  }
# Note: kube-flannel.yml references the image quay.io/coreos/flannel:v0.14.0; pull it in advance.
# If a node has multiple NICs, see https://github.com/kubernetes/kubernetes/issues/39701
# You currently need the --iface argument in kube-flannel.yml to name the cluster-internal NIC; otherwise DNS may fail to resolve and containers may be unable to communicate.
# Download kube-flannel.yml locally,
# then add --iface=<iface-name> to the flanneld startup arguments:
containers:
- name: kube-flannel
  image: quay.io/coreos/flannel:v0.14.0
  command:
  - /opt/bin/flanneld
  args:
  - --ip-masq
  - --kube-subnet-mgr
  - --iface=ens33
  - --iface=eth0
Note: the value of --iface=ens33 is your current NIC; multiple NICs may be specified with repeated --iface flags.
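To find the right interface name on each host, look for the NIC that carries the node's IP (ens33 and 192.168.75.25 here are examples; substitute your own):
ip route show default          # the 'dev' field names the default NIC, e.g. ens33
ip -4 addr show ens33          # confirm it holds the node IP, e.g. 192.168.75.25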
# kubeadm v1.12+ additionally taints nodes with node.kubernetes.io/not-ready:NoSchedule,
# which simply means a node accepts no scheduling until it is Ready. But a node cannot become Ready before the network plugin is deployed,
# so edit kube-flannel.yml to add a toleration for the node.kubernetes.io/not-ready:NoSchedule taint:
      - key: beta.kubernetes.io/arch
        operator: In
        values:
        - arm64
      hostNetwork: true
      tolerations:
      - operator: Exists
        effect: NoSchedule
      - key: node.kubernetes.io/not-ready   # add these three lines (around line 165 of the file)
        operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
Apply it:
# kubectl apply -f ~/flannel/kube-flannel.yml    # wait a little while after applying
# kubectl get pods -n kube-system    # after a minute everything should be Running:
NAME READY STATUS RESTARTS AGE
coredns-5644d7b6d9-sm8hs 1/1 Running 0 9m18s
coredns-5644d7b6d9-vddll 1/1 Running 0 9m18s
etcd-kub-k8s-master 1/1 Running 0 8m14s
kube-apiserver-kub-k8s-master 1/1 Running 0 8m17s
kube-controller-manager-kub-k8s-master 1/1 Running 0 8m20s
kube-flannel-ds-amd64-9wgd8 1/1 Running 0 8m42s
kube-proxy-sgphs 1/1 Running 0 9m18s
kube-scheduler-kub-k8s-master 1/1 Running 0 8m10s
Check:
# kubectl get pods --namespace kube-system
# kubectl get service
# kubectl get svc --namespace kube-system
Nodes only show Ready after the network plugin has been installed and configured.
VI. On all worker nodes
Join the worker nodes to the cluster.
If you hit an error, enable IP forwarding:
sysctl -w net.ipv4.ip_forward=1
Run on every worker node; this is the join command printed at the end of the successful kubeadm init on the master:
kubeadm join 192.168.75.25:6443 --token 93erio.hbn2ti6z50he0lqs \
--discovery-token-ca-cert-hash sha256:3bc60f06a19bd09f38f3e05e5cff4299011b7110ca3281796668f4edb29a56d9
VII. On the master
Verification:
1. List the pods:
[root@kub-k8s-master ~]# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-5644d7b6d9-sm8hs 1/1 Running 0 39m
coredns-5644d7b6d9-vddll 1/1 Running 0 39m
etcd-kub-k8s-master 1/1 Running 0 37m
kube-apiserver-kub-k8s-master 1/1 Running 0 38m
kube-controller-manager-kub-k8s-master 1/1 Running 0 38m
kube-flannel-ds-amd64-9wgd8 1/1 Running 0 38m
kube-flannel-ds-amd64-lffc8 1/1 Running 0 2m11s
kube-flannel-ds-amd64-m8kk2 1/1 Running 0 2m2s
kube-proxy-dwq9l 1/1 Running 0 2m2s
kube-proxy-l77lz 1/1 Running 0 2m11s
kube-proxy-sgphs 1/1 Running 0 39m
kube-scheduler-kub-k8s-master 1/1 Running 0 37m
2. Inspect an abnormal pod:
[root@kub-k8s-master ~]# kubectl describe pods kube-flannel-ds-sr6tq -n kube-system
Name: kube-flannel-ds-sr6tq
Namespace: kube-system
Priority: 0
PriorityClassName: <none>
......
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Pulling 12m kubelet, node2 pulling image "registry.cn-shanghai.aliyuncs.com/gcr-k8s/flannel:v0.10.0-amd64"
Normal Pulled 11m kubelet, node2 Successfully pulled image "registry.cn-shanghai.aliyuncs.com/gcr-k8s/flannel:v0.10.0-amd64"
Normal Created 11m kubelet, node2 Created container
Normal Started 11m kubelet, node2 Started container
Normal Created 11m (x4 over 11m) kubelet, node2 Created container
Normal Started 11m (x4 over 11m) kubelet, node2 Started container
Normal Pulled 10m (x5 over 11m) kubelet, node2 Container image "registry.cn-shanghai.aliyuncs.com/gcr-k8s/flannel:v0.10.0-amd64" already present on machine
Normal Scheduled 7m15s default-scheduler Successfully assigned kube-system/kube-flannel-ds-sr6tq to node2
Warning BackOff 7m6s (x23 over 11m) kubelet, node2 Back-off restarting failed container
3. In this situation, simply delete the abnormal pod:
[root@kub-k8s-master ~]# kubectl delete pod kube-flannel-ds-sr6tq -n kube-system
pod "kube-flannel-ds-sr6tq" deleted
4. Check the nodes:
[root@kub-k8s-master ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
kub-k8s-master Ready master 43m v1.16.1
kub-k8s-node1 Ready <none> 6m46s v1.16.1
kub-k8s-node2 Ready <none> 6m37s v1.16.1
The cluster setup is now complete.
Regenerating the token
Adding nodes to the cluster after the kubeadm-generated token has expired.
kubeadm init always prints a token for joining nodes:
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of machines by running the following on each node
as root:
kubeadm join 192.168.246.166:6443 --token n38l80.y2icehgzsyuzkthi \
--discovery-token-ca-cert-hash sha256:5fb6576ef82b5655dee285e0c93432aee54d38779bc8488c32f5cbbb90874bac
The default token is valid for 24 hours; once it expires it can no longer be used.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Solution:
1. Generate a new token:
[root@kub-k8s-master]# kubeadm token create
kiyfhw.xiacqbch8o8fa8qj
[root@kub-k8s-master]# kubeadm token list
TOKEN TTL EXPIRES USAGES DESCRIPTION EXTRA GROUPS
gvvqwk.hn56nlsgsv11mik6 <invalid> 2018-10-25T14:16:06+08:00 authentication,signing <none> system:bootstrappers:kubeadm:default-node-token
kiyfhw.xiacqbch8o8fa8qj 23h 2018-10-27T06:39:24+08:00 authentication,signing <none> system:bootstrappers:kubeadm:default-node-token
2. Get the sha256 hash of the CA certificate:
[root@kub-k8s-master]# openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
5417eb1b68bd4e7a4c82aded83abc55ec91bd601e45734d6aba85de8b1ebb057
3. Join the node to the cluster:
kubeadm join 18.16.202.35:6443 --token kiyfhw.xiacqbch8o8fa8qj --discovery-token-ca-cert-hash sha256:5417eb1b68bd4e7a4c82aded83abc55ec91bd601e45734d6aba85de8b1ebb057
After a few seconds the node should appear in the output of kubectl get nodes on the master.
The steps above are tedious; this does it in one shot:
[root@kub-k8s-master ~]# kubeadm token create --print-join-command
Second method (--ttl=0 creates a token that never expires):
[root@kub-k8s-master ~]# token=$(kubeadm token generate)
kubeadm token create $token --print-join-command --ttl=0
Then run on the worker node:
[root@kub-k8s-node1 ~]# kubeadm reset
[root@kub-k8s-node1 ~]# <run the kubeadm join command generated above>
VIII. Deploy the Harbor registry
The next download requires bypassing the firewall (Harbor v1.8.0 is used here):
[root@kub-k8s-master ~]# wget https://storage.googleapis.com/harbor-releases/release-1.8.0/harbor-offline-installer-v1.8.0.tgz
[root@kub-k8s-master ~]# yum -y install lrzsz
[root@kub-k8s-master ~]# curl -L https://github.com/docker/compose/releases/download/1.22.0/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose
[root@kub-k8s-master ~]# chmod +x /usr/local/bin/docker-compose
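Confirm the compose binary works before running the installer:
[root@kub-k8s-master ~]# docker-compose --version    # should report 1.22.0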
[root@kub-k8s-master ~]# tar xf harbor-offline-installer-v1.8.0.tgz
[root@kub-k8s-master ~]# cd harbor
Configuration for HTTP access:
[root@kub-k8s-master harbor]# vim harbor.yml    # the hostname must be resolvable (a DNS server is required; an /etc/hosts entry is not enough). If name resolution is unavailable, use the IP address. Change the following:
hostname: 192.168.75.25
[root@kub-k8s-master harbor]# ./install.sh    # wait while the images download
Test from a browser:
http://192.168.75.25
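Since Harbor is served over plain HTTP here, every Docker host that will push to or pull from it must trust it as an insecure registry; a sketch for /etc/docker/daemon.json (merging with the exec-opts entry set earlier), followed by a login test (admin / Harbor12345 is Harbor's default account):
vim /etc/docker/daemon.json
{
    "exec-opts": ["native.cgroupdriver=systemd"],
    "insecure-registries": ["192.168.75.25"]
}
systemctl restart docker
docker login 192.168.75.25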