1. Kubernetes HA cluster architecture
2. Preparing the lab environment
Either clean up the existing environment or prepare 7 fresh virtual machines.
This walkthrough cleans up the previous lab environment.
1. Keep the registry (server1) as it is and add a k8s project to it
Pull the required images and versions from the Aliyun registry and push them into the k8s project (a push sketch follows the pull commands below)
[root@server1 harbor]# docker pull registry.aliyuncs.com/google_containers/etcd:3.4.13-0
[root@server1 harbor]# docker pull registry.aliyuncs.com/google_containers/pause:3.2
[root@server1 harbor]# docker pull registry.aliyuncs.com/google_containers/kube-scheduler:v1.20.0
[root@server1 harbor]# docker pull registry.aliyuncs.com/google_containers/kube-apiserver:v1.20.0
[root@server1 harbor]# docker pull registry.aliyuncs.com/google_containers/kube-proxy:v1.20.0
[root@server1 harbor]# docker pull registry.aliyuncs.com/google_containers/coredns:1.7.0
[root@server1 harbor]# docker pull registry.aliyuncs.com/google_containers/kube-controller-manager:v1.20.0
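The push step itself is not shown above; a minimal sketch of retagging the pulled images into the Harbor k8s project, assuming that project already exists in Harbor:
[root@server1 harbor]# docker login reg.westos.org
[root@server1 harbor]# docker images | grep registry.aliyuncs.com/google_containers | awk '{print $1":"$2}' | \
awk -F/ '{system("docker tag "$0" reg.westos.org/k8s/"$3" && docker push reg.westos.org/k8s/"$3)}'
## retags each registry.aliyuncs.com/google_containers/<image>:<tag> as reg.westos.org/k8s/<image>:<tag> and pushes it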
2. Clean up the leftover environment on server2, server3 and server4
## delete all images on server2/3/4, then pull the images k8s needs from the local registry
[root@server2 ~]# docker rmi `docker images | grep -v ^REPOSITORY | awk '{print $1":"$2}'`
[root@server2 kubeapps]# kubectl -n kube-system get pod | grep coredns |awk '{system("kubectl -n kube-system delete pod "$1"")}'
pod "coredns-7f89b7bc75-f5jhv" deleted
pod "coredns-7f89b7bc75-qkqgm" deleted
3. Remove the k8s nodes from the cluster, reset each of them, and finally reset the master node
[root@server2 kubeapps]# kubectl delete nodes server3
node "server3" deleted
[root@server2 kubeapps]# kubectl delete nodes server4
node "server4" deleted
3.1 Reset each node and then reboot it
[root@server3 ~]# kubeadm reset
[reset] WARNING: Changes made to this host by 'kubeadm init' or 'kubeadm join' will be reverted.
[reset] Are you sure you want to proceed? [y/N]: y
[root@server4 ~]# kubeadm reset
[reset] WARNING: Changes made to this host by 'kubeadm init' or 'kubeadm join' will be reverted.
[reset] Are you sure you want to proceed? [y/N]: y
3.2 Finally reset the master node and reboot it once the reset is done
[root@server2 kubeapps]# kubeadm reset
[root@server2 kubeapps]# reboot
4. Create three new virtual machines: server8, server9 and server10.
[root@haojin ~]# cd /var/lib/libvirt/images/
[root@haojin images]# ls
rhel7.6.qcow2 server1 server2 server3 server4 server5
[root@haojin images]# qemu-img create -f qcow2 -b rhel7.6.qcow2 server8
Formatting 'server8', fmt=qcow2 size=21474836480 backing_file=rhel7.6.qcow2 cluster_size=65536 lazy_refcounts=off refcount_bits=16
[root@haojin images]# qemu-img create -f qcow2 -b rhel7.6.qcow2 server9
Formatting 'server9', fmt=qcow2 size=21474836480 backing_file=rhel7.6.qcow2 cluster_size=65536 lazy_refcounts=off refcount_bits=16
[root@haojin images]# qemu-img create -f qcow2 -b rhel7.6.qcow2 server10
Formatting 'server10', fmt=qcow2 size=21474836480 backing_file=rhel7.6.qcow2 cluster_size=65536 lazy_refcounts=off refcount_bits=16
3. Deploying the Kubernetes cluster
3.1 Install haproxy on server8 as the load balancer
1. Configure the yum repositories
[root@server8 ~]# vim /etc/yum.repos.d/haojin.repo
[root@server8 ~]# cat /etc/yum.repos.d/haojin.repo
[rhel7.6]
name=rhel7.6
baseurl=http://172.25.200.250/rhel7.6/
gpgcheck=0
[HighAvailability]
name="HighAvailability"
baseurl=http://172.25.200.250/rhel7.6/addons/HighAvailability
gpgcheck=0
[ResilientStorage]
name="ResilientStorage"
baseurl=http://172.25.200.250/rhel7.6/addons/ResilientStorage
gpgcheck=0
2. Install haproxy
[root@server8 ~]# yum repolist all
[root@server8 ~]# yum install -y haproxy
[root@server8 ~]# cd /etc/haproxy/
[root@server8 haproxy]# ls
haproxy.cfg
[root@server8 haproxy]# vim haproxy.cfg
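The haproxy.cfg edit was captured only as a screenshot; a minimal sketch of the relevant part, load-balancing the apiservers on port 6443 (the server3/server4 addresses are assumptions that follow the server2 = 172.25.200.2 pattern):
frontend k8s-api
    bind *:6443
    mode tcp
    default_backend k8s-api
backend k8s-api
    mode tcp
    balance roundrobin
    server server2 172.25.200.2:6443 check
    server server3 172.25.200.3:6443 check
    server server4 172.25.200.4:6443 check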
3. Start haproxy
[root@server8 haproxy]# systemctl enable --now haproxy.service
[root@server8 haproxy]# systemctl status haproxy.service
[root@server8 haproxy]# netstat -antlp ## confirm haproxy is listening on port 6443
4. Add a virtual IP (VIP) manually for now; pacemaker takes over managing it in section 4
[root@server8 haproxy]# ip addr add 172.25.200.100/24 dev eth0
[root@server8 haproxy]# ip addr show eth0
3.2 Operations on server2
[root@server2 ~]# kubeadm config print init-defaults > kubeadm-init.yaml
[root@server2 ~]# vim kubeadm-init.yaml
[root@server2 ~]# cat kubeadm-init.yaml
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.0123456789abcdef
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 172.25.200.2
bindPort: 6443
nodeRegistration:
criSocket: /var/run/dockershim.sock
name: server2
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/master
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: "172.25.200.100:6443"
controllerManager: {}
dns:
type: CoreDNS
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: reg.westos.org/k8s
kind: ClusterConfiguration
kubernetesVersion: v1.20.0
networking:
dnsDomain: cluster.local
serviceSubnet: 10.96.0.0/12
podSubnet: 10.244.0.0/16
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
[root@server2 ~]# kubeadm config images list --config kubeadm-init.yaml
## list the images required by the manifest; compare against the registry and add or adjust it so all of them are available
reg.westos.org/k8s/kube-apiserver:v1.20.0
reg.westos.org/k8s/kube-controller-manager:v1.20.0
reg.westos.org/k8s/kube-scheduler:v1.20.0
reg.westos.org/k8s/kube-proxy:v1.20.0
reg.westos.org/k8s/pause:3.2
reg.westos.org/k8s/etcd:3.4.13-0
reg.westos.org/k8s/coredns:1.7.0
## initialize the control plane
[root@server2 ~]# kubeadm init --config kubeadm-init.yaml --upload-certs
[init] Using Kubernetes version: v1.20.0
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join 172.25.200.100:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:c07a9cf7eec1a3475e2d4f9076a41ca21aa7a5413e661ca8c3a9c3d6ef1bbb68 \
--control-plane --certificate-key 702f0ed8bbd53d22e5e2975315c8929594992e395df7964d2fdbb23e23fa7fea
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 172.25.200.100:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:c07a9cf7eec1a3475e2d4f9076a41ca21aa7a5413e661ca8c3a9c3d6ef1bbb68
[root@server2 ~]# export KUBECONFIG=/etc/kubernetes/admin.conf
[root@server2 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
server2 Ready control-plane,master 2m3s v1.20.4
3.3 Join server3 and server4 as additional control-plane nodes (scaling out the control plane)
The steps are identical on server3 and server4; if the join fails, the simplest fix is to run kubeadm reset on that node and try again.
The join command is printed at the end of kubeadm init --config kubeadm-init.yaml --upload-certs on server2.
[root@server3 ~]# kubeadm join 172.25.200.100:6443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:ca9101419f0ea1cf94d764c5214f432bdab68a3e5ea0094a502464afd2f1aa8b --control-plane --certificate-key 5ae5fb71a5aa4293964d87dcc5db9dff388c6c954309da1f9a0c93169c915348
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
[root@server3 ~]# mkdir -p $HOME/.kube
[root@server3 ~]# cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@server3 ~]# chown $(id -u):$(id -g) $HOME/.kube/config
[root@server3 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
server2 Ready control-plane,master 26m v1.20.4
server3 Ready control-plane,master 51s v1.20.4
server4 Ready control-plane,master 12m v1.20.4
Success: all three nodes are now control-plane masters.
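The bootstrap token is valid for 24 hours (the ttl in kubeadm-init.yaml) and the uploaded certificate key for only about 2 hours, so if another node needs to join later, fresh values can be generated on server2; a sketch:
[root@server2 ~]# kubeadm token create --print-join-command ## prints a new worker join command
[root@server2 ~]# kubeadm init phase upload-certs --upload-certs ## prints a new --certificate-key for control-plane joins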
3.4 Operations on server9, the single worker node used for testing
It needs Docker installed, the repositories and registry configured, and the Kubernetes packages installed.
3.4.1 Install Docker and configure the repositories
1. Copy the Docker and Kubernetes .repo files from server2 to server9
[root@server2 ~]# cd /etc/yum.repos.d/
[root@server2 yum.repos.d]# ls
docker-ce.repo haojin.repo k8s.repo redhat.repo
[root@server2 yum.repos.d]# cat docker-ce.repo
[docker]
name=docker-ce
baseurl=http://172.25.200.250/docker-ce
gpgcheck=0
[root@server2 yum.repos.d]# cat k8s.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
[root@server2 yum.repos.d]# scp docker-ce.repo k8s.repo server9:/etc/yum.repos.d/
The authenticity of host 'server9 (172.25.200.9)' can't be established.
ECDSA key fingerprint is SHA256:bj6pb0Tp1O+qsOmu93IhgsLwZGBnKg3JklhqzlIVD9I.
ECDSA key fingerprint is MD5:26:fa:d7:66:35:2b:89:ee:b7:3f:20:f5:4b:a4:9b:d4.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'server9,172.25.200.9' (ECDSA) to the list of known hosts.
root@server9's password:
docker-ce.repo 100% 76 67.3KB/s 00:00
k8s.repo 100% 129
[root@server9 ~]# yum install docker-ce -y
2. Fix the Docker bridge-nf-call warnings
[root@server2 ~]# cd /etc/sysctl.d/
[root@server2 sysctl.d]# ls
99-sysctl.conf docker.conf
[root@server2 sysctl.d]# cat docker.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
[root@server2 sysctl.d]# scp docker.conf server9:/etc/sysctl.d/
root@server9's password:
docker.conf 100% 81 98.3KB/s 00:00
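The transcript copies docker.conf to server9 but does not show the settings being applied there; a minimal sketch:
[root@server9 ~]# modprobe br_netfilter ## make sure the bridge netfilter module is loaded
[root@server9 ~]# sysctl --system ## reload all sysctl config files, including /etc/sysctl.d/docker.conf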
3. Start Docker
[root@server9 ~]# systemctl enable --now docker.service
[root@server9 ~]# systemctl status docker.service
4. Configure the registry mirror and its TLS certificate
[root@server2 ~]# scp -r /etc/docker/daemon.json /etc/docker/certs.d/ server9:/etc/docker/
root@server9's password:
daemon.json 100% 271 188.0KB/s 00:00
ca.crt 100% 2106 448.7KB/s 00:00
5. Restart Docker
[root@server9 ~]# systemctl restart docker.service
[root@server9 ~]# docker info
Check in docker info that the registry mirror points to reg.westos.org; if the registry name does not resolve correctly, the Kubernetes components will not come up.
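For reference, the copied files typically look like the following in this setup; the exact contents were only shown as screenshots, so treat the values (the systemd cgroup driver and server1 being 172.25.200.1) as assumptions:
[root@server9 ~]# cat /etc/docker/daemon.json
{
  "registry-mirrors": ["https://reg.westos.org"],
  "exec-opts": ["native.cgroupdriver=systemd"]
}
[root@server9 ~]# echo "172.25.200.1 reg.westos.org" >> /etc/hosts ## name resolution for the Harbor registry on server1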
3.4.2 Install Kubernetes
1. Install and configure ipvsadm
[root@server9 ~]# yum install -y ipvsadm.x86_64 ## install ipvsadm
[root@server9 ~]# modprobe ip_vs_rr
[root@server9 ~]# modprobe ip_vs_sh
[root@server9 ~]# modprobe ip_vs_wrr
[root@server9 ~]# ipvsadm -l
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
[root@server9 ~]# lsmod | grep ip_vs
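modprobe only loads the ip_vs modules for the current boot; to make them persist across reboots, one option (not shown in the original notes) is a modules-load.d file:
[root@server9 ~]# cat > /etc/modules-load.d/ip_vs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_sh
ip_vs_wrr
EOF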
2. Adjust the flannel network plugin on server2 (this can be skipped if it was already done earlier; a sketch of the relevant settings follows the commands below)
[root@server2 ~]# vim kube-flannel.yml
[root@server2 ~]# kubectl apply -f kube-flannel.yml
[root@server2 ~]# kubectl -n kube-system get pod
[root@server2 ~]# kubectl -n kube-system get pod | grep coredns | awk '{system("kubectl -n kube-system delete pod "$1"")}'
[root@server2 ~]# kubectl -n kube-system edit cm kube-flannel-cfg
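The edits to kube-flannel.yml and the kube-flannel-cfg ConfigMap were captured only as screenshots. The key points are that the flannel image must be pullable from reg.westos.org and that net-conf.json must match the podSubnet in kubeadm-init.yaml; a sketch of that section (the Backend type here is an assumption, the upstream default is vxlan):
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "Backend": {
        "Type": "host-gw"
      }
    }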
3. Install Kubernetes on server9
# disable swap temporarily
[root@server9 ~]# swapoff -a
# disable swap permanently by commenting out the swap line in /etc/fstab
[root@server9 ~]# vim /etc/fstab
#/dev/mapper/rhel-swap swap swap defaults 0 0
[root@server9 ~]# yum install -y kubeadm.x86_64 kubelet.x86_64
[root@server9 ~]# systemctl enable --now kubelet.service
[root@server9 ~]# systemctl status kubelet.service
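The unpinned install pulls whatever version is newest in the Kubernetes repo; to keep server9's kubelet in line with the control-plane nodes (which report v1.20.4 above), the packages could instead be pinned, for example:
[root@server9 ~]# yum install -y kubeadm-1.20.4 kubelet-1.20.4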
Problem: at this point the flannel pods would not start and the cause was not obvious, so the experiment was paused.
Resolution: the kubeadm-init.yaml manifest in section 3.2 was wrong; after correcting it, flannel came up normally.
3.4.3 Join server9 to the cluster
The node joins successfully:
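The join itself was shown only as a screenshot; it is the worker join command printed by kubeadm init in section 3.2 (regenerate the token and hash as described after section 3.3 if they have expired), run as root on server9:
[root@server9 ~]# kubeadm join 172.25.200.100:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:c07a9cf7eec1a3475e2d4f9076a41ca21aa7a5413e661ca8c3a9c3d6ef1bbb68
[root@server2 ~]# kubectl get nodes ## server9 should now appear with ROLES <none>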
At this point the three-master control plane is in place, but everything is bound to the VIP on server8; if server8 goes down, the whole cluster becomes unreachable. The next section therefore adds server10 and pairs it with server8 in a pacemaker cluster, so the load-balancing layer is highly available as well.
4. Pacemaker HA with server8 and server10
4.1 Install haproxy on server10
[root@server8 ~]# scp /etc/yum.repos.d/haojin.repo server10:/etc/yum.repos.d/haojin.repo
## copy over the repo file
[root@server10 ~]# yum install -y haproxy
[root@server8 ~]# scp /etc/haproxy/haproxy.cfg server10:/etc/haproxy/
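pacemaker will manage both the VIP and the haproxy service as cluster resources in section 4.5, so it is common (not shown in the original notes) to stop managing them by hand on server8 first; note that the VIP, and with it apiserver access, is briefly unavailable until the resources are created:
[root@server8 ~]# ip addr del 172.25.200.100/24 dev eth0 ## remove the VIP added manually in section 3.1
[root@server8 ~]# systemctl disable --now haproxy.service ## the cluster's systemd:haproxy resource will start it instead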
4.2 Install pacemaker
1. Set up passwordless SSH
[root@server8 ~]# ssh-keygen ## generate a key pair
[root@server8 ~]# ssh-copy-id server8 ## passwordless login to itself
[root@server8 ~]# ssh-copy-id server10 ## and to server10, so the ssh server10 ... commands below work without a password
2. Install pacemaker
[root@server8 ~]# yum install -y pacemaker pcs psmisc policycoreutils-python
## install pacemaker and its related components
[root@server8 yum.repos.d]# ssh server10 yum install -y pacemaker pcs psmisc policycoreutils-python ## install the same packages on server10
3. Start the pcsd service
[root@server8 ~]# systemctl enable --now pcsd.service
[root@server8 ~]# ssh server10 systemctl enable --now pcsd.service ## start the service on server10 as well
4.3 Configure pacemaker
1. Set a password for the hacluster user
[root@server8 ~]# passwd hacluster ## the hacluster user is created by the pcs package; give it the same password on both nodes
[root@server8 ~]# echo westos | passwd --stdin hacluster
[root@server8 ~]# ssh server10 'echo westos | passwd --stdin hacluster'
[root@server8 ~]# cat /etc/shadow ## confirm the password hash has been set
[root@server10 ~]# cat /etc/shadow
2. Authenticate the two nodes against each other
[root@server8 ~]# pcs cluster auth server8 server10
Username: hacluster ## the user whose password was set above
Password:
server8: Authorized
server10: Authorized
3. Create the cluster
[root@server8 ~]# pcs cluster setup --name mycluster server8 server10
## the cluster is named mycluster
4. Start the cluster and enable it at boot
[root@server8 ~]# pcs cluster start --all
server8: Starting Cluster (corosync)... ## corosync provides the cluster heartbeat
server10: Starting Cluster (corosync)... ## pacemaker is the resource manager
server8: Starting Cluster (pacemaker)...
server10: Starting Cluster (pacemaker)...
[root@server8 ~]# pcs cluster enable --all
server8: Cluster Enabled
server10: Cluster Enabled
4.4 Disable stonith
[root@server8 ~]# corosync-cfgtool -s
Printing ring status.
Local node ID 1
RING ID 0
id = 172.25.200.8
status = ring 0 active with no faults
[root@server8 ~]# pcs status ## check the cluster status
Cluster name: mycluster
WARNINGS:
No stonith devices and stonith-enabled is not false
[root@server8 ~]# pcs property set stonith-enabled=false ## disable stonith
[root@server8 ~]# pcs status ## no more warnings in the status output
[root@server8 ~]# crm_verify -LV ## the configuration now validates cleanly
4.5 Configure the cluster resources
4.5.1 Add the VIP resource
[root@server8 ~]# pcs resource standards ## list the available resource standards
lsb
ocf
service
systemd
[root@server8 ~]# pcs resource create --help ## help for creating resources
[root@server8 ~]# pcs resource create vip ocf:heartbeat:IPaddr2 ip=172.25.200.100 cidr_netmask=24 op monitor interval=30s ## create the vip resource; op monitor adds a 30-second health check
[root@server8 ~]# pcs status ## check that the resource was created and started
4.5.2 Add the haproxy service resource
[root@server8 ~]# pcs resource create haproxy systemd:haproxy op monitor interval=30s
## add haproxy as a systemd-managed resource
[root@server8 ~]# pcs status ## check the status
4.5.3 Put the resources in one group
[root@server8 ~]# pcs resource group add apiserver vip haproxy
# the VIP starts first, then the service; both resources go into the group named apiserver so they always run on the same node
4.6 Test failover (active/standby)
[root@server8 ~]# pcs node standby ## simulate a failure by putting server8 in standby
[root@server8 ~]# pcs status
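With server8 in standby, pcs status should show the apiserver group (vip plus haproxy) running on server10, and the control plane should stay reachable through the VIP; to finish the test, bring server8 back:
[root@server10 ~]# ip addr show eth0 ## the VIP should now be on server10
[root@server2 ~]# kubectl get nodes ## the cluster is still reachable via 172.25.200.100
[root@server8 ~]# pcs node unstandby ## bring server8 back into the cluster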