Keepalived reference: https://blog.csdn.net/mofiu/article/details/76644012
I. Deployment environment preparation
- Preparation steps to run on every node
1. Set hostnames
master01 – 10.4.7.11
master02 – 10.4.7.12
master03 – 10.4.7.13
worker01 – 10.4.7.14
worker02 – 10.4.7.15
# Run on 10.4.7.11
[root@localhost ~]# hostnamectl set-hostname master01 && bash
# Run on 10.4.7.12
[root@localhost ~]# hostnamectl set-hostname master02 && bash
# Run on 10.4.7.13
[root@localhost ~]# hostnamectl set-hostname master03 && bash
# Run on 10.4.7.14
[root@localhost ~]# hostnamectl set-hostname worker01 && bash
# Run on 10.4.7.15
[root@localhost ~]# hostnamectl set-hostname worker02 && bash
2. Disable the firewall, swap, SELinux, and dnsmasq; reset iptables
# Disable the firewall
[root@localhost ~]# systemctl stop firewalld && systemctl disable firewalld
# Reset iptables
[root@localhost ~]# iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat && iptables -P FORWARD ACCEPT
# Disable swap
[root@localhost ~]# swapoff -a
[root@localhost ~]# sed -i '/swap/s/^\(.*\)$/#\1/g' /etc/fstab
# Disable SELinux (setenforce 0 only lasts until reboot; set SELINUX=disabled in /etc/selinux/config to make it permanent)
[root@localhost ~]# setenforce 0
# Stop dnsmasq (otherwise Docker containers may fail to resolve domain names)
[root@localhost ~]# service dnsmasq stop && systemctl disable dnsmasq
3. Install dependency packages
# Update yum
[root@localhost ~]# yum update
# Install the dependencies
[root@localhost ~]# yum install -y conntrack ipvsadm ipset jq sysstat curl iptables libseccomp
4. Kernel parameter settings
# Create the configuration file
[root@localhost ~]# cat > /etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
EOF
# Apply the settings
[root@localhost ~]# sysctl -p /etc/sysctl.d/kubernetes.conf
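- If sysctl complains that the net.bridge.* keys do not exist, the br_netfilter kernel module has not been loaded yet; load it (and persist it across reboots via systemd-modules-load) before re-applying:
[root@localhost ~]# modprobe br_netfilter
[root@localhost ~]# echo "br_netfilter" > /etc/modules-load.d/br_netfilter.conf
[root@localhost ~]# sysctl -p /etc/sysctl.d/kubernetes.conf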
5. Install Docker
[root@localhost ~]# yum -y install yum-utils device-mapper-persistent-data lvm2 epel-release
# Configure the Aliyun Docker CE repository
[root@localhost ~]# yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
[root@localhost ~]# yum install -y docker-ce
[root@localhost ~]# systemctl start docker
[root@localhost ~]# systemctl enable docker
[root@localhost ~]# systemctl status docker
[root@localhost ~]# cat > /etc/docker/daemon.json <<EOF
{
"storage-driver": "overlay2",
"insecure-registries": ["registry.access.redhat.com","quay.io","harbor.prod.com"],
"exec-opts": ["native.cgroupdriver=systemd"],
"live-restore": true
}
EOF
# "graph": "/data/docker", # 可以加上这个参数,是镜像、容器的存储路径,默认是/var/lib/docker
[root@localhost ~]# systemctl daemon-reload
[root@localhost ~]# systemctl restart docker
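- A quick check that the settings from daemon.json took effect; the output should show "Storage Driver: overlay2" and "Cgroup Driver: systemd":
[root@localhost ~]# docker info | grep -E 'Storage Driver|Cgroup Driver'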
6. Install ipset on every node and enable ip_vs
[root@localhost ~]# yum -y install ipvsadm ipset sysstat conntrack libseccomp
[root@localhost ~]# cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/sh
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
[root@localhost ~]# chmod 755 /etc/sysconfig/modules/ipvs.modules
[root@localhost ~]# sh /etc/sysconfig/modules/ipvs.modules
[root@localhost ~]# lsmod | grep -e ip_vs -e nf_conntrack_ipv4
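- Note: the nf_conntrack_ipv4 module only exists on kernels older than 4.19 (e.g. the stock CentOS 7 kernel); on newer kernels it was merged into nf_conntrack, so the modprobe above fails there. A variant that works on both:
[root@localhost ~]# modprobe nf_conntrack_ipv4 2>/dev/null || modprobe nf_conntrack
[root@localhost ~]# lsmod | grep -e ip_vs -e nf_conntrack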
II. Install the kubeadm deployment tools
1. Configure the yum repository for kubeadm
[root@localhost ~]# cat > /etc/yum.repos.d/kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
2. Download and install the tools
# List available kubeadm versions
[root@localhost ~]# yum list kubeadm --showduplicates | sort -r
# Install a specific version
[root@localhost ~]# yum install -y kubelet-1.19.9 kubeadm-1.19.9 kubectl-1.19.9
# Verify the installation
[root@localhost ~]# rpm -qa | grep kube
kubernetes-cni-0.8.7-0.x86_64
kubectl-1.19.9-0.x86_64
kubelet-1.19.9-0.x86_64
kubeadm-1.19.9-0.x86_64
# Set the kubelet cgroup driver
# The kubelet cgroup driver must match Docker's. Because Docker's exec-opts was set to native.cgroupdriver=systemd above, this step can be skipped; if Docker was left on cgroupfs, switch the kubelet to cgroupfs instead, as follows:
# sed -i "s/cgroup-driver=systemd/cgroup-driver=cgroupfs/g" /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
# Enable and start kubelet
[root@localhost ~]# systemctl enable kubelet && systemctl start kubelet
III. Kubernetes high-availability deployment
1. Install haproxy and keepalived on the master nodes
- Run on all three master nodes (10.4.7.11, 10.4.7.12, 10.4.7.13)
1.1 Install haproxy and keepalived
# Run on all three master nodes
[root@master01 ~]# yum -y install haproxy keepalived
1.2 Modify the keepalived configuration on the three master nodes
Keepalived configuration on master01
# Back up the original configuration file
[root@master01 ~]# cp -p /etc/keepalived/keepalived.conf /etc/keepalived/keepalived_example.conf
# Edit the configuration file
[root@master01 ~]# vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server localhost
   smtp_connect_timeout 30
   router_id LVS_DEVEL
   vrrp_skip_check_adv_addr
   # vrrp_strict           # comment this parameter out, otherwise the VIP cannot be pinged
   vrrp_garp_interval 0
   vrrp_gna_interval 0
   # add the following two lines inside global_defs
   script_user root        # user the check script runs as
   enable_script_security  # refuse to run scripts whose path is writable by non-root users
}
# add the following check_haproxy script block
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"  # path to the health-check script
    interval 3   # interval between script runs, in seconds (default 1)
    weight -2    # priority adjustment on failure, range -254..254 (default 2)
    fall 10      # consecutive failures before the check is considered failed
    rise 2       # consecutive successes before the check is considered healthy
}
vrrp_instance VI_1 {
    state MASTER           # master01 takes the MASTER role
    interface ens33        # local network interface name
    virtual_router_id 51
    priority 100           # priority 100; the VIP prefers the node with the highest priority
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        # define the VIP here
        10.4.7.253         # the VIP address
    }
    # track_script block: run the check script periodically
    track_script {
        check_haproxy      # the vrrp_script block defined above
    }
}
Keepalived configuration on master02
# Back up the original configuration file
[root@master02 ~]# cp -p /etc/keepalived/keepalived.conf /etc/keepalived/keepalived_example.conf
# Edit the configuration file
[root@master02 ~]# vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server localhost
   smtp_connect_timeout 30
   router_id LVS_DEVEL
   vrrp_skip_check_adv_addr
   # vrrp_strict           # comment this parameter out, otherwise the VIP cannot be pinged
   vrrp_garp_interval 0
   vrrp_gna_interval 0
   # add the following two lines inside global_defs
   script_user root        # user the check script runs as
   enable_script_security  # refuse to run scripts whose path is writable by non-root users
}
# add the following check_haproxy script block
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"  # path to the health-check script
    interval 3   # interval between script runs, in seconds (default 1)
    weight -2    # priority adjustment on failure, range -254..254 (default 2)
    fall 10      # consecutive failures before the check is considered failed
    rise 2       # consecutive successes before the check is considered healthy
}
vrrp_instance VI_1 {
    state BACKUP           # master02 takes the BACKUP role
    nopreempt              # non-preemptive mode, prevents the VIP from flapping back after a transient network problem on the MASTER
    interface ens33        # local network interface name
    virtual_router_id 51
    priority 99            # priority 99, lower than master01; the VIP prefers the node with the highest priority
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        # define the VIP here
        10.4.7.253         # the VIP address
    }
    # track_script block: run the check script periodically
    track_script {
        check_haproxy      # the vrrp_script block defined above
    }
}
Keepalived configuration on master03
# Back up the original configuration file
[root@master03 ~]# cp -p /etc/keepalived/keepalived.conf /etc/keepalived/keepalived_example.conf
# Edit the configuration file
[root@master03 ~]# vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server localhost
   smtp_connect_timeout 30
   router_id LVS_DEVEL
   vrrp_skip_check_adv_addr
   # vrrp_strict           # comment this parameter out, otherwise the VIP cannot be pinged
   vrrp_garp_interval 0
   vrrp_gna_interval 0
   # add the following two lines inside global_defs
   script_user root        # user the check script runs as
   enable_script_security  # refuse to run scripts whose path is writable by non-root users
}
# add the following check_haproxy script block
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"  # path to the health-check script
    interval 3   # interval between script runs, in seconds (default 1)
    weight -2    # priority adjustment on failure, range -254..254 (default 2)
    fall 10      # consecutive failures before the check is considered failed
    rise 2       # consecutive successes before the check is considered healthy
}
vrrp_instance VI_1 {
    state BACKUP           # master03 takes the BACKUP role
    nopreempt              # non-preemptive mode, prevents the VIP from flapping back after a transient network problem on the MASTER
    interface ens33        # local network interface name
    virtual_router_id 51
    priority 98            # priority 98, lower than master01 and master02; the VIP prefers the node with the highest priority
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        # define the VIP here
        10.4.7.253         # the VIP address
    }
    # track_script block: run the check script periodically
    track_script {
        check_haproxy      # the vrrp_script block defined above
    }
}
1.3 Modify the haproxy configuration on the three master nodes
- The haproxy configuration is identical on all three masters, so modify it on one node and copy it to the others
[root@master01 ~]# cp -p /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy_example.cfg
# The full file content is shown below; edit yours to match
[root@master01 ~]# vim /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Example configuration for a possible web application. See the
# full configuration options online.
#
# http://haproxy.1wt.eu/download/1.4/doc/configuration.txt
#
#---------------------------------------------------------------------
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
# to have these messages end up in /var/log/haproxy.log you will
# need to:
#
# 1) configure syslog to accept network log events. This is done
# by adding the '-r' option to the SYSLOGD_OPTIONS in
# /etc/sysconfig/syslog
#
# 2) configure local2 events to go to the /var/log/haproxy.log
# file. A line like the following can be added to
# /etc/sysconfig/syslog
#
# local2.* /var/log/haproxy.log
#
log 127.0.0.1 local2
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon
# turn on stats unix socket
stats socket /var/lib/haproxy/stats
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000
#---------------------------------------------------------------------
# main frontend which proxys to the backends
#---------------------------------------------------------------------
#frontend main *:5000
# acl url_static path_beg -i /static /images /javascript /stylesheets
# acl url_static path_end -i .jpg .gif .png .css .js
#
# use_backend static if url_static
# default_backend app
frontend kubernetes-apiserver
mode tcp
bind *:16443
option tcplog
default_backend kubernetes-apiserver
#---------------------------------------------------------------------
# static backend for serving up images, stylesheets and such
#---------------------------------------------------------------------
#backend static
# balance roundrobin
# server static 127.0.0.1:4331 check
listen stats
bind *:1080
stats auth admin:awesomePassword
stats refresh 5s
stats realm HAProxy\ Statistics
stats uri /admin?stats
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend kubernetes-apiserver
balance roundrobin
server master01 10.4.7.11:6443 check
server master02 10.4.7.12:6443 check
server master03 10.4.7.13:6443 check
# Copy the haproxy configuration to the other two masters
[root@master01 ~]# scp /etc/haproxy/haproxy.cfg 10.4.7.12:/etc/haproxy/haproxy.cfg
[root@master01 ~]# scp /etc/haproxy/haproxy.cfg 10.4.7.13:/etc/haproxy/haproxy.cfg
1.4 Write the haproxy health-check script on the three master nodes
[root@master01 ~]# cat >/etc/keepalived/check_haproxy.sh <<'EOF'
#!/bin/sh
# HAPROXY down
A=`ps -C haproxy --no-header | wc -l`
if [ $A -eq 0 ]
then
systemctl start haproxy
if [ $(ps -C haproxy --no-header | wc -l) -eq 0 ]
then
killall -9 haproxy
echo "HAPROXY down" | mail -s "haproxy"
sleep 360
fi
fi
EOF
[root@master01 ~]# chmod +x /etc/keepalived/check_haproxy.sh
[root@master01 ~]# scp /etc/keepalived/check_haproxy.sh 10.4.7.12:/etc/keepalived/check_haproxy.sh
[root@master01 ~]# scp /etc/keepalived/check_haproxy.sh 10.4.7.13:/etc/keepalived/check_haproxy.sh
1.5 Start keepalived and haproxy on the three masters
[root@master01 ~]# systemctl start keepalived && systemctl enable keepalived
[root@master01 ~]# systemctl start haproxy && systemctl enable haproxy
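- A quick sanity check after starting both services: the VIP should be bound to ens33 on the current MASTER (master01 at first), and haproxy should be listening on ports 16443 and 1080 on every master:
[root@master01 ~]# ip addr show ens33 | grep 10.4.7.253
[root@master01 ~]# ss -lntp | grep -E '16443|1080'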
2. Deploy Kubernetes
2.1 Generate the default kubeadm configuration file
- Modify the default kubeadm configuration file and pull the required images
Run on master01 (this only needs to be done on one master)
[root@master01 ~]# kubeadm config print init-defaults > kubeadm-config.yaml
# Edit the default kubeadm configuration
[root@master01 ~]# vim kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 10.4.7.11     # local IP address
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: master.10.4.7.11          # node name
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: "10.4.7.253:16443"   # VIP and haproxy port
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers   # image repository, switched to the Aliyun mirror (the default upstream registry is slow/blocked; with a VPN this can be left unchanged)
kind: ClusterConfiguration
kubernetesVersion: v1.19.0
networking:
  dnsDomain: cluster.local
  podSubnet: "10.244.0.0/16"      # pod CIDR
  serviceSubnet: 10.96.0.0/12
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
featureGates:
  SupportIPVSProxyMode: true
mode: ipvs
# Pull the required images
[root@master01 ~]# kubeadm config images pull --config kubeadm-config.yaml
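- Optionally, list the exact images kubeadm needs before pulling, to confirm the Aliyun repository from the config file is being used:
[root@master01 ~]# kubeadm config images list --config kubeadm-config.yaml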
# Alternatively, the same changes can be scripted like this (shown here on master02)
[root@master02 ~]# kubeadm config print init-defaults > kubeadm-config.yaml
[root@master02 ~]# reip=$(hostname -I |awk '{print $1}')
[root@master02 ~]# nodeName=$(str=$(hostname);echo ${str:0:6}-$(hostname -I |awk '{print $1}'))
[root@master02 ~]# havip='controlPlaneEndpoint: "10.4.7.253:16443"' # adjust to your VIP and haproxy port
[root@master02 ~]# podip='podSubnet: "10.244.0.0/16"'
[root@master02 ~]# alireg="registry.cn-hangzhou.aliyuncs.com/google_containers"
[root@master02 ~]# sed -i "/name: $(hostname)/s/name.*/name: ${nodeName}/" kubeadm-config.yaml
[root@master02 ~]# sed -i "/clusterName: /a${havip}" kubeadm-config.yaml
[root@master02 ~]# sed -i "/dnsDomain: /a$(echo "\ " ${podip})" kubeadm-config.yaml
[root@master02 ~]# sed -i "/advertiseAddress: /s/advertiseAddress.*/advertiseAddress: ${reip}/" kubeadm-config.yaml
[root@master02 ~]# cat >> kubeadm-config.yaml << EOF
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
featureGates:
SupportIPVSProxyMode: true
EOF
# Pull the required images
[root@master02 ~]# kubeadm config images pull --config kubeadm-config.yaml
2.2 Initialize the cluster
- Run on master01
- Save the output of this command; the join commands it prints are needed later
[root@master01 ~]# kubeadm init --config kubeadm-config.yaml
W0608 14:48:17.402870 12982 configset.go:348] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
[init] Using Kubernetes version: v1.19.0
[preflight] Running pre-flight checks
[WARNING SystemVerification]: this Docker version is not on the list of validated versions: 20.10.7. Latest validated version: 19.03
[WARNING Hostname]: hostname "master01" could not be reached
[WARNING Hostname]: hostname "master01": lookup master01 on 8.8.8.8:53: no such host
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local master01] and IPs [10.96.0.1 10.4.7.11 10.4.7.253]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost master01] and IPs [10.4.7.11 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost master01] and IPs [10.4.7.11 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "admin.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.
[apiclient] All control plane components are healthy after 106.014604 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.19" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node master01 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master01 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: abcdef.0123456789abcdef
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 10.4.7.253:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:a17804d9aa1f198f6530269df2c10fed923570dc2acbde9706490184ee876acf \
--control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 10.4.7.253:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:a17804d9aa1f198f6530269df2c10fed923570dc2acbde9706490184ee876acf
# For a non-root user, run the following three commands
[root@master01 ~]# mkdir -p $HOME/.kube
[root@master01 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master01 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
- If cluster initialization fails, reset it with:
kubeadm reset
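- kubeadm reset does not clean up everything; after a failed attempt it usually also helps to remove the leftover kubeconfig and CNI configuration and to flush the iptables/IPVS rules before re-running kubeadm init (a sketch, adjust paths to your environment):
[root@master01 ~]# kubeadm reset -f
[root@master01 ~]# rm -rf $HOME/.kube/config /etc/cni/net.d
[root@master01 ~]# iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat
[root@master01 ~]# ipvsadm --clear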
2.3 Copy the certificates and keys to the other master nodes
- Create the following directory on the other two masters to hold the keys and certificates
- Then copy the certificates and keys over
[root@master01 ~]# ssh 10.4.7.12 "mkdir -p /etc/kubernetes/pki/etcd"
[root@master01 ~]# ssh 10.4.7.13 "mkdir -p /etc/kubernetes/pki/etcd"
# Copy to master02
[root@master01 ~]# scp /etc/kubernetes/pki/ca.* root@10.4.7.12:/etc/kubernetes/pki/
[root@master01 ~]# scp /etc/kubernetes/pki/sa.* root@10.4.7.12:/etc/kubernetes/pki/
[root@master01 ~]# scp /etc/kubernetes/pki/front-proxy-ca.* root@10.4.7.12:/etc/kubernetes/pki/
[root@master01 ~]# scp /etc/kubernetes/pki/etcd/ca.* root@10.4.7.12:/etc/kubernetes/pki/etcd/
[root@master01 ~]# scp /etc/kubernetes/admin.conf root@10.4.7.12:/etc/kubernetes/
# Copy to master03
[root@master01 ~]# scp /etc/kubernetes/pki/ca.* root@10.4.7.13:/etc/kubernetes/pki/
[root@master01 ~]# scp /etc/kubernetes/pki/sa.* root@10.4.7.13:/etc/kubernetes/pki/
[root@master01 ~]# scp /etc/kubernetes/pki/front-proxy-ca.* root@10.4.7.13:/etc/kubernetes/pki/
[root@master01 ~]# scp /etc/kubernetes/pki/etcd/ca.* root@10.4.7.13:/etc/kubernetes/pki/etcd/
[root@master01 ~]# scp /etc/kubernetes/admin.conf root@10.4.7.13:/etc/kubernetes/
2.4 Copy admin.conf from the master to the worker nodes
[root@master01 ~]# scp /etc/kubernetes/admin.conf root@10.4.7.14:/etc/kubernetes/
[root@master01 ~]# scp /etc/kubernetes/admin.conf root@10.4.7.15:/etc/kubernetes/
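- Copying admin.conf to the workers is only needed if kubectl should work there as well; to use it, copy it into ~/.kube/config on the worker (or export KUBECONFIG=/etc/kubernetes/admin.conf), for example:
[root@worker01 ~]# mkdir -p $HOME/.kube
[root@worker01 ~]# cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@worker01 ~]# kubectl get nodes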
2.5 Join the other two masters as control-plane nodes
- Use the join commands produced during cluster initialization
- Run on the other two master nodes
Run on master02
[root@master02 ~]# kubeadm join 10.4.7.253:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:a17804d9aa1f198f6530269df2c10fed923570dc2acbde9706490184ee876acf \
--control-plane --node-name=master-10.4.7.12
# For a non-root user, run the following three commands
[root@master02 ~]# mkdir -p $HOME/.kube
[root@master02 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master02 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run on master03
[root@master03 ~]# kubeadm join 10.4.7.253:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:a17804d9aa1f198f6530269df2c10fed923570dc2acbde9706490184ee876acf \
--control-plane --node-name=master-10.4.7.13
# For a non-root user, run the following three commands
[root@master03 ~]# mkdir -p $HOME/.kube
[root@master03 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master03 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
2.6 Join the two worker nodes to the cluster
- Run on worker01 (the "--node-name=<node name>" flag can be used to set the node name)
[root@worker01 ~]# kubeadm join 10.4.7.253:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:a17804d9aa1f198f6530269df2c10fed923570dc2acbde9706490184ee876acf --node-name=worker-10.4.7.14
- Run on worker02
[root@worker02 ~]# kubeadm join 10.4.7.253:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:a17804d9aa1f198f6530269df2c10fed923570dc2acbde9706490184ee876acf --node-name=worker-10.4.7.15
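- If the bootstrap token from the initial kubeadm init has expired (the default TTL is 24h), a fresh worker join command can be generated on any existing master; kubeadm can also re-upload the control-plane certificates and print a certificate key, as an alternative to the manual scp in step 2.3:
# print a new worker join command
[root@master01 ~]# kubeadm token create --print-join-command
# re-upload control-plane certificates and print the certificate key (used with --control-plane --certificate-key)
[root@master01 ~]# kubeadm init phase upload-certs --upload-certs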
2.7 Install the network plugin
- First check the node status; all nodes are still NotReady
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master-10.4.7.11 NotReady master 65m v1.19.9
master-10.4.7.12 NotReady master 19m v1.19.9
master-10.4.7.13 NotReady master 24m v1.19.9
worker-10.4.7.14 NotReady <none> 2m25s v1.19.9
worker-10.4.7.15 NotReady <none> 3m20s v1.19.9
- Install the network plugin (run on one master only)
[root@master01 ~]# kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
- Wait until all the network plugin pods are running, then check the node status again
[root@master01 ~]# kubectl get pod -n kube-system -owide|grep flannel
kube-flannel-ds-cqxks 1/1 Running 0 14m 10.4.7.15 worker02 <none> <none>
kube-flannel-ds-fvkbp 1/1 Running 0 14m 10.4.7.14 worker01 <none> <none>
kube-flannel-ds-pvcmd 1/1 Running 0 14m 10.4.7.12 master02 <none> <none>
kube-flannel-ds-xcsk4 1/1 Running 0 12m 10.4.7.13 master03 <none> <none>
kube-flannel-ds-xncbk 1/1 Running 0 14m 10.4.7.11 master01 <none> <none>
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master-10.4.7.11 Ready master 72m v1.19.9
master-10.4.7.12 Ready master 27m v1.19.9
master-10.4.7.13 Ready master 32m v1.19.9
worker-10.4.7.14 Ready <none> 10m v1.19.9
worker-10.4.7.15 Ready <none> 11m v1.19.9
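- Optionally, fill in the ROLES column for the workers (currently <none>) by adding the conventional worker role label:
[root@master01 ~]# kubectl label node worker-10.4.7.14 node-role.kubernetes.io/worker=
[root@master01 ~]# kubectl label node worker-10.4.7.15 node-role.kubernetes.io/worker=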
2.8 Download the etcdctl client command-line tool
Download the tarball
wget https://github.com/etcd-io/etcd/releases/download/v3.4.14/etcd-v3.4.14-linux-amd64.tar.gz
Extract it and put etcdctl on the PATH
tar -zxf etcd-v3.4.14-linux-amd64.tar.gz
mv etcd-v3.4.14-linux-amd64/etcdctl /usr/local/bin
chmod +x /usr/local/bin/etcdctl
Check the health of the etcd HA cluster
[root@master01 ~]# ETCDCTL_API=3 etcdctl \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
--write-out=table \
--endpoints=10.4.7.11:2379,10.4.7.12:2379,10.4.7.13:2379 \
endpoint health
+--------------------+--------+-------------+-------+
| ENDPOINT | HEALTH | TOOK | ERROR |
+--------------------+--------+-------------+-------+
| 10.4.7.11:2379 | true | 19.782159ms | |
| 10.4.7.12:2379 | true | 20.014403ms | |
| 10.4.7.13:2379 | true | 28.543079ms | |
+--------------------+--------+-------------+-------+
List the members of the etcd HA cluster
[root@master01 ~]# ETCDCTL_API=3 etcdctl \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
--write-out=table \
--endpoints=10.4.7.11:2379,10.4.7.12:2379,10.4.7.13:2379 \
member list
+------------------+---------+----------+----------------------------+----------------------------+------------+
| ID | STATUS | NAME | PEER ADDRS | CLIENT ADDRS | IS LEARNER |
+------------------+---------+----------+----------------------------+----------------------------+------------+
| 1c422186d56f7d31 | started | master01 | https://10.4.7.11:2380 | https://10.4.7.11:2379 | false |
| 1d722801456abd95 | started | master03 | https://10.4.7.13:2380 | https://10.4.7.13:2379 | false |
| 2b88f5e384805b4a | started | master02 | https://10.4.7.12:2380 | https://10.4.7.12:2379 | false |
+------------------+---------+----------+----------------------------+----------------------------+------------+
Check the etcd HA cluster leader
[root@master01 ~]# ETCDCTL_API=3 etcdctl \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
--write-out=table \
--endpoints=10.4.7.11:2379,10.4.7.13:2379,10.4.7.12:2379 \
endpoint status
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| 10.4.7.11:2379 | 1c422186d56f7d31 | 3.4.13 | 3.2 MB | true | false | 5 | 17663 | 17663 | |
| 10.4.7.13:2379 | 1d722801456abd95 | 3.4.13 | 3.2 MB | false | false | 5 | 17663 | 17663 | |
| 10.4.7.12:2379 | 2b88f5e384805b4a | 3.4.13 | 3.3 MB | false | false | 5 | 17663 | 17663 | |
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
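- The same etcdctl certificates can be used to take a snapshot backup of the cluster data, which is worth doing before upgrades or risky changes; a minimal sketch against a single endpoint (the snapshot path is just an example):
[root@master01 ~]# ETCDCTL_API=3 etcdctl \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
--endpoints=10.4.7.11:2379 \
snapshot save /root/etcd-snapshot-$(date +%F).db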
2.9 Components report an unhealthy status
- If the scheduler and controller-manager report the Unhealthy errors shown below, comment out the "- --port=0" line in kube-scheduler.yaml and kube-controller-manager.yaml under /etc/kubernetes/manifests on all three masters
Symptom:
[root@master01 ~]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
scheduler Unhealthy Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
controller-manager Unhealthy Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
etcd-0 Healthy {"health":"true"}
Fix: run the following two commands on all three masters
sed -i '/- --port=0/s/^/#/' /etc/kubernetes/manifests/kube-scheduler.yaml
sed -i '/- --port=0/s/^/#/' /etc/kubernetes/manifests/kube-controller-manager.yaml
- Check the component status again
[root@master01 manifests]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy {"health":"true"}
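- To confirm kube-proxy really runs in IPVS mode (as configured in kubeadm-config.yaml), list the virtual servers it programs on any node; the service network entries (for example 10.96.0.1:443 forwarding to the three apiservers) should appear:
[root@master01 ~]# ipvsadm -Ln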
Reference kubeadm-config.yaml HA template with external etcd
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 172.16.3.130
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: master
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  timeoutForControlPlane: 4m0s
  certSANs:
  - "node0"
  - "node1"
  - "node2"
  - "172.16.3.110"
  - "172.16.3.130"
  - "172.16.3.131"
  - "172.16.3.132"
  - "127.0.0.1"
  extraArgs:
    etcd-cafile: /etc/etcd/ssl/ca.pem
    etcd-certfile: /etc/etcd/ssl/etcd.pem
    etcd-keyfile: /etc/etcd/ssl/etcd-key.pem
controlPlaneEndpoint: "172.16.3.130:6443"
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  external:
    caFile: /etc/etcd/ssl/ca.pem
    certFile: /etc/etcd/ssl/etcd.pem
    keyFile: /etc/etcd/ssl/etcd-key.pem
    endpoints:
    - https://172.16.3.130:2379
    - https://172.16.3.131:2379
    - https://172.16.3.132:2379
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.19.0
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: "10.244.0.0/16"
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: "ipvs"
Reference: https://blog.csdn.net/xixihahalelehehe/article/details/108184267