###################### Installing a k8s cluster from a private registry ################################
# Push the images downloaded on the master to the self-hosted Harbor; create the "kubeadm" project in Harbor beforehand.
# Install Kubernetes with kubeadm.
Cluster host layout
192.128.232.9 nginx01,keepalived01
192.128.232.10 nginx02,keepalived02
192.128.232.11 master01,dns
192.128.232.12 master02
192.128.232.13 master03
192.128.232.14 node01
192.128.232.16 node02
192.128.232.17 node03
192.128.232.18 devops,harbor
192.128.232.15 VIP address
Part 1: Base initialization and repo setup; run on every node.
[root@master ~]# yum install -y yum-utils device-mapper-persistent-data lvm2
[root@master ~]# yum install wget net-tools telnet tree nmap sysstat lrzsz dos2unix bind-utils ntpdate -y
[root@master ~]# yum -y install bash-completion
[root@master ~]# yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
[root@master ~]# yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
[root@master yum.repos.d]# cat > kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
[root@master ~]# rm /etc/localtime -rf
[root@master ~]# ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
[root@master ~]# ntpdate 0.asia.pool.ntp.org
[root@master ~]# yum repolist && yum makecache fast
# Disable SELinux (permanent; takes effect after a reboot).
[root@master01 ~]# sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
# Disable swap
[root@master01 ~]# swapoff -a
[root@master01 ~]# sed -i.bak '/swap/s/^/#/' /etc/fstab
# Stop and disable firewalld.
[root@master ~]# systemctl stop firewalld
[root@master ~]# systemctl disable firewalld
[root@master01 ~]# ssh-keygen -f ~/.ssh/id_rsa -N ''
[root@master01 ~]# cat k8s_ip.txt
192.128.232.9
192.128.232.10
192.128.232.11
192.128.232.12
192.128.232.13
192.128.232.14
192.128.232.16
192.128.232.17
192.128.232.18
[root@master01 ~]# for all_ip in `cat k8s_ip.txt`
do
echo ">>> ${all_ip}"
ssh-copy-id -i ~/.ssh/id_rsa.pub root@${all_ip}
done
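A quick sanity check that passwordless SSH now works on every host (not part of the original steps):

for all_ip in `cat k8s_ip.txt`; do ssh root@${all_ip} "hostname; uptime"; done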
Part 2: Install the DNS service (bind)
[root@master01 ~]# yum install bind -y
[root@master01 ~]# cat /etc/named.conf
options {
        listen-on port 53 { 192.128.232.11; };   # IP of the DNS host
        listen-on-v6 port 53 { ::1; };
        directory       "/var/named";
        dump-file       "/var/named/data/cache_dump.db";
        statistics-file "/var/named/data/named_stats.txt";
        memstatistics-file "/var/named/data/named_mem_stats.txt";
        recursing-file  "/var/named/data/named.recursing";
        secroots-file   "/var/named/data/named.secroots";
        allow-query     { any; };
        forwarders      { 192.128.232.2; };      # added; usually forward to the gateway
        dnssec-enable no;                        # originally yes
        dnssec-validation no;                    # originally yes
        recursion yes;
# Notes on named.conf:
# listen-on: the listen address/port; bind it to the internal NIC so other machines can use this server
# allow-query: which clients may query this DNS server
# forwarders: the upstream DNS server(s)
# Check the config; no output means no errors
[root@master01 ~]# named-checkconf
# Append the zone definitions below to the end of the file
[root@master01 ~]# cat >> /etc/named.rfc1912.zones <<EOF
# host (infrastructure) domain
zone "host.com" IN {
        type master;
        file "host.com.zone";
        allow-update { 192.128.232.11; };
};
# business domain
zone "od.com" IN {
        type master;
        file "od.com.zone";
        allow-update { 192.128.232.11; };
};
EOF
# Create the host-domain zone file (quote the heredoc delimiter so the shell does not expand $ORIGIN/$TTL)
[root@master01 ~]# cat >> /var/named/host.com.zone <<'EOF'
$ORIGIN host.com.
$TTL 600        ; 10 minutes
@               IN SOA  dns.host.com. dnsadmin.host.com. (
                        2020011201      ; serial
                        10800           ; refresh (3 hours)
                        900             ; retry (15 minutes)
                        604800          ; expire (1 week)
                        86400           ; minimum (1 day)
                        )
                NS      dns.host.com.
$TTL 60 ; 1 minute
dns             A       192.128.232.11
nginx01         A       192.128.232.9
nginx02         A       192.128.232.10
master01        A       192.128.232.11
master02        A       192.128.232.12
master03        A       192.128.232.13
node01          A       192.128.232.16
node02          A       192.128.232.17
EOF
# Create the business-domain zone file (again with a quoted heredoc delimiter)
[root@master01 ~]# cat > /var/named/od.com.zone <<'EOF'
$ORIGIN od.com.
$TTL 600        ; 10 minutes
@               IN SOA  dns.od.com. dnsadmin.od.com. (
                        2020011201      ; serial
                        10800           ; refresh (3 hours)
                        900             ; retry (15 minutes)
                        604800          ; expire (1 week)
                        86400           ; minimum (1 day)
                        )
                NS      dns.od.com.
$TTL 60 ; 1 minute
dns             A       192.128.232.11
EOF
# Check again for errors
[root@master01 ~]# named-checkconf
[root@master01 ~]# systemctl start named
[root@master01 ~]# netstat -luntp|grep 53
# Notes
$TTL 600: how long (600 seconds = 10 minutes) resolvers may cache records from this zone
SOA: the start-of-authority record of the zone; the five values in parentheses are serial, refresh, retry, expire and minimum
dnsadmin.od.com.: the zone contact written in DNS form (a placeholder mailbox)
serial: the zone's change counter; bump it on every edit
$ORIGIN: appended to unqualified names below, so "dns" is seen from outside as dns.od.com
netstat -luntp: shows listening TCP/UDP ports and the owning processes
# On master01, check that the host domain resolves
[root@master01 ~]# dig -t A master01.host.com @192.128.232.11 +short
192.128.232.11
# Point both Linux and Windows clients at this server by setting it as their resolver; on Linux edit the interface config
[root@master01 ~]# vi /etc/sysconfig/network-scripts/ifcfg-eth0
DNS1=192.128.232.11   # note the key is DNS1; a plain DNS entry may fail the later tests
[root@master01 ~]# systemctl restart network
# /etc/resolv.conf is then regenerated with "nameserver 192.128.232.11"
[root@master01 ~]# cat /etc/resolv.conf
# Generated by NetworkManager
nameserver 192.128.232.11
[root@master01 ~]# ping www.baidu.com
[root@master01 ~]# ping master01.host.com
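If you want to verify every A record in one pass, a small loop like the following can be run from any host (not in the original notes):

for h in dns nginx01 nginx02 master01 master02 master03 node01 node02; do
  echo -n "${h}.host.com -> "; dig -t A ${h}.host.com @192.128.232.11 +short
done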
Part 3: Install the Harbor private registry
# Harbor is installed on 192.128.232.18.
[root@devops ~]# cd /opt
[root@devops ~]# mkdir src
[root@devops ~]# cd src/
# Download the offline installer from the address below, or reuse the package used here
https://github.com/goharbor/harbor/releases
[root@devops src]# tar xf harbor-offline-installer-v2.0.1.tgz -C /opt/
[root@devops ~]# cd /opt/harbor/
[root@devops harbor]# cp harbor.yml.tmpl harbor.yml
# Edit the Harbor config file; change the four settings below
[root@devops harbor]# vi harbor.yml
hostname: harbor.od.com
http:
  port: 180
data_volume: /data/harbor
log:
  local:
    location: /data/harbor/logs
# Run the Harbor installer
[root@devops harbor]# mkdir -p /data/harbor/logs
[root@devops harbor]# yum install docker-compose -y
[root@devops harbor]# ./install.sh
[root@devops harbor]# docker-compose ps
[root@devops harbor]# docker ps -a
[root@devops harbor]# rpm -Uvh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm
[root@devops harbor]# yum -y install nginx
### Possible error:
# yum reports: "/var/run/yum.pid is locked; another program with PID 1610 is running.
# Another app is currently holding the yum lock; waiting for it to exit..."
# Common fix: run rm -f /var/run/yum.pid to remove the lock file, then rerun yum.
###
[root@devops harbor]# vi /etc/nginx/conf.d/harbor.od.com.conf
server {
    listen       80;
    server_name  harbor.od.com;
    client_max_body_size 1000m;

    location / {
        proxy_pass http://127.0.0.1:180;
    }
}
[root@devops harbor]# nginx -t
[root@devops harbor]# systemctl start nginx
[root@devops harbor]# systemctl enable nginx
# Add a record for the domain on the self-built DNS host (192.128.232.11):
[root@master01 ~]# vi /var/named/od.com.zone
# Remember to bump the serial number
# Append the record at the bottom
harbor          A       192.128.232.18
[root@master01 ~]# systemctl restart named
[root@master01 ~]# dig -t A harbor.od.com +short
192.128.232.18
# Open harbor.od.com in a browser and create the "kubeadm" project
Username: admin
Password: Harbor12345
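A quick reachability check can be run from any host that resolves harbor.od.com before relying on the registry; the front-page check is enough on its own, the /api/v2.0/systeminfo path is the Harbor 2.x API and is an assumption here:

curl -s -o /dev/null -w '%{http_code}\n' http://harbor.od.com/
curl -s -o /dev/null -w '%{http_code}\n' http://harbor.od.com/api/v2.0/systeminfo   # Harbor 2.x API endpoint (assumption)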
Part 4: Install Docker and the Kubernetes packages (every cluster node)
# Install kubeadm, kubelet and kubectl at the same version (docker-ce is installed alongside them).
[root@master ~]# yum list kubelet --showduplicates | sort -r
[root@master ~]# yum -y install kubectl-1.18.18 kubelet-1.18.18 kubeadm-1.18.18 docker-ce-20.10.6
[root@master ~]# modprobe br_netfilter
[root@master ~]# cat > /etc/sysctl.d/kubernetes.conf << EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
EOF
[root@master ~]# sysctl -p /etc/sysctl.d/kubernetes.conf
[root@master ~]# systemctl enable docker && systemctl start docker && systemctl enable kubelet
# Set registry-mirrors to speed up pulls of official Docker Hub images
# Set the cgroup driver to systemd so it matches the kubelet; data-root (formerly "graph") moves Docker's storage to /data/docker
[root@master ~]# mkdir /data/docker -p
[root@master ~]# cat > /etc/docker/daemon.json << EOF
{
"registry-mirrors": ["https://4wvlvmti.mirror.aliyuncs.com"],
"storage-driver": "overlay2",
"insecure-registries": ["registry.access.redhat.com","quay.io","harbor.od.com"],
"graph": "/data/docker",
"exec-opts": ["native.cgroupdriver=systemd"],
"live-restore": true
}
EOF
[root@master ~]# systemctl daemon-reload
[root@master ~]# systemctl restart docker
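After the restart it is worth confirming that Docker picked up the systemd cgroup driver and the new storage location (a quick check, not in the original):

docker info 2>/dev/null | grep -Ei 'cgroup driver|docker root dir'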
[root@master ~]# yum -y install ipvsadm ipset sysstat conntrack libseccomp
[root@master ~]# hostnamectl set-hostname master01
[root@master01 ~]# cat >> /etc/hosts << EOF
192.128.232.9 nginx01
192.128.232.10 nginx02
192.128.232.11 master01
192.128.232.12 master02
192.128.232.13 master03
192.128.232.14 node01
192.128.232.16 node02
192.128.232.17 node03
EOF
[root@master01 ~]# cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/sh
modprobe -- ip_vs
modprobe -- ip_vs_nq
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
[root@master01 ~]# chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
ip_vs_sh 12688 0
ip_vs_wrr 12697 0
ip_vs_rr 12600 0
ip_vs 145458 6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack_ipv4 15053 2
nf_defrag_ipv4 12729 1 nf_conntrack_ipv4
nf_conntrack 139264 7 ip_vs,nf_nat,nf_nat_ipv4,xt_conntrack,nf_nat_masquerade_ipv4,nf_conntrack_netlink,nf_conntrack_ipv4
libcrc32c 12644 4 xfs,ip_vs,nf_nat,nf_conntrack
# Let kubelet start even if swap is present (swap was already disabled above; this is a safety net)
[root@master ~]# rpm -ql kubelet
/etc/kubernetes/manifests
/etc/sysconfig/kubelet
/usr/bin/kubelet
/usr/lib/systemd/system/kubelet.service
[root@master ~]# cat > /etc/sysconfig/kubelet << EOF
KUBELET_EXTRA_ARGS="--fail-swap-on=false"
EOF
# Enable kubelet and docker at boot; kubelet will crash-loop until kubeadm init/join runs, which is expected
[root@master ~]# systemctl enable kubelet docker
[root@master ~]# systemctl start docker
[root@master01 ~]# systemctl enable kubelet && systemctl start kubelet
# kubectl command completion
[root@master01 ~]# echo "source <(kubectl completion bash)" >> ~/.bash_profile
[root@master01 ~]# source ~/.bash_profile
Part 5: Install nginx and keepalived on the two nginx hosts
[root@nginx01 ~]# rpm -Uvh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm
[root@nginx01 ~]# yum -y install nginx keepalived
[root@nginx01 ~]# vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
    router_id 192.128.232.9
}
vrrp_script chk_nginx {
    script "/etc/keepalived/check_port.sh 80"
    interval 2
    weight -20
}
vrrp_instance VI_1 {
    state BACKUP              # all nodes are BACKUP; the one with the highest priority acts as master
    interface eth0            # NIC name on this host
    virtual_router_id 251     # must be identical on all nodes
    priority 100
    advert_int 1
    nopreempt                 # when the higher-priority node recovers it does not grab the VIP back, avoiding flapping
    unicast_peer {            # IPs of the other keepalived hosts
        192.128.232.10
    }
    authentication {
        auth_type PASS
        auth_pass 11111111
    }
    track_script {
        chk_nginx
    }
    virtual_ipaddress {
        192.128.232.15/24     # the VIP
    }
}
[root@nginx01 ~]# vi /etc/keepalived/check_port.sh
#!/bin/bash
# Usage: referenced from keepalived.conf, e.g.
#   vrrp_script check_port {                         # define a vrrp_script health check
#       script "/etc/keepalived/check_port.sh 6443"  # port to monitor
#       interval 2                                   # how often to run the check, in seconds
#   }
CHK_PORT=$1
if [ -n "$CHK_PORT" ];then
    PORT_PROCESS=`ss -lnt|awk -F':' '{print $2}'|awk '{print $1}'|grep "^$CHK_PORT$"|wc -l`
    echo $PORT_PROCESS
    if [ $PORT_PROCESS -eq 0 ];then
        echo "Port $CHK_PORT Is Not Used,End."
        systemctl stop keepalived
    fi
else
    echo "Check Port Can't Be Empty!"
fi
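The script has to be executable before keepalived can run it, and it can be exercised by hand first (the chmod is not in the original notes):

chmod +x /etc/keepalived/check_port.sh
bash /etc/keepalived/check_port.sh 80    # prints the number of listeners on port 80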
[root@nginx01 kubernetes]# systemctl restart keepalived
[root@nginx01 kubernetes]# systemctl enable keepalived
# nginx stream config forwarding to the kube-apiservers
[root@nginx01 kubernetes]# cat >> /etc/nginx/nginx.conf<<EOF
stream {
    upstream kube-apiserver {
        server 192.128.232.11:6443 max_fails=3 fail_timeout=30s;
        server 192.128.232.12:6443 max_fails=3 fail_timeout=30s;
        server 192.128.232.13:6443 max_fails=3 fail_timeout=30s;
    }
    server {
        listen 6443;
        proxy_connect_timeout 2s;
        proxy_timeout 900s;
        proxy_pass kube-apiserver;
    }
}
EOF
[root@nginx01 kubernetes]# nginx -t
[root@nginx01 kubernetes]# systemctl start nginx
[root@nginx01 kubernetes]# systemctl enable nginx
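At this stage the apiservers are not running yet, so an end-to-end test is not possible, but you can already confirm that nginx listens on 6443 and that one of the two hosts holds the VIP (a quick check, not in the original):

ss -lnt | grep 6443
ip addr show eth0 | grep 192.128.232.15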
#############################################################################
# keepalived config on nginx02
[root@nginx02 .ssh]# vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
    router_id 192.128.232.10
}
vrrp_script chk_nginx {
    script "/etc/keepalived/check_port.sh 80"
    interval 2
    weight -20
}
vrrp_instance VI_1 {
    state BACKUP              # all nodes are BACKUP; the one with the highest priority acts as master
    interface eth0            # NIC name on this host
    virtual_router_id 251     # must be identical on all nodes
    priority 90
    advert_int 1
    nopreempt                 # when the higher-priority node recovers it does not grab the VIP back, avoiding flapping
    unicast_peer {            # IPs of the other keepalived hosts
        192.128.232.9
    }
    authentication {
        auth_type PASS
        auth_pass 11111111
    }
    track_script {
        chk_nginx
    }
    virtual_ipaddress {
        192.128.232.15/24     # the VIP
    }
}
[root@nginx02 ~]# vi /etc/keepalived/check_port.sh
#!/bin/bash
# Usage: referenced from keepalived.conf, e.g.
#   vrrp_script check_port {                         # define a vrrp_script health check
#       script "/etc/keepalived/check_port.sh 6443"  # port to monitor
#       interval 2                                   # how often to run the check, in seconds
#   }
CHK_PORT=$1
if [ -n "$CHK_PORT" ];then
    PORT_PROCESS=`ss -lnt|awk -F':' '{print $2}'|awk '{print $1}'|grep "^$CHK_PORT$"|wc -l`
    echo $PORT_PROCESS
    if [ $PORT_PROCESS -eq 0 ];then
        echo "Port $CHK_PORT Is Not Used,End."
        systemctl stop keepalived
    fi
else
    echo "Check Port Can't Be Empty!"
fi
[root@nginx02 kubernetes]# systemctl restart keepalived
[root@nginx02 kubernetes]# systemctl enable keepalived
# nginx stream config forwarding to the kube-apiservers
[root@nginx02 kubernetes]# cat >> /etc/nginx/nginx.conf <<EOF
stream {
    upstream kube-apiserver {
        server 192.128.232.11:6443 max_fails=3 fail_timeout=30s;
        server 192.128.232.12:6443 max_fails=3 fail_timeout=30s;
        server 192.128.232.13:6443 max_fails=3 fail_timeout=30s;
    }
    server {
        listen 6443;
        proxy_connect_timeout 2s;
        proxy_timeout 900s;
        proxy_pass kube-apiserver;
    }
}
EOF
[root@nginx02 kubernetes]# nginx -t
[root@nginx02 kubernetes]# systemctl start nginx
[root@nginx02 kubernetes]# systemctl enable nginx
###################################################################################################################################
# Before deploying Kubernetes, rebuild kubeadm: the certificates it issues are only valid for 1 year by default.
Part 6: Rebuild kubeadm to extend the certificate validity
1. Get the source code
Visit: https://github.com/kubernetes...
# Or download the matching release directly; the source version must match the installed kubeadm version, otherwise the replacement binary may misbehave.
https://github.com/kubernetes/kubernetes/archive/refs/tags/kubernetes-1.18.18.zip
Download the matching version.
[root@master01 k8s]# mkdir /data/k8s/
[root@master01 k8s]# cd /data/k8s/
[root@master01 k8s]# wget https://github.com/kubernetes/kubernetes/archive/refs/tags/kubernetes-1.18.18.zip
[root@master01 k8s]# unzip kubernetes-1.18.18.zip
[root@master01 k8s]# ln -sv kubernetes-1.18.18 kubernetes
####### 2. Patch the certificate validity in the source
### Change the CA validity to 90 years (default is 10 years)
[root@master01 k8s]# cd /data/k8s/kubernetes
[root@master01 k8s]# vi ./staging/src/k8s.io/client-go/util/cert/cert.go
// In this function, NotAfter: now.Add(duration365d * 10).UTC()
// is the default 10-year validity; change it to 90 years.
// Search for it with /NotAfter in vi.
func NewSelfSignedCACert(cfg Config, key crypto.Signer) (*x509.Certificate, error) {
	now := time.Now()
	tmpl := x509.Certificate{
		SerialNumber: new(big.Int).SetInt64(0),
		Subject: pkix.Name{
			CommonName:   cfg.CommonName,
			Organization: cfg.Organization,
		},
		NotBefore: now.UTC(),
		// NotAfter: now.Add(duration365d * 10).UTC(),
		NotAfter: now.Add(duration365d * 90).UTC(), // change the CA validity to 90 years
		KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign,
		BasicConstraintsValid: true,
		IsCA:                  true,
	}
	certDERBytes, err := x509.CreateCertificate(cryptorand.Reader, &tmpl, &tmpl, key.Public(), key)
	if err != nil {
		return nil, err
	}
	return x509.ParseCertificate(certDERBytes)
}
### Change the signed-certificate validity to 90 years (default is 1 year)
[root@master01 k8s]# cd /data/k8s/kubernetes
[root@master01 k8s]# vi ./cmd/kubeadm/app/constants/constants.go
// The CertificateValidity constant defines the validity; multiply it by 90
const (
	// KubernetesDir is the directory Kubernetes owns for storing various configuration files
	KubernetesDir = "/etc/kubernetes"
	// ManifestsSubDirName defines directory name to store manifests
	ManifestsSubDirName = "manifests"
	// TempDirForKubeadm defines temporary directory for kubeadm
	// should be joined with KubernetesDir.
	TempDirForKubeadm = "tmp"

	// CertificateValidity defines the validity for all the signed certificates generated by kubeadm
	// CertificateValidity = time.Hour * 24 * 365
	CertificateValidity = time.Hour * 24 * 365 * 90 // change the certificate validity to 90 years

	// CACertAndKeyBaseName defines certificate authority base name
	CACertAndKeyBaseName = "ca"
	// CACertName defines certificate name
	CACertName = "ca.crt"
	// CAKeyName defines certificate name
	CAKeyName = "ca.key"
With the source patched, the next step is to build kubeadm.
3. Build
Package prerequisites (a Go toolchain is required).
CentOS:
[root@master01 kubernetes]# yum install gcc make rsync jq -y
Go environment
Check the kube-cross TAG to see which Go version is expected:
[root@master01 kubernetes]# cd /data/k8s/kubernetes
[root@master01 kubernetes]# cat ./build/build-image/cross/VERSION
v1.15.10-legacy-1
Download and install Go:
Official downloads: https://golang.google.cn/dl/
[root@master01 kubernetes]# cd /usr/local/
[root@master01 local]# wget https://golang.google.cn/dl/go1.15.10.linux-amd64.tar.gz
[root@master01 local]# tar -zxf go1.15.10.linux-amd64.tar.gz
# Configure the Go environment (quote the heredoc delimiter so $PATH/$GOROOT/$GOPATH are written literally instead of being expanded now)
[root@master01 local]# cat >> /etc/profile <<'EOF'
#go setting
export GOROOT=/usr/local/go
export PATH=$PATH:$GOROOT/bin
export GOPATH=/root/go
export PATH=$PATH:$GOPATH/bin
EOF
# Apply it
[root@master01 local]# source /etc/profile
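Confirm the toolchain is on PATH and matches the version kube-cross expects (a quick check, not in the original):

go version    # should report go1.15.10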
# Build kubeadm (building only kubeadm is enough here; to rebuild everything, run: make cross)
[root@master01 local]# cd /data/k8s/kubernetes
[root@master01 local]# make all WHAT=cmd/kubeadm GOFLAGS=-v
# Build kubelet
[root@master01 local]# cd /data/k8s/kubernetes
[root@master01 local]# make all WHAT=cmd/kubelet GOFLAGS=-v
# Build kubectl
[root@master01 local]# cd /data/k8s/kubernetes
[root@master01 local]# make all WHAT=cmd/kubectl GOFLAGS=-v
# Replace the installed kubeadm binary
[root@master01 local]# mv /usr/bin/kubeadm /usr/bin/kubeadm_backup
[root@master01 local]# cd /data/k8s/kubernetes
[root@master01 local]# cp _output/local/bin/linux/amd64/kubeadm /usr/bin/kubeadm
[root@master01 local]# chmod +x /usr/bin/kubeadm
# Check the kubeadm version
[root@master01 ~]# kubeadm version
kubeadm version: &version.Info{Major:"1", Minor:"18", GitVersion:"v1.18.18",
Part 7: Pull the matching images, retag them and push them to the private Harbor registry
If k8s.gcr.io is unreachable, the cluster can pull its images from Harbor instead; the steps below install the cluster that way.
# List the images kubeadm needs
[root@master01 ~]# kubeadm config images list
I1203 15:19:52.696209 21678 version.go:255] remote version is much newer: v1.22.4; falling back to: stable-1.18
W1203 15:19:53.163851 21678 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
k8s.gcr.io/kube-apiserver:v1.18.20
k8s.gcr.io/kube-controller-manager:v1.18.20
k8s.gcr.io/kube-scheduler:v1.18.20
k8s.gcr.io/kube-proxy:v1.18.20
k8s.gcr.io/pause:3.2
k8s.gcr.io/etcd:3.4.3-0
k8s.gcr.io/coredns:1.6.7
# Pull the images locally
[root@master01 ~]# kubeadm config images pull
[root@master01 ~]# docker images|grep k8s >k8s_images.txt
[root@master01 ~]# vi k8s_images_tag_push.sh
#!/bin/bash
cat /root/k8s_images.txt | while read line
do
    ImageId=`echo $line|awk '{print $3}'`
    ImageName=`echo $line|awk -F'/' '{print $2}'|awk '{print $1}'`
    ImageVersion=`echo $line|awk '{print $2}'`
    docker tag $ImageId harbor.od.com/kubeadm/${ImageName}:${ImageVersion}
    docker push harbor.od.com/kubeadm/${ImageName}:${ImageVersion}
done
[root@master01 ~]# chmod +x k8s_images_tag_push.sh
# Log in to harbor.od.com. Harbor here is served over plain HTTP, which is why /etc/docker/daemon.json was given
# "insecure-registries": ["registry.access.redhat.com","quay.io","harbor.od.com"]; with that set, HTTPS is not required.
# Log in to the Harbor registry
[root@master01 ~]# docker login harbor.od.com
Username: admin
Password:
WARNING! Your password will be stored unencrypted in /root/.docker/config.json.
Configure a credential helper to remove this warning. See
https://docs.docker.com/engine/reference/commandline/login/#credentials-store
Login Succeeded
# After the push, check in the Harbor UI that the images are present.
[root@master01 ~]# ./k8s_images_tag_push.sh
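One way to spot-check that the retagged images actually landed in the kubeadm project (not in the original; it assumes the docker login above is still valid):

docker pull harbor.od.com/kubeadm/pause:3.2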
# Build the init config so the images are pulled from Harbor
[root@master01 ~]# kubeadm config print init-defaults ClusterConfiguration >kubeadm-config.yaml
# The generated output differs slightly from the file below; merge the two.
[root@master01 ~]# cat kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2      # keep the apiVersion that kubeadm config print init-defaults emitted
kind: ClusterConfiguration
kubernetesVersion: v1.18.20             # must match the version kubeadm config images list reported (v1.18.20 here, not the 1.18.18 of the installed packages); this is the tag pulled from Harbor
imageRepository: harbor.od.com/kubeadm  # pull images from the kubeadm project in Harbor
apiServer:
  certSANs:                             # every kube-apiserver hostname, IP and the VIP
  - master01
  - master02
  - master03
  - node01
  - node02
  - node03
  - 192.128.232.11
  - 192.128.232.12
  - 192.128.232.13
  - 192.128.232.14
  - 192.128.232.15
  - 192.128.232.16
  - 192.128.232.17
  - 192.128.232.18
  - 192.128.232.19
  - 192.128.232.20
controlPlaneEndpoint: "192.128.232.15:6443"   # the VIP
networking:
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/12"
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
featureGates:
  SupportIPVSProxyMode: true
mode: ipvs   # kube-proxy mode
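Optionally, the images can be pre-pulled from Harbor before the real init so that registry problems surface early (not in the original):

kubeadm config images pull --config kubeadm-config.yaml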
# Initialize the cluster. Note: the VIP (192.128.232.15) must be up and forwarding to the machine being initialized.
[root@master01 ~]# kubeadm init --config kubeadm-config.yaml
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 192.128.232.15:6443 --token pqpfkz.0gdupmp5uk22ym6c \
--discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc \
--control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.128.232.15:6443 --token pqpfkz.0gdupmp5uk22ym6c \
--discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc
# Check the local Docker images; they were indeed all pulled from the private Harbor registry.
[root@master01 ~]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
harbor.od.com/kubeadm/kube-proxy v1.18.20 27f8b8d51985 5 months ago 117MB
harbor.od.com/kubeadm/kube-apiserver v1.18.20 7d8d2960de69 5 months ago 173MB
harbor.od.com/kubeadm/kube-controller-manager v1.18.20 e7c545a60706 5 months ago 162MB
harbor.od.com/kubeadm/kube-scheduler v1.18.20 a05a1a79adaa 5 months ago 96.1MB
harbor.od.com/kubeadm/pause 3.2 80d28bedfe5d 21 months ago 683kB
harbor.od.com/kubeadm/coredns 1.6.7 67da37a9a360 22 months ago 43.8MB
harbor.od.com/kubeadm/etcd 3.4.3-0 303ce5db0e90 2 years ago 288MB
[root@master01 ~]# mkdir -p $HOME/.kube
[root@master01 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master01 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
[root@master01 ~]# kubectl get cs
NAME STATUS MESSAGE ERROR
controller-manager Unhealthy Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused
scheduler Unhealthy Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused
etcd-0 Healthy {"health":"true"}
[root@master01 ~]# vi /etc/kubernetes/manifests/kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
#    - --port=0                # comment out this line
    image: harbor.od.com/kubeadm/kube-scheduler:v1.18.20
    imagePullPolicy: IfNotPresent
[root@master01 ~]# vi /etc/kubernetes/manifests/kube-controller-manager.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --allocate-node-cidrs=true
    - --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --bind-address=127.0.0.1
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --cluster-cidr=10.244.0.0/16
    - --cluster-name=kubernetes
    - --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
    - --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
    - --controllers=*,bootstrapsigner,tokencleaner
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true
    - --node-cidr-mask-size=24
#    - --port=0                # comment out this line
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --root-ca-file=/etc/kubernetes/pki/ca.crt
[root@master01 ~]# systemctl restart kubelet
[root@master01 ~]# kubectl get cs
NAME STATUS MESSAGE ERROR
etcd-0 Healthy {"health":"true"}
scheduler Healthy ok
controller-manager Healthy ok
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready master 58m v1.18.18
3. Load the environment variable
[root@master01 ~]# echo "export KUBECONFIG=/etc/kubernetes/admin.conf" >> ~/.bash_profile
[root@master01 ~]# source .bash_profile
All steps in this document run as root; for a non-root user, run the following instead:
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
4. Install the flannel network
Create the flannel network from master01
[root@master01 ~]# wget https://raw.githubusercontent.com/coreos/flannel/2140ac876ef134e0ed5af15c65e414cf26827915/Documentation/kube-flannel.yml
[root@master01 ~]# kubectl apply -f kube-flannel.yml
# Check the certificate expiry dates
[root@master01 ~]# kubeadm alpha certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[check-expiration] Error reading configuration from the Cluster. Falling back to default configuration
W1207 10:20:28.086073 7242 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
CERTIFICATE EXPIRES RESIDUAL TIME CERTIFICATE AUTHORITY EXTERNALLY MANAGED
admin.conf Nov 12, 2111 07:57 UTC 89y no
apiserver Nov 12, 2111 07:57 UTC 89y ca no
apiserver-etcd-client Nov 12, 2111 07:57 UTC 89y etcd-ca no
apiserver-kubelet-client Nov 12, 2111 07:57 UTC 89y ca no
controller-manager.conf Nov 12, 2111 07:57 UTC 89y no
etcd-healthcheck-client Nov 12, 2111 07:57 UTC 89y etcd-ca no
etcd-peer Nov 12, 2111 07:57 UTC 89y etcd-ca no
etcd-server Nov 12, 2111 07:57 UTC 89y etcd-ca no
front-proxy-client Nov 12, 2111 07:57 UTC 89y front-proxy-ca no
scheduler.conf Nov 12, 2111 07:57 UTC 89y no
CERTIFICATE AUTHORITY EXPIRES RESIDUAL TIME EXTERNALLY MANAGED
ca Nov 12, 2111 07:57 UTC 89y no
etcd-ca Nov 12, 2111 07:57 UTC 89y no
front-proxy-ca Nov 12, 2111 07:57 UTC 89y no
5. Back up the images
[root@node01 ~]# docker save $(docker images | grep -vE 'REPOSITORY | redis' | awk 'BEGIN{OFS=":";ORS=" "}{print $1,$2}') -o export.tar
# Load the archive on the node
[root@node01 ~]# docker load -i export.tar
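The archive is normally copied to each new node before loading it; a small sketch, with the target IPs as an example only:

for ip in 192.128.232.16 192.128.232.17; do scp export.tar root@${ip}:/root/; done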
Part 8: Install the master02 node
1. Certificate distribution (run on master01)
# Distribute the control-plane certificates from master01 to the other two master nodes
[root@master01 ~]# cat >cert-main-master.sh<<'EOF'
USER=root # customizable
CONTROL_PLANE_IPS="192.128.232.12 192.128.232.13"   # the other two master nodes
for host in ${CONTROL_PLANE_IPS}; do
    ssh $host mkdir -p /etc/kubernetes/pki/etcd
    scp /etc/kubernetes/pki/ca.crt "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/ca.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/sa.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/sa.pub "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/front-proxy-ca.crt "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/front-proxy-ca.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/etcd/ca.crt "${USER}"@$host:/etc/kubernetes/pki/etcd/
    # Quote this line if you are using external etcd
    scp /etc/kubernetes/pki/etcd/ca.key "${USER}"@$host:/etc/kubernetes/pki/etcd/
done
EOF
[root@master01 ~]# chmod +x cert-main-master.sh
[root@master01 ~]# ./cert-main-master.sh
ca.crt 100% 1029 967.6KB/s 00:00
ca.key 100% 1679 1.5MB/s 00:00
sa.key 100% 1675 128.8KB/s 00:00
sa.pub 100% 451 182.4KB/s 00:00
front-proxy-ca.crt 100% 1038 1.1MB/s 00:00
front-proxy-ca.key 100% 1675 65.3KB/s 00:00
ca.crt 100% 1021 91.0KB/s 00:00
ca.key 100% 1679 718.3KB/s 00:00
2. Log in to Harbor (run on master02)
[root@master02 ~]# docker login harbor.od.com
Username: admin
Password:
WARNING! Your password will be stored unencrypted in /root/.docker/config.json.
Configure a credential helper to remove this warning. See
https://docs.docker.com/engine/reference/commandline/login/#credentials-store
Login Succeeded
# Copy the rebuilt kubeadm binary to the other cluster nodes
[root@master02 ~]# scp master01:/usr/bin/kubeadm /usr/bin/kubeadm
# Join master02 to the cluster as a control-plane node
[root@master02 ~]# kubeadm join 192.128.232.15:6443 --token pqpfkz.0gdupmp5uk22ym6c \
--discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc \
--control-plane
...... (output trimmed) ......
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
# master02 has joined the control plane; its images were pulled from the private Harbor registry.
[root@master02 ~]# mkdir -p $HOME/.kube
[root@master02 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master02 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
[root@master02 ~]# docker images|grep harbor.od.com
harbor.od.com/kubeadm/kube-proxy v1.18.20 27f8b8d51985 5 months ago 117MB
harbor.od.com/kubeadm/kube-apiserver v1.18.20 7d8d2960de69 5 months ago 173MB
harbor.od.com/kubeadm/kube-scheduler v1.18.20 a05a1a79adaa 5 months ago 96.1MB
harbor.od.com/kubeadm/kube-controller-manager v1.18.20 e7c545a60706 5 months ago 162MB
harbor.od.com/kubeadm/pause 3.2 80d28bedfe5d 22 months ago 683kB
harbor.od.com/kubeadm/coredns 1.6.7 67da37a9a360 22 months ago 43.8MB
harbor.od.com/kubeadm/etcd 3.4.3-0 303ce5db0e90 2 years ago 288MB
3. On both nginx hosts, change the apiserver upstream so traffic goes only to master02.
[root@nginx01 kubernetes]# vi /etc/nginx/nginx.conf
stream {
    upstream kube-apiserver {
#       server 192.128.232.11:6443 max_fails=3 fail_timeout=30s;
        server 192.128.232.12:6443 max_fails=3 fail_timeout=30s;
#       server 192.128.232.13:6443 max_fails=3 fail_timeout=30s;
    }
    server {
        listen 6443;
        proxy_connect_timeout 2s;
        proxy_timeout 900s;
        proxy_pass kube-apiserver;
    }
}
[root@nginx01 kubernetes]# systemctl restart nginx
4. On master02
[root@master02 ~]# kubectl get cs
NAME STATUS MESSAGE ERROR
controller-manager Unhealthy Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused
scheduler Unhealthy Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused
etcd-0 Healthy {"health":"true"}
[root@master02 ~]# vi /etc/kubernetes/manifests/kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
#    - --port=0                # comment out this line
    image: harbor.od.com/kubeadm/kube-scheduler:v1.18.20
    imagePullPolicy: IfNotPresent
[root@master02 ~]# vi /etc/kubernetes/manifests/kube-controller-manager.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --allocate-node-cidrs=true
    - --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --bind-address=127.0.0.1
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --cluster-cidr=10.244.0.0/16
    - --cluster-name=kubernetes
    - --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
    - --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
    - --controllers=*,bootstrapsigner,tokencleaner
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true
    - --node-cidr-mask-size=24
#    - --port=0                # comment out this line
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --root-ca-file=/etc/kubernetes/pki/ca.crt
    - --service-account-private-key-file=/etc/kubernetes/pki/sa.key
    - --service-cluster-ip-range=10.96.0.0/12
    - --use-service-account-credentials=true
    image: harbor.od.com/kubeadm/kube-controller-manager:v1.18.20
    imagePullPolicy: IfNotPresent
[root@master02 ~]# systemctl restart kubelet
[root@master02 ~]# kubectl get cs
NAME STATUS MESSAGE ERROR
etcd-0 Healthy {"health":"true"}
scheduler Healthy ok
controller-manager Healthy ok
[root@master02 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready master 141m v1.18.18
master02 Ready master 8m57s v1.18.18
[root@master02 ~]# kubeadm alpha certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[check-expiration] Error reading configuration from the Cluster. Falling back to default configuration
W1207 10:31:58.191758 18746 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
CERTIFICATE EXPIRES RESIDUAL TIME CERTIFICATE AUTHORITY EXTERNALLY MANAGED
admin.conf Nov 12, 2111 10:11 UTC 89y no
apiserver Nov 12, 2111 10:11 UTC 89y ca no
apiserver-etcd-client Nov 12, 2111 10:11 UTC 89y etcd-ca no
apiserver-kubelet-client Nov 12, 2111 10:11 UTC 89y ca no
controller-manager.conf Nov 12, 2111 10:11 UTC 89y no
etcd-healthcheck-client Nov 12, 2111 10:11 UTC 89y etcd-ca no
etcd-peer Nov 12, 2111 10:11 UTC 89y etcd-ca no
etcd-server Nov 12, 2111 10:11 UTC 89y etcd-ca no
front-proxy-client Nov 12, 2111 10:11 UTC 89y front-proxy-ca no
scheduler.conf Nov 12, 2111 10:11 UTC 89y no
CERTIFICATE AUTHORITY EXPIRES RESIDUAL TIME EXTERNALLY MANAGED
ca Nov 12, 2111 07:57 UTC 89y no
etcd-ca Nov 12, 2111 07:57 UTC 89y no
front-proxy-ca Nov 12, 2111 07:57 UTC 89y no
# Repeat the same steps used for master02 to join master03 to the cluster
[root@master02 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready master 35m v1.18.18
master02 Ready master 28m v1.18.18
master03 Ready master 23m v1.18.18
Conclusion: that completes the highly available deployment of the master nodes.
Part 9: Failure drill
1. Shut down master01 and check whether the cluster keeps working.
[root@master02 ~]# kubectl get cs
NAME STATUS MESSAGE
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy {"health":"true"}
# Check the nodes; master01 is now NotReady
[root@devops ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 NotReady master 35m v1.18.18
master02 Ready master 28m v1.18.18
master03 Ready master 23m v1.18.18
[root@devops ~]# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-55b448c5dd-48hf4 1/1 Running 0 17m
coredns-55b448c5dd-8bs9b 1/1 Running 0 17m
etcd-master01 1/1 Running 0 17m
etcd-master02 1/1 Running 0 10m
etcd-master03 1/1 Running 0 6m
kube-apiserver-master01 1/1 Running 0 17m
kube-apiserver-master02 1/1 Running 0 10m
kube-apiserver-master03 1/1 Running 0 6m1s
kube-controller-manager-master01 0/1 CrashLoopBackOff 1 15m
kube-controller-manager-master02 1/1 Running 1 7m22s
kube-controller-manager-master03 1/1 Running 0 3m26s
kube-flannel-ds-amd64-75z97 1/1 Running 0 6m9s
kube-flannel-ds-amd64-7q6pz 1/1 Running 1 10m
kube-flannel-ds-amd64-fg8zb 1/1 Running 0 13m
kube-proxy-9l66g 1/1 Running 0 10m
kube-proxy-sjx5q 1/1 Running 0 6m9s
kube-proxy-wc7gb 1/1 Running 0 17m
kube-scheduler-master01 1/1 Running 2 15m
kube-scheduler-master02 1/1 Running 1 7m22s
kube-scheduler-master03 1/1 Running 0 3m26s
# Delete the failed master from the cluster first, then investigate master01 and re-join it later
[root@devops ~]# kubectl delete node master01
node "master01" deleted
[root@devops ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master02 Ready master 34m v1.18.18
master03 Ready master 29m v1.18.18
# On master01 the control-plane pods show up as Terminating
[root@master01 ~]# kubectl get pods -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-55b448c5dd-7t8zb 1/1 Running 0 37m 10.244.1.2 devops <none> <none>
coredns-55b448c5dd-nwmgq 1/1 Running 0 37m 10.244.2.2 master03 <none> <none>
etcd-master01 1/1 Terminating 1 13m 192.128.232.11 master01 <none> <none>
etcd-master02 1/1 Running 0 52m 192.128.232.12 devops <none> <none>
etcd-master03 1/1 Running 0 47m 192.128.232.13 master03 <none> <none>
kube-apiserver-master01 1/1 Terminating 1 13m 192.128.232.11 master01 <none> <none>
kube-apiserver-master02 1/1 Running 0 52m 192.128.232.12 devops <none> <none>
kube-apiserver-master03 1/1 Running 0 47m 192.128.232.13 master03 <none> <none>
kube-controller-manager-master01 1/1 Terminating 4 13m 192.128.232.11 master01 <none> <none>
kube-controller-manager-master02 1/1 Running 2 49m 192.128.232.12 devops <none> <none>
kube-controller-manager-master03 1/1 Running 0 45m 192.128.232.13 master03 <none> <none>
kube-flannel-ds-amd64-75z97 1/1 Running 0 47m 192.128.232.13 master03 <none> <none>
kube-flannel-ds-amd64-7q6pz 1/1 Running 1 52m 192.128.232.12 devops <none> <none>
kube-flannel-ds-amd64-gtcpb 1/1 Terminating 2 13m 192.128.232.11 master01 <none> <none>
kube-proxy-9l66g 1/1 Running 0 52m 192.128.232.12 devops <none> <none>
kube-proxy-qgbr8 1/1 Terminating 0 13m 192.128.232.11 master01 <none> <none>
kube-proxy-sjx5q 1/1 Running 0 47m 192.128.232.13 master03 <none> <none>
kube-scheduler-master01 1/1 Terminating 4 13m 192.128.232.11 master01 <none> <none>
kube-scheduler-master02 1/1 Running 3 49m 192.128.232.12 devops <none> <none>
kube-scheduler-master03 1/1 Running 0 45m 192.128.232.13 master03 <none> <none>
# The cluster now has only two masters left: devops (master02's host) and master03
[root@devops ~]# kubectl get pods -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-55b448c5dd-7t8zb 1/1 Running 0 20m 10.244.1.2 devops <none> <none>
coredns-55b448c5dd-nwmgq 1/1 Running 0 20m 10.244.2.2 master03 <none> <none>
etcd-devops 1/1 Running 0 35m 192.128.232.12 devops <none> <none>
etcd-master03 1/1 Running 0 30m 192.128.232.13 master03 <none> <none>
kube-apiserver-devops 1/1 Running 0 35m 192.128.232.12 devops <none> <none>
kube-apiserver-master03 1/1 Running 0 30m 192.128.232.13 master03 <none> <none>
kube-controller-manager-devops 1/1 Running 1 32m 192.128.232.12 devops <none> <none>
kube-controller-manager-master03 1/1 Running 0 28m 192.128.232.13 master03 <none> <none>
kube-flannel-ds-amd64-75z97 1/1 Running 0 30m 192.128.232.13 master03 <none> <none>
kube-flannel-ds-amd64-7q6pz 1/1 Running 1 35m 192.128.232.12 devops <none> <none>
kube-proxy-9l66g 1/1 Running 0 35m 192.128.232.12 devops <none> <none>
kube-proxy-sjx5q 1/1 Running 0 30m 192.128.232.13 master03 <none> <none>
kube-scheduler-devops 1/1 Running 1 32m 192.128.232.12 devops <none> <none>
kube-scheduler-master03 1/1 Running 0 28m 192.128.232.13 master03 <none> <none>
# Repair or re-add a master promptly: with only two members left, losing one more breaks etcd quorum and the whole cluster goes down.
Part 10: Install the worker nodes
## If the join token is forgotten or has expired, handle it as follows:
# Case 1: the token has expired
1. Get a token
# List the tokens that have not expired yet
[root@master01 ~]# kubeadm token list | awk -F" " '{print $1}' |tail -n 1
# If the command prints nothing (token expired), create a new one first
[root@master01 ~]# kubeadm token create --print-join-command
# Case 2: the token has not expired
1. List the tokens
[root@master01 ~]# kubeadm token list | awk -F" " '{print $1}' |tail -n 1
jd0u21.4ydhozszk7255xxb
2. Get the SHA-256 hash of the CA public key
[root@master01 ~]# openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^ .* //'
(stdin)= d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc
3. Join a worker node with the token and hash
[root@master01 ~]# kubeadm join 192.128.232.15:6443 --token jd0u21.4ydhozszk7255xxb --discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc
4. Add node01 to the cluster
[root@node01 ~]# kubeadm join 192.128.232.15:6443 --token ml3xl4.lxb0gclu3uhbpxoy --discovery-token-ca-cert-hash sha256:10d33f646d7f9b35502133045e67b30aeddbd456b7f7fecbb5922751c34828f2
W1208 11:18:51.965242 1664 join.go:346] [preflight] WARNING: JoinControlPane.controlPlane settings will be ignored when control-plane flag is not set.
[preflight] Running pre-flight checks
[WARNING SystemVerification]: this Docker version is not on the list of validated versions: 20.10.6. Latest validated version: 19.03
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.18" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
5. Confirm node01 joined the cluster (master01 is absent because it was removed earlier)
[root@master02 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master02 Ready master 18h v1.18.18
master03 Ready master 18h v1.18.18
node01 Ready <none> 15s v1.18.18
6. Check whether the two masters carry the taint that keeps pods off them (taints are covered in detail later)
Both masters below are tainted; without a taint the Taints field would show <none>.
[root@master02 ~]# kubectl describe node devops|grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
[root@master02 ~]# kubectl describe node master03|grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
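For reference, a master taint can be removed (and re-added) with kubectl taint; this is not done in this walkthrough, just shown for completeness:

kubectl taint nodes master03 node-role.kubernetes.io/master:NoSchedule-    # remove
kubectl taint nodes master03 node-role.kubernetes.io/master=:NoSchedule    # add back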
7. Deploy a simple Deployment to the cluster
[root@master02 ~]# cat >nginx-dp.yaml<<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  selector:
    matchLabels:
      app: nginx
  replicas: 2 # tells deployment to run 2 pods matching the template
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:1.16
        ports:
        - containerPort: 80
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime
      volumes:
      - name: localtime
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
EOF
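The manifest still has to be applied before the pods in the next step can appear (the apply command itself is not shown in the original notes):

kubectl apply -f nginx-dp.yaml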
8. The pods are scheduled only onto node01; nothing is scheduled on the masters.
[root@master02 ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deployment-767cbb69b8-jss4k 1/1 Running 0 12m 10.244.0.2 node01 <none> <none>
nginx-deployment-767cbb69b8-t79kx 1/1 Running 0 12m 10.244.0.3 node01 <none> <none>
Part 11: The role of etcd
etcd is a critical component of a Kubernetes cluster; it stores the cluster's network configuration and the state of every API object.
1. Common etcdctl operations
a. Find the running etcd container
[root@master01 manifests]# docker ps -a|grep k8s_etcd
6f67d5325e6e 303ce5db0e90 "etcd --advertise-cl…" 9 minutes ago Up 9 minutes k8s_etcd_etcd-master01_kube-system_dbdfd4ec66544be175f95fdae2031419_222
b. Copy the etcdctl binary out of the container onto the host
[root@master01 manifests]# docker cp k8s_etcd_etcd-master01_kube-system_dbdfd4ec66544be175f95fdae2031419_222:/usr/local/bin/etcdctl /usr/local/bin/
c. Verify the command works
[root@master01 manifests]# etcdctl version
etcdctl version: 3.4.3
API version: 3.4
d. List the etcd cluster members
[root@master01 manifests]# etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key member list -w table
+------------------+---------+----------+-----------------------------+-----------------------------+------------+
| ID | STATUS | NAME | PEER ADDRS | CLIENT ADDRS | IS LEARNER |
+------------------+---------+----------+-----------------------------+-----------------------------+------------+
| 326ec117eddf797 | started | master01 | https://192.128.232.11:2380 | https://192.128.232.11:2379 | false |
| 27a323610abdf106 | started | master02 | https://192.128.232.12:2380 | https://192.128.232.12:2379 | false |
| 945a20b0e323f57f | started | master03 | https://192.128.232.13:2380 | https://192.128.232.13:2379 | false |
+------------------+---------+----------+-----------------------------+-----------------------------+------------+
e. Check the endpoint status
[root@master01 manifests]# etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key endpoint status -w table
+--------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+--------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://[127.0.0.1]:2379 | 326ec117eddf797 | 3.4.3 | 3.2 MB | true | false | 13650 | 40194 | 40194 | |
+--------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
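The same status query can also be pointed at all three members at once to see which one is the current leader (a variation on the command above, not in the original):

etcdctl --endpoints=https://192.128.232.11:2379,https://192.128.232.12:2379,https://192.128.232.13:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
  --key=/etc/kubernetes/pki/etcd/healthcheck-client.key endpoint status -w table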