kubeadm 高可用k8s(v1.23.5)

野猪佩挤

已于 2022-05-01 00:00:31 修改

阅读量317

点赞数

分类专栏： k8s Kubernetes 文章标签： kubernetes

于 2022-04-30 23:08:16 首次发布

本文链接：https://blog.csdn.net/weixin_42562106/article/details/124519761

版权

Kubernetes 同时被 2 个专栏收录

112 篇文章 14 订阅

订阅专栏

k8s

90 篇文章 6 订阅

订阅专栏

内核参数安排

cat << EOF >  /etc/sysctl.d/k8s.conf

#############################################################################################
# 调整虚拟内存
#################################################################################

# Default: 30
# 0 - 任何情况下都不使用swap。
# 1 - 除非内存不足（OOM），否则不使用swap。
vm.swappiness = 0

# 内存分配策略
#0 - 表示内核将检查是否有足够的可用内存供应用进程使用；如果有足够的可用内存，内存申请允许；否则，内存申请失败，并把错误返回给应用进程。
#1 - 表示内核允许分配所有的物理内存，而不管当前的内存状态如何。
#2 - 表示内核禁止过量分配：可申请的内存总量不能超过 交换空间 + 物理内存 × vm.overcommit_ratio%（默认50%）。
vm.overcommit_memory=1

# OOM时处理
# 等于0时，表示当内存耗尽时，内核会触发OOM killer杀掉进程（不panic）；等于1时，内存耗尽（一般情况下）会直接触发kernel panic。
vm.panic_on_oom=0

# vm.dirty_background_ratio 用于调整内核如何处理必须刷新到磁盘的脏页。
# Default value is 10.
# 该值是系统内存总量的百分比，在许多情况下将此值设置为5是合适的。
# 此设置不应设置为零。
vm.dirty_background_ratio = 5

# 内核强制同步操作将其刷新到磁盘之前允许的脏页总数
# 也可以通过更改 vm.dirty_ratio 的值（将其增加到默认值30以上（也占系统内存的百分比））来增加
# 推荐 vm.dirty_ratio 的值在60到80之间。
vm.dirty_ratio = 60

# vm.max_map_count 计算当前的内存映射文件数。
# mmap 限制（vm.max_map_count）的最小值是打开文件的ulimit数量（cat /proc/sys/fs/file-max）。
# 每128KB系统内存 map_count应该大约为1。 因此，在32GB系统上，max_map_count为262144。
# Default: 65530
vm.max_map_count = 2097152

#############################################################################################
# 调整文件
#############################################################################################

fs.may_detach_mounts = 1

# 增加文件句柄和inode缓存的大小，并限制核心转储。
fs.file-max = 2097152
fs.nr_open = 2097152
fs.suid_dumpable = 0

# 文件监控
fs.inotify.max_user_instances=8192
fs.inotify.max_user_watches=524288
fs.inotify.max_queued_events=16384

#############################################################################################
# 调整网络设置
#############################################################################################

# 为每个套接字的发送和接收缓冲区分配的默认内存量。
net.core.wmem_default = 25165824
net.core.rmem_default = 25165824

# 为每个套接字的发送和接收缓冲区分配的最大内存量。
net.core.wmem_max = 25165824
net.core.rmem_max = 25165824

# 除了套接字设置外，发送和接收缓冲区的大小
# 必须使用net.ipv4.tcp_wmem和net.ipv4.tcp_rmem参数分别设置TCP套接字。
# 使用三个以空格分隔的整数设置这些整数，分别指定最小，默认和最大大小。
# 最大大小不能大于使用net.core.wmem_max和net.core.rmem_max为所有套接字指定的值。
# 合理的设置是最小4KiB，默认64KiB和最大2MiB缓冲区。
net.ipv4.tcp_wmem = 20480 12582912 25165824
net.ipv4.tcp_rmem = 20480 12582912 25165824

# 增加最大可分配的总缓冲区空间
# 以页为单位（4096字节）进行度量
net.ipv4.tcp_mem = 65536 25165824 262144
net.ipv4.udp_mem = 65536 25165824 262144

# 为每个套接字的发送和接收缓冲区分配的最小内存量。
net.ipv4.udp_wmem_min = 16384
net.ipv4.udp_rmem_min = 16384

# 启用TCP窗口缩放，客户端可以更有效地传输数据，并允许在代理方缓冲该数据。
net.ipv4.tcp_window_scaling = 1

# 提高同时接受连接数。
net.ipv4.tcp_max_syn_backlog = 10240

# 将net.core.netdev_max_backlog的值增加到大于默认值1000
# 可以帮助突发网络流量，特别是在使用数千兆位网络连接速度时，
# 通过允许更多的数据包排队等待内核处理它们。
net.core.netdev_max_backlog = 65536

# 增加选项内存缓冲区的最大数量
net.core.optmem_max = 25165824

# 被动TCP连接的SYNACK次数。
net.ipv4.tcp_synack_retries = 2

# 允许的本地端口范围。
net.ipv4.ip_local_port_range = 2048 65535

# 防止TCP时间等待
# Default: net.ipv4.tcp_rfc1337 = 0
net.ipv4.tcp_rfc1337 = 1

# 减少tcp_fin_timeout连接的时间默认值
net.ipv4.tcp_fin_timeout = 15

# 积压套接字的最大数量。
# Default is 128.
net.core.somaxconn = 32768

# 打开syncookies以进行SYN洪水攻击保护。
net.ipv4.tcp_syncookies = 1

# 避免Smurf攻击
# 发送伪装的ICMP数据包，目的地址设为某个网络的广播地址，源地址设为要攻击的目的主机，
# 使所有收到此ICMP数据包的主机都将对目的主机发出一个回应，使被攻击主机在某一段时间内收到成千上万的数据包
net.ipv4.icmp_echo_ignore_broadcasts = 1

# 为icmp错误消息打开保护
net.ipv4.icmp_ignore_bogus_error_responses = 1

# 启用自动缩放窗口。
# 如果延迟证明合理，这将允许TCP缓冲区超过其通常的最大值64K。
net.ipv4.tcp_window_scaling = 1

# 打开并记录欺骗，源路由和重定向数据包
net.ipv4.conf.all.log_martians = 1
net.ipv4.conf.default.log_martians = 1

# 告诉内核有多少个未附加的TCP套接字维护用户文件句柄。 万一超过这个数字，
# 孤立的连接会立即重置，并显示警告。
# Default: net.ipv4.tcp_max_orphans = 65536
net.ipv4.tcp_max_orphans = 65536

# 不要在关闭连接时缓存指标
net.ipv4.tcp_no_metrics_save = 1

# 启用RFC1323中定义的时间戳记：
# Default: net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_timestamps = 1

# 启用选择确认。
# Default: net.ipv4.tcp_sack = 1
net.ipv4.tcp_sack = 1

# 增加 tcp-time-wait 存储桶池大小，以防止简单的DOS攻击。
# net.ipv4.tcp_tw_recycle 已从Linux 4.12中删除。请改用net.ipv4.tcp_tw_reuse。
net.ipv4.tcp_max_tw_buckets = 14400
net.ipv4.tcp_tw_reuse = 1

# accept_source_route 选项使网络接口接受设置了严格源路由（SSR）或松散源路由（LSR）选项的数据包。
# 以下设置将丢弃设置了SSR或LSR选项的数据包。
net.ipv4.conf.all.accept_source_route = 0
net.ipv4.conf.default.accept_source_route = 0

# 打开反向路径过滤
net.ipv4.conf.all.rp_filter = 1
net.ipv4.conf.default.rp_filter = 1

# 禁用ICMP重定向接受
net.ipv4.conf.all.accept_redirects = 0
net.ipv4.conf.default.accept_redirects = 0
net.ipv4.conf.all.secure_redirects = 0
net.ipv4.conf.default.secure_redirects = 0

# 禁止发送所有IPv4 ICMP重定向数据包。
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.default.send_redirects = 0

# 开启IP转发.
net.ipv4.ip_forward = 1

# 禁止IPv6
net.ipv6.conf.lo.disable_ipv6=1
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1

# 让经过bridge的数据包也交给iptables/ip6tables/arptables处理（需要先加载br_netfilter模块）
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables = 1

# arp缓存
# 存在于 ARP 高速缓存中的最少层数，如果少于这个数，垃圾收集器将不会运行。缺省值是 128
net.ipv4.neigh.default.gc_thresh1=2048
# 保存在 ARP 高速缓存中的最多的记录软限制。垃圾收集器在开始收集前，允许记录数超过这个数字 5 秒。缺省值是 512
net.ipv4.neigh.default.gc_thresh2=4096
# 保存在 ARP 高速缓存中的最多记录的硬限制，一旦高速缓存中的数目高于此，垃圾收集器将马上运行。缺省值是 1024
net.ipv4.neigh.default.gc_thresh3=8192

# 持久连接
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_intvl = 30
net.ipv4.tcp_keepalive_probes = 10

# conntrack表
net.nf_conntrack_max=1048576
net.netfilter.nf_conntrack_max=1048576
net.netfilter.nf_conntrack_buckets=262144
net.netfilter.nf_conntrack_tcp_timeout_fin_wait=30
net.netfilter.nf_conntrack_tcp_timeout_time_wait=30
net.netfilter.nf_conntrack_tcp_timeout_close_wait=15
net.netfilter.nf_conntrack_tcp_timeout_established=300

#############################################################################################
# 调整内核参数
#############################################################################################

# 地址空间布局随机化（ASLR）是一种用于操作系统的内存保护过程，可防止缓冲区溢出攻击。
# 这有助于确保与系统上正在运行的进程相关联的内存地址不可预测，
# 因此，与这些流程相关的缺陷或漏洞将更加难以利用。
# Accepted values: 0 = 关闭, 1 = 保守随机化, 2 = 完全随机化
kernel.randomize_va_space = 2

# 调高 PID 数量
kernel.pid_max = 65536
kernel.threads-max=30938

# coredump
kernel.core_pattern=core

# 决定了检测到soft lockup时是否自动panic，缺省值是0
kernel.softlockup_all_cpu_backtrace=1
kernel.softlockup_panic=1
EOF
sysctl --system
sysctl -p /etc/sysctl.d/k8s.conf

开启ipvs的前置条件

# Write the module-loading script used for kube-proxy IPVS mode. It is made
# executable and executed later by install-1.sh.
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
modprobe br_netfilter
EOF
# Load br_netfilter immediately in the current session.
modprobe br_netfilter

安装k8s rpm包

# Generate install-1.sh: node preparation (yum repos, resource limits, SELinux
# and swap off) followed by installation of kubelet/kubeadm/kubectl,
# containerd, keepalived and haproxy, and loading of the IPVS modules.
# The delimiter is quoted so nothing in the payload is expanded while the
# script is being generated.
cat <<'END' > install-1.sh
#!/bin/bash
#yum源配置
systemctl stop firewalld.service
systemctl disable firewalld.service
yum install ipset ipvsadm wget bash-completion.noarch -y
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
       https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
#limit 限制
[ ! -f /etc/security/limits.conf_bak ] && cp /etc/security/limits.conf{,_bak}
cat << EOF >> /etc/security/limits.conf
root soft nofile 655360
root hard nofile 655360
root soft nproc 655360
root hard nproc 655360
root soft core unlimited
root hard core unlimited

* soft nofile 655360
* hard nofile 655360
* soft nproc 655360
* hard nproc 655360
* soft core unlimited
* hard core unlimited
EOF

[ ! -f /etc/systemd/system.conf_bak ] && cp /etc/systemd/system.conf{,_bak}
cat << EOF >> /etc/systemd/system.conf
DefaultLimitCORE=infinity
DefaultLimitNOFILE=655360
DefaultLimitNPROC=655360
EOF

# 关闭selinux
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config
grep --color=auto '^SELINUX' /etc/selinux/config
#关闭Swap
swapoff -a
sed -i 's/.*swap.*/#&/' /etc/fstab
yum makecache fast -y
# Pin the Kubernetes packages to the release this guide targets (v1.23.5);
# an unpinned install pulls whatever is newest in the repository.
yum install -y kubelet-1.23.5 kubeadm-1.23.5 kubectl-1.23.5 && yum install -y  containerd.io
systemctl enable kubelet containerd
yum install -y keepalived haproxy
modprobe br_netfilter
sysctl --system
sysctl -p /etc/sysctl.d/k8s.conf
chmod +x /etc/sysconfig/modules/ipvs.modules && /etc/sysconfig/modules/ipvs.modules
# Match nf_conntrack, the module ipvs.modules actually loads; the pattern
# still matches nf_conntrack_ipv4 on kernels that have it.
lsmod | grep -e ip_vs -e nf_conntrack
cut -f1 -d " " /proc/modules | grep -e ip_vs -e nf_conntrack
lsmod | grep ip_vs
END

containerd 配置

# Generate install-2.sh: write containerd's default config, point it at
# mirror registries, switch it to the systemd cgroup driver, install crictl
# and do a test image pull.
# The delimiter is quoted so "$crictl_tgz" below reaches the generated script
# unexpanded.
cat <<'END' > install-2.sh
#!/bin/bash
containerd config default > /etc/containerd/config.toml
# Rewrite the sandbox (pause) image whatever tag this containerd release
# defaults to; 3.6 is the pause tag used by Kubernetes v1.23.
sed -i 's#sandbox_image = .*#sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.6"#' /etc/containerd/config.toml
sed -i "s#https://registry-1.docker.io#https://0k0953tv.mirror.aliyuncs.com#g" /etc/containerd/config.toml
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
systemctl restart containerd
systemctl status containerd.service
# Fetch the crictl tarball unless it was already placed next to this script;
# the download version matches the file that is extracted.
crictl_tgz=crictl-v1.23.0-linux-amd64.tar.gz
[ -f "$crictl_tgz" ] || wget "https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.23.0/$crictl_tgz"
tar zxvf "$crictl_tgz" -C /usr/local/bin

cat > /etc/crictl.yaml <<EOF
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 10
debug: false
EOF
crictl config runtime-endpoint unix:/run/containerd/containerd.sock
echo "测试拉取镜像"
crictl pull nginx
crictl images
END

以上所有节点上执行安装

配置高可用软件 haproxy和 keepalived

注意! 每个master上haproxy的配置必须一致

==替换以下IP ==

server master01 192.168.8.120:6443 check
server master02 192.168.8.121:6443 check
server master03 192.168.8.122:6443 check

cat >/etc/haproxy/haproxy.cfg<<"EOF"
# /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
    log /dev/log local0
    log /dev/log local1 notice
    daemon

#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 1
    timeout http-request    10s
    timeout queue           20s
    timeout connect         5s
    timeout client          20s
    timeout server          20s
    timeout http-keep-alive 10s
    timeout check           10s

frontend monitor-in
 bind *:89
 mode http
 option httplog
 monitor-uri /monit

listen stats
  bind    *:88
  mode    http
  stats   enable
  stats   hide-version
  stats   uri       /stats
  stats   refresh   30s
  stats   realm     Haproxy\ Statistics
  stats   auth      admin:admin

#---------------------------------------------------------------------
# apiserver frontend which proxys to the masters
#---------------------------------------------------------------------
frontend apiserver
    bind *:16443
    mode tcp
    option tcplog
    default_backend apiserver

#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
backend apiserver
    option httpchk GET /healthz
    http-check expect status 200
    mode tcp
    option ssl-hello-chk
    balance     roundrobin
        server master01 192.168.8.120:6443 check
        server master02 192.168.8.121:6443 check
        server master03 192.168.8.122:6443 check
EOF

每个master上keepalived配置不一样，注意区分

需要修改处

例如： # master01 配置：
修改网卡名
interface ens160
修改成你当前IP
mcast_src_ip 192.168.8.120

 #定义vip
  virtual_ipaddress {
      192.168.8.88
  }

每个master上keepalived配置不一样，注意区分

cat >/etc/keepalived/keepalived.conf<<"EOF"
! Configuration File for keepalived
global_defs {
  router_id LVS_DEVEL
script_user root
  enable_script_security
}
vrrp_script chk_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
  interval 5
  weight -5
  fall 2 
rise 1
}
vrrp_instance VI_1 {
  state MASTER
  interface ens160
  mcast_src_ip 192.168.8.120
  virtual_router_id 51
  priority 100
  advert_int 2
  authentication {
      auth_type PASS
      auth_pass K8SHA_KA_AUTH
  }
  virtual_ipaddress {
      192.168.8.88
  }
  track_script {
     chk_apiserver
  }
}
EOF

#Master02 配置

#Master02 配置：
cat >/etc/keepalived/keepalived.conf<<"EOF"
! Configuration File for keepalived
global_defs {
  router_id LVS_DEVEL
script_user root
  enable_script_security
}
vrrp_script chk_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
 interval 5
  weight -5
  fall 2 
rise 1
}
vrrp_instance VI_1 {
  state BACKUP
  interface ens160
  mcast_src_ip 192.168.8.121
  virtual_router_id 51
  priority 99
  advert_int 2
  authentication {
      auth_type PASS
      auth_pass K8SHA_KA_AUTH
  }
  virtual_ipaddress {
      192.168.8.88
  }
  track_script {
     chk_apiserver
  }
}
EOF

#Master03 配置：

#Master03 配置：
# keepalived configuration for master03: BACKUP state, priority 98, tracking
# the chk_apiserver script. The closing brace of vrrp_instance VI_1 was
# missing; it is restored so the block structure matches master01/master02.
cat >/etc/keepalived/keepalived.conf<<"EOF"
! Configuration File for keepalived
global_defs {
  router_id LVS_DEVEL
  script_user root
  enable_script_security
}
vrrp_script chk_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
  interval 5
  weight -5
  fall 2
  rise 1
}
vrrp_instance VI_1 {
  state BACKUP
  interface ens160
  mcast_src_ip 192.168.8.122
  virtual_router_id 51
  priority 98
  advert_int 2
  authentication {
      auth_type PASS
      auth_pass K8SHA_KA_AUTH
  }
  virtual_ipaddress {
      192.168.8.88
  }
  track_script {
     chk_apiserver
  }
}
EOF

健康检测脚本每台master上都必须有

# Install the health-check script referenced by keepalived's vrrp_script
# chk_apiserver and make it executable. The delimiter is quoted so the
# script body is written verbatim.
cat > /etc/keepalived/check_apiserver.sh <<"EOF"
#!/bin/bash
# Look for a running haproxy process up to three times, one second apart.
# If none is found on any attempt, stop keepalived and exit 1 (presumably so
# the VIP fails over to another master); otherwise exit 0.
err=0
for _ in 1 2 3; do
  if pgrep haproxy >/dev/null; then
    err=0
    break
  fi
  err=$((err + 1))
  sleep 1
done

if [[ $err != "0" ]]; then
  echo "systemctl stop keepalived"
  systemctl stop keepalived
  exit 1
else
  exit 0
fi
EOF
chmod +x /etc/keepalived/check_apiserver.sh

最后在每台master启动高可用软件和代理

systemctl enable keepalived.service
systemctl start keepalived.service
systemctl enable haproxy.service 
systemctl start haproxy.service 
systemctl status haproxy.service 
systemctl status keepalived.service

初始化k8s配置

kubeadm config print init-defaults > kubeadm-init.yaml

需要修改处

advertiseAddress: 192.168.10.80  <<<<-----你的当前IP 

criSocket: /run/containerd/containerd.sock  <<<<----替换成containerd

type: CoreDNS   <<----手动添加

- effect: NoSchedule  <<<<-----打污点

key: node-role.kubernetes.io/master  <<<<-----打污点

authorization-mode: Node,RBAC <<------RBAC控制
 
service-node-port-range: 80-65535  <<---端口范围设置
 
controlPlaneEndpoint: 192.168.10.88:16443     <<<<---你的vip和端口

imageRepository: registry.aliyuncs.com/google_containers  <<<----无法科学上网需要修改成阿里地址

podSubnet: 172.16.0.0/16  <<---定义IP范围

需要手动编辑

# kubeadm init configuration (InitConfiguration + ClusterConfiguration +
# KubeletConfiguration). Annotations are real YAML comments so the file can
# be used as-is after adjusting the addresses and hostname.
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.10.80  # this node's own IP
  bindPort: 6443
nodeRegistration:
  criSocket: /run/containerd/containerd.sock  # replaced with the containerd socket
  imagePullPolicy: IfNotPresent
  name: m-80.com  # this node's hostname
  taints:
  - effect: NoSchedule  # taint the control-plane node
    key: node-role.kubernetes.io/master
---
apiServer:
  extraArgs:
    authorization-mode: Node,RBAC  # RBAC authorization
    service-node-port-range: 80-65535  # NodePort range
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 192.168.10.88:16443  # your VIP and the haproxy frontend port
controllerManager: {}
dns:
  # Added by hand. NOTE(review): kubeadm v1beta3 no longer defines dns.type
  # (CoreDNS is the only option) - this line can likely be dropped; confirm.
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers  # Aliyun mirror for hosts that cannot reach the default registry
kind: ClusterConfiguration
kubernetesVersion: 1.23.5  # the release this guide targets
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/16
  podSubnet: 172.16.0.0/16  # pod CIDR; must not overlap the nodes' own network, and must equal flannel's "Network"
scheduler: {}
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
maxPods: 248

最后初始化集群

kubeadm init --config kubeadm-init.yaml --upload-certs

输出以下算成功
在这里插入图片描述
加入后其他master也需要执行

 mkdir -p $HOME/.kube
 cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
 chown $(id -u):$(id -g) $HOME/.kube/config

最后安装k8s网络插件（flannel）

wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

因为每个人网卡IP段不同
"Network": "172.16.0.0/16" 必须要和 podSubnet: 172.16.0.0/16 一致
vim 修改成host-gw

  net-conf.json: |
    {
      "Network": "172.16.0.0/16",
      "Backend": {
        "Type": "host-gw"
      }
    }

编辑完后部署网络插件

kubectl  apply -f kube-flannel.yml

附

kubectl命令补全

echo "source <(kubectl completion bash)" >> /etc/profile
source /etc/profile

24小时后token过期重新创建

kubeadm token create --print-join-command

k8s设置节点不可调度

# Mark the node unschedulable
kubectl cordon node07

# Make the node schedulable again
kubectl uncordon node07

# Evict the pods running on the node.
# --delete-emptydir-data replaces the deprecated --delete-local-data flag.
kubectl drain --ignore-daemonsets --delete-emptydir-data node07

# Remove the node from the cluster
kubectl delete node node07

查看证书

# Print the expiry date of every certificate under /etc/kubernetes/pki.
# The shell glob is iterated directly instead of parsing `ls` output.
for crt in /etc/kubernetes/pki/*.crt; do
  [ -e "$crt" ] || continue  # glob matched nothing
  echo "$crt"
  openssl x509 -enddate -noout -in "$crt"
done
# Same check for the etcd certificates
for crt in /etc/kubernetes/pki/etcd/*.crt; do
  [ -e "$crt" ] || continue  # glob matched nothing
  echo "$crt"
  openssl x509 -enddate -noout -in "$crt"
done

容器日志清理

# Truncate every *-json.log file under /var/lib/containers/ to zero length.
# The pattern is quoted so the shell cannot expand it before find sees it,
# and find runs truncate itself, so odd file names are handled safely.
# NOTE(review): Docker keeps its *-json.log files under
# /var/lib/docker/containers - confirm this path for the runtime in use.
find /var/lib/containers/ -type f -name '*-json.log' -exec truncate -s 0 {} +

再再再再最后etcd定时快照还是要做的，上传对象存储我就不介绍了

# Save an etcd snapshot under /tmp; the file name carries this host's name
# and the current date and time (minute resolution).
snapshot_file="/tmp/etcddb-$(hostname)_date$(date +%Y_%m_%d-%H-%M).db"
etcdctl \
  --endpoints="https://192.168.10.80:2379" \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --key=/etc/kubernetes/pki/etcd/healthcheck-client.key \
  --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
  snapshot save "$snapshot_file"

野猪佩挤

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
kubeadm 高可用k8s(v1.23.5)

内核参数安排cat << EOF > /etc/sysctl.d/k8s.conf############################################################################################## 调整虚拟内存################################################################################## Default: 30#
复制链接

扫一扫