初始化环境:
#!/bin/bash
# Node bootstrap: set the timezone, grant docker group access, wipe any
# previous Kubernetes/RKE state, tune kernel parameters and configure the
# Docker daemon.  Must be run as root.

timedatectl set-timezone Asia/Shanghai

# Let the current user talk to the Docker daemon without sudo
# (takes effect on next login, or via `newgrp docker`).
gpasswd -a "$USER" docker
#newgrp docker

# Remove every container/volume/image.  Guard each $(...) expansion so the
# docker commands are not invoked with an empty argument list (which errors).
containers=$(docker ps -a -q)
if [ -n "$containers" ]; then
  docker stop $containers   # word-splitting intended: one ID per argument
  docker rm $containers
fi
docker system prune -f
volumes=$(docker volume ls -q)
if [ -n "$volumes" ]; then
  docker volume rm $volumes
fi
images=$(docker image ls -q)
if [ -n "$images" ]; then
  docker image rm $images
fi

# Remove residual state left behind by a previous Kubernetes/RKE install.
rm -rf /etc/ceph \
       /etc/cni \
       /etc/kubernetes \
       /opt/cni \
       /opt/rke \
       /run/secrets/kubernetes.io \
       /run/calico \
       /run/flannel \
       /var/lib/calico \
       /var/lib/etcd \
       /var/lib/cni \
       /var/lib/kubelet \
       /var/lib/rancher/rke/log \
       /var/log/containers \
       /var/log/pods \
       /var/run/calico

# Append the Kubernetes sysctl tuning once only — the original unconditional
# append duplicated the whole section on every re-run.  Quoted heredoc
# delimiter prevents any shell expansion of the file contents.
if ! grep -q 'kernel.perf_event_paranoid=-1' /etc/sysctl.conf; then
cat >> /etc/sysctl.conf <<'SYSCTL_EOF'

net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-iptables=1
net.ipv4.ip_forward=1
net.ipv4.conf.all.forwarding=1
net.ipv4.neigh.default.gc_thresh1=4096
net.ipv4.neigh.default.gc_thresh2=6144
net.ipv4.neigh.default.gc_thresh3=8192
net.ipv4.neigh.default.gc_interval=60
net.ipv4.neigh.default.gc_stale_time=120
# 参考 https://github.com/prometheus/node_exporter#disabled-by-default
kernel.perf_event_paranoid=-1
#sysctls for k8s node config
net.ipv4.tcp_slow_start_after_idle=0
net.core.rmem_max=16777216
fs.inotify.max_user_watches=524288
kernel.softlockup_all_cpu_backtrace=1
kernel.softlockup_panic=0
kernel.watchdog_thresh=30
fs.file-max=2097152
fs.inotify.max_user_instances=8192
fs.inotify.max_queued_events=16384
vm.max_map_count=262144
fs.may_detach_mounts=1
net.core.netdev_max_backlog=16384
net.ipv4.tcp_wmem=4096 12582912 16777216
net.core.wmem_max=16777216
net.core.somaxconn=32768
net.ipv4.ip_forward=1
net.ipv4.tcp_max_syn_backlog=8096
net.ipv4.tcp_rmem=4096 12582912 16777216
net.ipv6.conf.all.disable_ipv6=1
net.ipv6.conf.default.disable_ipv6=1
net.ipv6.conf.lo.disable_ipv6=1
kernel.yama.ptrace_scope=0
vm.swappiness=0
# 可以控制core文件的文件名中是否添加pid作为扩展。
kernel.core_uses_pid=1
# Do not accept source routing
net.ipv4.conf.default.accept_source_route=0
net.ipv4.conf.all.accept_source_route=0
# Promote secondary addresses when the primary address is removed
net.ipv4.conf.default.promote_secondaries=1
net.ipv4.conf.all.promote_secondaries=1
# Enable hard and soft link protection
fs.protected_hardlinks=1
fs.protected_symlinks=1
# 源路由验证
# see details in https://help.aliyun.com/knowledge_detail/39428.html
net.ipv4.conf.all.rp_filter=0
net.ipv4.conf.default.rp_filter=0
net.ipv4.conf.default.arp_announce = 2
net.ipv4.conf.lo.arp_announce=2
net.ipv4.conf.all.arp_announce=2
# see details in https://help.aliyun.com/knowledge_detail/41334.html
net.ipv4.tcp_max_tw_buckets=5000
net.ipv4.tcp_syncookies=1
net.ipv4.tcp_fin_timeout=30
net.ipv4.tcp_synack_retries=2
kernel.sysrq=1
SYSCTL_EOF
fi
sysctl -p

# Raise open-file limits, once per host.
# NOTE(review): soft=65535 vs hard=65536 looks inconsistent — confirm whether
# both were intended to be 65535.
if ! grep -q 'soft nofile 65535' /etc/security/limits.conf; then
cat >> /etc/security/limits.conf <<EOF
* soft nofile 65535
* hard nofile 65536
EOF
fi

# /etc/docker may not exist yet on a fresh host; the original bare `touch`
# would fail in that case.
mkdir -p /etc/docker
cat > /etc/docker/daemon.json <<EOF
{
"log-driver": "json-file",
"log-opts": {
"max-size": "100m",
"max-file": "3"
},
"max-concurrent-downloads": 10,
"max-concurrent-uploads": 10,
"registry-mirrors": ["https://7bezldxe.mirror.aliyuncs.com"],
"storage-driver": "overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
]
}
EOF
systemctl daemon-reload && systemctl restart docker
下载必要的工具:
[ec2-user@ip-10-6-217-126 toolket]$ ll
total 66376
-rwxr-xr-x 1 ec2-user ec2-user 1911 Aug 25 14:08 clean.sh
-rw-r--r-- 1 ec2-user ec2-user 12741413 Aug 25 14:07 helm-v3.3.0-linux-amd64.tar.gz
drwxr-xr-x 3 ec2-user ec2-user 20 Mar 25 23:33 kubernetes
-rw-r--r-- 1 ec2-user ec2-user 13231874 Aug 25 14:20 kubernetes-client-linux-amd64.tar.gz
drwxr-xr-x 2 ec2-user ec2-user 50 Aug 12 05:41 linux-amd64
-rw-r--r-- 1 ec2-user ec2-user 8710 Aug 25 14:07 rancher-2.4.5.tgz
-rw-r--r-- 1 ec2-user ec2-user 7447 Aug 25 14:07 rancher-cluster.yml
-rwxr-xr-x 1 ec2-user ec2-user 41966325 Aug 25 14:07 rke_linux-amd64
解压kubectl并拷贝到bin目录:
[ec2-user@ip-10-6-217-126 toolket]$ tar zxvf kubernetes-client-linux-amd64.tar.gz
kubernetes/
kubernetes/client/
kubernetes/client/bin/
kubernetes/client/bin/kubectl
[ec2-user@ip-10-6-217-126 toolket]$ sudo cp kubernetes/client/bin/kubectl /usr/local/bin/
[ec2-user@ip-10-6-217-126 toolket]$ sudo cp rke_linux-amd64 /usr/local/bin/rke
[ec2-user@ip-10-6-217-126 toolket]$ tar zxvf helm-v3.3.0-linux-amd64.tar.gz
linux-amd64/
linux-amd64/README.md
linux-amd64/helm
linux-amd64/LICENSE
[ec2-user@ip-10-6-217-126 toolket]$ sudo cp linux-amd64/helm /usr/local/bin/
如果之前安装过k8s,需要执行clean.sh脚本清理环境:
# clean.sh — remove a previous Kubernetes/RKE installation from this node.
# Must be run as root.

# Stop and disable the Kubernetes system services.
k8s_services='kubelet kube-scheduler kube-proxy kube-controller-manager kube-apiserver'
for svc in $k8s_services; do
  systemctl disable "${svc}.service"
done
for svc in $k8s_services; do
  systemctl stop "${svc}.service"
done

# Remove all containers (guard against an empty ID list, which errors).
containers=$(docker ps -qa)
if [ -n "$containers" ]; then
  docker rm -f $containers   # word-splitting intended: one ID per argument
fi
# Remove all container volumes.
volumes=$(docker volume ls -q)
if [ -n "$volumes" ]; then
  docker volume rm $volumes
fi

# Unmount kubelet tmpfs mounts plus the state directories themselves.
for mnt in $(mount | grep tmpfs | grep '/var/lib/kubelet' | awk '{ print $3 }') /var/lib/kubelet /var/lib/rancher; do
  umount "$mnt"
done

# Back up the state directories.  Capture the timestamp once so all four
# backups share the same suffix (the original called date per directory and
# could straddle a minute boundary).
backup_suffix=$(date +"%Y%m%d%H%M")
mv /etc/kubernetes "/etc/kubernetes-bak-${backup_suffix}"
mv /var/lib/etcd "/var/lib/etcd-bak-${backup_suffix}"
mv /var/lib/rancher "/var/lib/rancher-bak-${backup_suffix}"
mv /opt/rke "/opt/rke-bak-${backup_suffix}"

# Remove leftover paths.
rm -rf /etc/ceph \
       /etc/cni \
       /opt/cni \
       /run/secrets/kubernetes.io \
       /run/calico \
       /run/flannel \
       /var/lib/calico \
       /var/lib/cni \
       /var/lib/kubelet \
       /var/log/containers \
       /var/log/pods \
       /var/run/calico

# Delete CNI-created network interfaces.  The keep-pattern is anchored: the
# original unanchored 'eth*|ens*' matched any name merely containing "et"
# (e.g. veth pairs), so CNI veth interfaces were never removed.
for net_inter in /sys/class/net/*; do
  net_inter=${net_inter##*/}
  if ! echo "$net_inter" | grep -qiE '^(lo|docker0|eth|ens)'; then
    ip link delete "$net_inter"
  fi
done

# Kill processes still bound to Kubernetes ports.  Match the port at the end
# of the local-address column so e.g. 80 no longer also matches 8080/10080.
port_list='80 443 6443 2376 2379 2380 8472 9099 10250 10254'
for port in $port_list; do
  pid=$(netstat -atlnup | grep -E "[:.]${port} " | awk '{print $7}' | awk -F '/' '{print $1}' | grep -v '^-' | sort -rnk2 | uniq)
  if [[ -n $pid ]]; then
    kill -9 $pid   # word-splitting intended: may hold several PIDs
  fi
done

# Kill any remaining kube* processes.
pro_pid=$(ps -ef | grep -v grep | grep kube | awk '{print $2}')
if [[ -n $pro_pid ]]; then
  kill -9 $pro_pid
fi
生成ssh key:
[ec2-user@ip-10-6-217-126 toolket]$ ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/home/ec2-user/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/ec2-user/.ssh/id_rsa.
Your public key has been saved in /home/ec2-user/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:dUw+9uau7x+Nhof+r3V0Nay0ChufZNRF9M5eLd4KcBM ec2-user@ip-10-6-217-126.cn-northwest-1.compute.internal
The key's randomart image is:
+---[RSA 2048]----+
| . o+ |
| +. o .|
| ..Eo oo|
| ..o.+oo+|
| So.ooo+ B|
| BooB *+|
| . ++ *.*|
| . = oo|
| o=B+o|
+----[SHA256]-----+
编辑.ssh/authorized_keys文件,添加.ssh/id_rsa.pub公钥内容:
[ec2-user@ip-10-6-217-126 ~]$ vim .ssh/authorized_keys
编辑rancher-cluster.yml文件,用于安装RKE集群,可以单节点安装:
# rancher-cluster.yml — RKE cluster configuration, single-node install.
#
# NOTE: the original file repeated the top-level `services:` key before each
# component.  YAML keeps only the last duplicate key, so every `services:`
# section except the final one was silently ignored.  All components are
# merged here under a single `services:` key.
nodes:
  - address: 10.6.217.126
    user: ec2-user
    role: [controlplane, worker, etcd]

services:
  etcd:
    # Enable automatic etcd snapshots.
    ## Available when rke >= 0.2.x or rancher >= v2.2.0.
    backup_config:
      enabled: true        # true enables automatic etcd backup, false disables it
      interval_hours: 12   # snapshot interval in hours; defaults to every 5 minutes if omitted
      retention: 3         # number of etcd snapshots to keep
  kube-api:
    extra_args:
      watch-cache: true
      default-watch-cache-size: 1500
      # Event retention time, default 1 hour.
      event-ttl: 1h0m0s
      # Default 400; 0 means unlimited.  Roughly 15 in-flight requests per
      # 25–30 pods.
      max-requests-inflight: 800
      # Default 200; 0 means unlimited.
      max-mutating-requests-inflight: 400
      # kubelet operation timeout, default 5s.
      kubelet-timeout: 5s
  kube-controller:
    extra_args:
      # Per-node subnet size (CIDR mask length).  Default 24 = 254 usable
      # IPs; 23 = 510; 22 = 1022.
      node-cidr-mask-size: "24"
      feature-gates: "TaintBasedEvictions=false"
      # How often the controller checks node health; default 5s.
      node-monitor-period: "5s"
      ## After communication fails, how long before the node is marked
      ## NotReady.  Must be a multiple of kubelet's
      ## nodeStatusUpdateFrequency (default 10s); default 40s.
      node-monitor-grace-period: "20s"
      ## After continued failure, how long before the node is judged
      ## unhealthy; default 1m0s.
      node-startup-grace-period: "30s"
      ## After continued loss of contact, when to start evicting the node's
      ## pods; default 5m0s.
      pod-eviction-timeout: "1m"
      # Default 5.  Number of deployments synced concurrently.
      concurrent-deployment-syncs: 5
      # Default 5.  Number of endpoints synced concurrently.
      concurrent-endpoint-syncs: 5
      # Default 20.  Number of garbage-collector workers synced concurrently.
      concurrent-gc-syncs: 20
      # Default 10.  Number of namespaces synced concurrently.
      concurrent-namespace-syncs: 10
      # Default 5.  Number of replica sets synced concurrently.
      concurrent-replicaset-syncs: 5
      # Default 5m0s.  Resource quotas synced concurrently (deprecated in
      # newer versions).
      # concurrent-resource-quota-syncs: 5m0s
      # Default 1.  Number of services synced concurrently.
      concurrent-service-syncs: 1
      # Default 5.  Number of service-account tokens synced concurrently.
      concurrent-serviceaccount-token-syncs: 5
      # Default 30s.  Deployment sync period.
      deployment-controller-sync-period: 30s
      # Default 15s.  PV/PVC sync period.
      pvclaimbinder-sync-period: 15s
  kubelet:
    extra_args:
      feature-gates: "TaintBasedEvictions=false"
      # Pause image to use.
      pod-infra-container-image: "rancher/pause:3.1"
      # MTU passed to the network plugin, overriding its default; 0 means
      # use the default of 1460.
      network-plugin-mtu: "1500"
      # Maximum number of pods per node.
      max-pods: "250"
      # Secret/ConfigMap sync frequency, default 1 minute.
      sync-frequency: "3s"
      # Max open files for the kubelet process (default 1000000); tune per
      # node capacity.
      max-open-files: "2000000"
      # Burst size when talking to the apiserver, default 10.
      kube-api-burst: "30"
      # QPS when talking to the apiserver, default 5.
      # QPS = concurrency / average response time.
      kube-api-qps: "15"
      # kubelet pulls one image at a time by default; false allows parallel
      # pulls.  Requires the overlay2 storage driver, and Docker's download
      # concurrency must be raised to match.
      serialize-image-pulls: "false"
      # Max concurrent image pulls; registry-burst must not exceed
      # registry-qps.  Only effective when registry-qps > 0 (default 10);
      # registry-qps of 0 means unlimited (default 5).
      registry-burst: "10"
      registry-qps: "0"
      cgroups-per-qos: "true"
      cgroup-driver: "cgroupfs"
      # Node resource reservation.
      enforce-node-allocatable: "pods"
      system-reserved: "cpu=0.25,memory=200Mi"
      kube-reserved: "cpu=0.25,memory=1500Mi"
      # Pod eviction — only memory and disk signals are supported.
      ## Hard eviction thresholds: when available resources fall below the
      ## reserved values, pods are force-killed immediately without waiting
      ## for graceful shutdown.
      eviction-hard: "memory.available<300Mi,nodefs.available<10%,imagefs.available<15%,nodefs.inodesFree<5%"
      ## Soft eviction thresholds: the four settings below work together.
      ## When resources fall below these (but above the hard thresholds),
      ## kubelet waits eviction-soft-grace-period, re-checking every 10s;
      ## if still breached it evicts gracefully (stop signal first), waiting
      ## up to eviction-max-pod-grace-period before force-killing the pod.
      eviction-soft: "memory.available<500Mi,nodefs.available<50%,imagefs.available<50%,nodefs.inodesFree<10%"
      eviction-soft-grace-period: "memory.available=1m30s,nodefs.available=1m30s,imagefs.available=1m30s,nodefs.inodesFree=1m30s"
      eviction-max-pod-grace-period: "30"
      eviction-pressure-transition-period: "30s"
      # How often kubelet posts node status to the master.  Must be
      # coordinated with kube-controller's nodeMonitorGracePeriod
      # (default 10s).
      node-status-update-frequency: 10s
      # cAdvisor global housekeeping interval, mainly for discovering new
      # containers via kernel events.  Default 1m0s.
      global-housekeeping-interval: 1m0s
      # Per-container stats collection interval.  Default 10s.
      housekeeping-interval: 10s
      # Timeout for all runtime requests except long-running pull/logs/
      # exec/attach; on timeout kubelet cancels, errors, and retries.
      # Default 2m0s.
      runtime-request-timeout: 2m0s
      # Interval at which kubelet computes and caches disk usage for all
      # pods and volumes.  Default 1m0s.
      volume-stats-agg-period: 1m0s
    # Optional extra volumes bound into the kubelet service.
    extra_binds:
      - "/usr/libexec/kubernetes/kubelet-plugins:/usr/libexec/kubernetes/kubelet-plugins"
      - "/etc/iscsi:/etc/iscsi"
      - "/sbin/iscsiadm:/sbin/iscsiadm"
  kubeproxy:
    extra_args:
      # iptables is used for forwarding by default; set to `ipvs` to enable
      # IPVS, together with the extra_binds below.
      proxy-mode: ""
      # Burst size when talking to the apiserver, default 10.
      kube-api-burst: 20
      # QPS when talking to the apiserver, default 5.
      # QPS = concurrency / average response time.
      kube-api-qps: 10
    extra_binds:
      - "/lib/modules:/lib/modules"
  scheduler:
    extra_args:
      # NOTE(review): empty value inherited from the original file — confirm
      # whether a burst number was intended here.
      kube-api-burst:
    extra_binds: []
    extra_env: []

# Required when TLS is terminated externally and ingress-nginx >= v0.22 is
# used.
ingress:
  provider: nginx
  options:
    use-forwarded-headers: "true"
简洁配置:
# Minimal RKE cluster configuration — single node, with etcd auto-backup and
# external TLS termination.
nodes:
  - address: 10.6.217.126
    user: ec2-user
    role: [controlplane, worker, etcd]
services:
  etcd:
    # Enable automatic etcd snapshots.
    ## Available when rke >= 0.2.x or rancher >= v2.2.0.
    backup_config:
      enabled: true        # true enables automatic etcd backup, false disables it
      interval_hours: 12   # snapshot interval in hours; defaults to every 5 minutes if omitted
      retention: 3         # number of etcd snapshots to keep
# Required when TLS is terminated externally and ingress-nginx >= v0.22 is
# used.
ingress:
  provider: nginx
  options:
    use-forwarded-headers: "true"
安装RKE:
[ec2-user@ip-10-6-217-126 toolket]$ rke up --config ./rancher-cluster.yml
INFO[0000] Running RKE version: v1.1.4
INFO[0000] Initiating Kubernetes cluster
INFO[0000] [dialer] Setup tunnel for host [10.6.217.126]
INFO[0000] Checking if container [cluster-state-deployer] is running on host [10.6.217.126], try #1
INFO[0000] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0000] Starting container [cluster-state-deployer] on host [10.6.217.126], try #1
INFO[0000] [state] Successfully started [cluster-state-deployer] container on host [10.6.217.126]
INFO[0000] [certificates] Generating CA kubernetes certificates
INFO[0000] [certificates] Generating Kubernetes API server aggregation layer requestheader client CA certificates
INFO[0001] [certificates] GenerateServingCertificate is disabled, checking if there are unused kubelet certificates
INFO[0001] [certificates] Generating Kubernetes API server certificates
INFO[0001] [certificates] Generating Service account token key
INFO[0001] [certificates] Generating Kube Controller certificates
INFO[0001] [certificates] Generating Kube Scheduler certificates
INFO[0001] [certificates] Generating Kube Proxy certificates
INFO[0001] [certificates] Generating Node certificate
INFO[0001] [certificates] Generating admin certificates and kubeconfig
INFO[0002] [certificates] Generating Kubernetes API server proxy client certificates
INFO[0002] [certificates] Generating kube-etcd-10-6-217-126 certificate and key
INFO[0002] Successfully Deployed state file at [./rancher-cluster.rkestate]
INFO[0002] Building Kubernetes cluster
INFO[0002] [dialer] Setup tunnel for host [10.6.217.126]
INFO[0002] [network] Deploying port listener containers
INFO[0002] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0002] Starting container [rke-etcd-port-listener] on host [10.6.217.126], try #1
INFO[0003] [network] Successfully started [rke-etcd-port-listener] container on host [10.6.217.126]
INFO[0003] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0003] Starting container [rke-cp-port-listener] on host [10.6.217.126], try #1
INFO[0004] [network] Successfully started [rke-cp-port-listener] container on host [10.6.217.126]
INFO[0004] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0004] Starting container [rke-worker-port-listener] on host [10.6.217.126], try #1
INFO[0005] [network] Successfully started [rke-worker-port-listener] container on host [10.6.217.126]
INFO[0005] [network] Port listener containers deployed successfully
INFO[0005] [network] Running control plane -> etcd port checks
INFO[0005] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0005] Starting container [rke-port-checker] on host [10.6.217.126], try #1
INFO[0005] [network] Successfully started [rke-port-checker] container on host [10.6.217.126]
INFO[0006] Removing container [rke-port-checker] on host [10.6.217.126], try #1
INFO[0006] [network] Running control plane -> worker port checks
INFO[0006] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0006] Starting container [rke-port-checker] on host [10.6.217.126], try #1
INFO[0006] [network] Successfully started [rke-port-checker] container on host [10.6.217.126]
INFO[0006] Removing container [rke-port-checker] on host [10.6.217.126], try #1
INFO[0006] [network] Running workers -> control plane port checks
INFO[0006] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0006] Starting container [rke-port-checker] on host [10.6.217.126], try #1
INFO[0007] [network] Successfully started [rke-port-checker] container on host [10.6.217.126]
INFO[0007] Removing container [rke-port-checker] on host [10.6.217.126], try #1
INFO[0007] [network] Checking KubeAPI port Control Plane hosts
INFO[0007] [network] Removing port listener containers
INFO[0007] Removing container [rke-etcd-port-listener] on host [10.6.217.126], try #1
INFO[0007] [remove/rke-etcd-port-listener] Successfully removed container on host [10.6.217.126]
INFO[0007] Removing container [rke-cp-port-listener] on host [10.6.217.126], try #1
INFO[0007] [remove/rke-cp-port-listener] Successfully removed container on host [10.6.217.126]
INFO[0007] Removing container [rke-worker-port-listener] on host [10.6.217.126], try #1
INFO[0008] [remove/rke-worker-port-listener] Successfully removed container on host [10.6.217.126]
INFO[0008] [network] Port listener containers removed successfully
INFO[0008] [certificates] Deploying kubernetes certificates to Cluster nodes
INFO[0008] Checking if container [cert-deployer] is running on host [10.6.217.126], try #1
INFO[0008] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0008] Starting container [cert-deployer] on host [10.6.217.126], try #1
INFO[0008] Checking if container [cert-deployer] is running on host [10.6.217.126], try #1
INFO[0013] Checking if container [cert-deployer] is running on host [10.6.217.126], try #1
INFO[0013] Removing container [cert-deployer] on host [10.6.217.126], try #1
INFO[0013] [reconcile] Rebuilding and updating local kube config
INFO[0013] Successfully Deployed local admin kubeconfig at [./kube_config_rancher-cluster.yml]
INFO[0013] [certificates] Successfully deployed kubernetes certificates to Cluster nodes
INFO[0013] [file-deploy] Deploying file [/etc/kubernetes/audit-policy.yaml] to node [10.6.217.126]
INFO[0013] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0014] Starting container [file-deployer] on host [10.6.217.126], try #1
INFO[0014] Successfully started [file-deployer] container on host [10.6.217.126]
INFO[0014] Waiting for [file-deployer] container to exit on host [10.6.217.126]
INFO[0014] Waiting for [file-deployer] container to exit on host [10.6.217.126]
INFO[0014] Container [file-deployer] is still running on host [10.6.217.126]
INFO[0015] Waiting for [file-deployer] container to exit on host [10.6.217.126]
INFO[0015] Removing container [file-deployer] on host [10.6.217.126], try #1
INFO[0015] [remove/file-deployer] Successfully removed container on host [10.6.217.126]
INFO[0015] [/etc/kubernetes/audit-policy.yaml] Successfully deployed audit policy file to Cluster control nodes
INFO[0015] [reconcile] Reconciling cluster state
INFO[0015] [reconcile] This is newly generated cluster
INFO[0015] Pre-pulling kubernetes images
INFO[0015] Image [rancher/hyperkube:v1.18.6-rancher1] exists on host [10.6.217.126]
INFO[0015] Kubernetes images pulled successfully
INFO[0015] [etcd] Building up etcd plane..
INFO[0015] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0015] Starting container [etcd-fix-perm] on host [10.6.217.126], try #1
INFO[0016] Successfully started [etcd-fix-perm] container on host [10.6.217.126]
INFO[0016] Waiting for [etcd-fix-perm] container to exit on host [10.6.217.126]
INFO[0016] Waiting for [etcd-fix-perm] container to exit on host [10.6.217.126]
INFO[0016] Container [etcd-fix-perm] is still running on host [10.6.217.126]
INFO[0017] Waiting for [etcd-fix-perm] container to exit on host [10.6.217.126]
INFO[0017] Removing container [etcd-fix-perm] on host [10.6.217.126], try #1
INFO[0017] [remove/etcd-fix-perm] Successfully removed container on host [10.6.217.126]
INFO[0017] Image [rancher/coreos-etcd:v3.4.3-rancher1] exists on host [10.6.217.126]
INFO[0017] Starting container [etcd] on host [10.6.217.126], try #1
INFO[0018] [etcd] Successfully started [etcd] container on host [10.6.217.126]
INFO[0018] [etcd] Running rolling snapshot container [etcd-snapshot-once] on host [10.6.217.126]
INFO[0018] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0018] Starting container [etcd-rolling-snapshots] on host [10.6.217.126], try #1
INFO[0018] [etcd] Successfully started [etcd-rolling-snapshots] container on host [10.6.217.126]
INFO[0023] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0023] Starting container [rke-bundle-cert] on host [10.6.217.126], try #1
INFO[0024] [certificates] Successfully started [rke-bundle-cert] container on host [10.6.217.126]
INFO[0024] Waiting for [rke-bundle-cert] container to exit on host [10.6.217.126]
INFO[0024] Container [rke-bundle-cert] is still running on host [10.6.217.126]
INFO[0025] Waiting for [rke-bundle-cert] container to exit on host [10.6.217.126]
INFO[0025] [certificates] successfully saved certificate bundle [/opt/rke/etcd-snapshots//pki.bundle.tar.gz] on host [10.6.217.126]
INFO[0025] Removing container [rke-bundle-cert] on host [10.6.217.126], try #1
INFO[0025] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0025] Starting container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0026] [etcd] Successfully started [rke-log-linker] container on host [10.6.217.126]
INFO[0026] Removing container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0026] [remove/rke-log-linker] Successfully removed container on host [10.6.217.126]
INFO[0026] [etcd] Successfully started etcd plane.. Checking etcd cluster health
INFO[0026] [controlplane] Building up Controller Plane..
INFO[0026] Checking if container [service-sidekick] is running on host [10.6.217.126], try #1
INFO[0026] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0026] Image [rancher/hyperkube:v1.18.6-rancher1] exists on host [10.6.217.126]
INFO[0026] Starting container [kube-apiserver] on host [10.6.217.126], try #1
INFO[0027] [controlplane] Successfully started [kube-apiserver] container on host [10.6.217.126]
INFO[0027] [healthcheck] Start Healthcheck on service [kube-apiserver] on host [10.6.217.126]
INFO[0035] [healthcheck] service [kube-apiserver] on host [10.6.217.126] is healthy
INFO[0035] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0035] Starting container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0036] [controlplane] Successfully started [rke-log-linker] container on host [10.6.217.126]
INFO[0036] Removing container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0036] [remove/rke-log-linker] Successfully removed container on host [10.6.217.126]
INFO[0036] Image [rancher/hyperkube:v1.18.6-rancher1] exists on host [10.6.217.126]
INFO[0036] Starting container [kube-controller-manager] on host [10.6.217.126], try #1
INFO[0037] [controlplane] Successfully started [kube-controller-manager] container on host [10.6.217.126]
INFO[0037] [healthcheck] Start Healthcheck on service [kube-controller-manager] on host [10.6.217.126]
INFO[0042] [healthcheck] service [kube-controller-manager] on host [10.6.217.126] is healthy
INFO[0042] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0042] Starting container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0042] [controlplane] Successfully started [rke-log-linker] container on host [10.6.217.126]
INFO[0042] Removing container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0043] [remove/rke-log-linker] Successfully removed container on host [10.6.217.126]
INFO[0043] Image [rancher/hyperkube:v1.18.6-rancher1] exists on host [10.6.217.126]
INFO[0043] Starting container [kube-scheduler] on host [10.6.217.126], try #1
INFO[0043] [controlplane] Successfully started [kube-scheduler] container on host [10.6.217.126]
INFO[0043] [healthcheck] Start Healthcheck on service [kube-scheduler] on host [10.6.217.126]
INFO[0048] [healthcheck] service [kube-scheduler] on host [10.6.217.126] is healthy
INFO[0048] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0049] Starting container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0049] [controlplane] Successfully started [rke-log-linker] container on host [10.6.217.126]
INFO[0049] Removing container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0049] [remove/rke-log-linker] Successfully removed container on host [10.6.217.126]
INFO[0049] [controlplane] Successfully started Controller Plane..
INFO[0049] [authz] Creating rke-job-deployer ServiceAccount
INFO[0049] [authz] rke-job-deployer ServiceAccount created successfully
INFO[0049] [authz] Creating system:node ClusterRoleBinding
INFO[0049] [authz] system:node ClusterRoleBinding created successfully
INFO[0049] [authz] Creating kube-apiserver proxy ClusterRole and ClusterRoleBinding
INFO[0049] [authz] kube-apiserver proxy ClusterRole and ClusterRoleBinding created successfully
INFO[0049] Successfully Deployed state file at [./rancher-cluster.rkestate]
INFO[0049] [state] Saving full cluster state to Kubernetes
INFO[0049] [state] Successfully Saved full cluster state to Kubernetes ConfigMap: full-cluster-state
INFO[0049] [worker] Building up Worker Plane..
INFO[0049] Checking if container [service-sidekick] is running on host [10.6.217.126], try #1
INFO[0049] [sidekick] Sidekick container already created on host [10.6.217.126]
INFO[0049] Image [rancher/hyperkube:v1.18.6-rancher1] exists on host [10.6.217.126]
INFO[0049] Starting container [kubelet] on host [10.6.217.126], try #1
INFO[0050] [worker] Successfully started [kubelet] container on host [10.6.217.126]
INFO[0050] [healthcheck] Start Healthcheck on service [kubelet] on host [10.6.217.126]
INFO[0055] [healthcheck] service [kubelet] on host [10.6.217.126] is healthy
INFO[0055] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0055] Starting container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0055] [worker] Successfully started [rke-log-linker] container on host [10.6.217.126]
INFO[0055] Removing container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0056] [remove/rke-log-linker] Successfully removed container on host [10.6.217.126]
INFO[0056] Image [rancher/hyperkube:v1.18.6-rancher1] exists on host [10.6.217.126]
INFO[0056] Starting container [kube-proxy] on host [10.6.217.126], try #1
INFO[0056] [worker] Successfully started [kube-proxy] container on host [10.6.217.126]
INFO[0056] [healthcheck] Start Healthcheck on service [kube-proxy] on host [10.6.217.126]
INFO[0056] [healthcheck] service [kube-proxy] on host [10.6.217.126] is healthy
INFO[0056] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0056] Starting container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0057] [worker] Successfully started [rke-log-linker] container on host [10.6.217.126]
INFO[0057] Removing container [rke-log-linker] on host [10.6.217.126], try #1
INFO[0057] [remove/rke-log-linker] Successfully removed container on host [10.6.217.126]
INFO[0057] [worker] Successfully started Worker Plane..
INFO[0057] Image [rancher/rke-tools:v0.1.59] exists on host [10.6.217.126]
INFO[0057] Starting container [rke-log-cleaner] on host [10.6.217.126], try #1
INFO[0058] [cleanup] Successfully started [rke-log-cleaner] container on host [10.6.217.126]
INFO[0058] Removing container [rke-log-cleaner] on host [10.6.217.126], try #1
INFO[0058] [remove/rke-log-cleaner] Successfully removed container on host [10.6.217.126]
INFO[0058] [sync] Syncing nodes Labels and Taints
INFO[0058] [sync] Successfully synced nodes Labels and Taints
INFO[0058] [network] Setting up network plugin: canal
INFO[0058] [addons] Saving ConfigMap for addon rke-network-plugin to Kubernetes
INFO[0058] [addons] Successfully saved ConfigMap for addon rke-network-plugin to Kubernetes
INFO[0058] [addons] Executing deploy job rke-network-plugin
INFO[0078] [addons] Setting up coredns
INFO[0078] [addons] Saving ConfigMap for addon rke-coredns-addon to Kubernetes
INFO[0078] [addons] Successfully saved ConfigMap for addon rke-coredns-addon to Kubernetes
INFO[0078] [addons] Executing deploy job rke-coredns-addon
INFO[0083] [addons] CoreDNS deployed successfully
INFO[0083] [dns] DNS provider coredns deployed successfully
INFO[0083] [addons] Setting up Metrics Server
INFO[0083] [addons] Saving ConfigMap for addon rke-metrics-addon to Kubernetes
INFO[0083] [addons] Successfully saved ConfigMap for addon rke-metrics-addon to Kubernetes
INFO[0083] [addons] Executing deploy job rke-metrics-addon
INFO[0088] [addons] Metrics Server deployed successfully
INFO[0088] [ingress] Setting up nginx ingress controller
INFO[0088] [addons] Saving ConfigMap for addon rke-ingress-controller to Kubernetes
INFO[0088] [addons] Successfully saved ConfigMap for addon rke-ingress-controller to Kubernetes
INFO[0088] [addons] Executing deploy job rke-ingress-controller
INFO[0093] [ingress] ingress controller nginx deployed successfully
INFO[0093] [addons] Setting up user addons
INFO[0093] [addons] no user addons defined
INFO[0093] Finished building Kubernetes cluster successfully
[ec2-user@ip-10-6-217-126 toolket]$ docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
183fc321581e rancher/pause:3.1 "/pause" 10 seconds ago Up 10 seconds k8s_POD_nginx-ingress-controller-zptj2_ingress-nginx_6f45cdd8-bf43-4c3c-bac4-0a807ae06264_0
b3a17b94c215 rancher/pause:3.1 "/pause" 24 seconds ago Up 23 seconds k8s_POD_canal-hs8gc_kube-system_014f34fe-d5ce-48b9-b2a9-de1a7d62ff98_0
a6dc1c024f7f rancher/hyperkube:v1.18.6-rancher1 "/opt/rke-tools/entr…" 44 seconds ago Up 43 seconds kube-proxy
72b70d6d69f7 rancher/hyperkube:v1.18.6-rancher1 "/opt/rke-tools/entr…" 50 seconds ago Up 50 seconds kubelet
a4c09ee15b54 rancher/hyperkube:v1.18.6-rancher1 "/opt/rke-tools/entr…" 57 seconds ago Up 56 seconds kube-scheduler
9d0c1c618ba4 rancher/hyperkube:v1.18.6-rancher1 "/opt/rke-tools/entr…" About a minute ago Up About a minute kube-controller-manager
4559750e14e6 rancher/hyperkube:v1.18.6-rancher1 "/opt/rke-tools/entr…" About a minute ago Up About a minute kube-apiserver
d5557332a025 rancher/rke-tools:v0.1.59 "/opt/rke-tools/rke-…" About a minute ago Up About a minute etcd-rolling-snapshots
fa63f04f2550 rancher/coreos-etcd:v3.4.3-rancher1 "/usr/local/bin/etcd…" About a minute ago Up About a minute etcd
检查安装状态:
[ec2-user@ip-10-6-217-126 toolket]$ cp kube_config_rancher-cluster.yml ~/.kube/config
[ec2-user@ip-10-6-217-126 toolket]$ kubectl get nodes
NAME STATUS ROLES AGE VERSION
10.6.217.126 Ready controlplane,etcd,worker 3m17s v1.18.6
[ec2-user@ip-10-6-217-126 toolket]$ kubectl get pods --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
ingress-nginx default-http-backend-598b7d7dbd-ch4gt 1/1 Running 0 3m4s
ingress-nginx nginx-ingress-controller-zptj2 1/1 Running 0 3m4s
kube-system canal-hs8gc 2/2 Running 0 3m18s
kube-system coredns-849545576b-58qdp 1/1 Running 0 3m14s
kube-system coredns-autoscaler-5dcd676cbd-qb67v 1/1 Running 0 3m14s
kube-system metrics-server-697746ff48-6gczh 1/1 Running 0 3m9s
kube-system rke-coredns-addon-deploy-job-z8gzv 0/1 Completed 0 3m15s
kube-system rke-ingress-controller-deploy-job-x9nqh 0/1 Completed 0 3m5s
kube-system rke-metrics-addon-deploy-job-tfxqp 0/1 Completed 0 3m10s
kube-system rke-network-plugin-deploy-job-dt9sr 0/1 Completed 0 3m35s
将以下文件的副本保存在安全的位置:
rancher-cluster.yml: RKE 集群配置文件。
kube_config_rancher-cluster.yml: 集群的 Kubeconfig 文件,此文件包含用于访问集群的凭据。
rancher-cluster.rkestate: Kubernetes 集群状态文件,此文件包含用于完全访问集群的凭据。
添加Helm Chart 仓库:
[ec2-user@ip-10-6-217-126 toolket]$ helm repo add rancher-stable http://rancher-mirror.oss-cn-beijing.aliyuncs.com/server-charts/stable
"rancher-stable" has been added to your repositories
创建Rancher命名空间:
[ec2-user@ip-10-6-217-126 toolket]$ kubectl create namespace cattle-system
namespace/cattle-system created
创建Rancher安装模板,指定域名,并由外部负载均衡器终止SSL:
[ec2-user@ip-10-6-217-126 toolket]$ helm template rancher ./rancher-2.4.5.tgz --output-dir . \
> --namespace cattle-system \
> --set hostname=rancher.example.cn \
> --set tls=external
wrote ./rancher/templates/serviceAccount.yaml
wrote ./rancher/templates/clusterRoleBinding.yaml
wrote ./rancher/templates/service.yaml
wrote ./rancher/templates/deployment.yaml
wrote ./rancher/templates/ingress.yaml
安装Rancher:
[ec2-user@ip-10-6-217-126 toolket]$ kubectl -n cattle-system apply -R -f ./rancher
clusterrolebinding.rbac.authorization.k8s.io/rancher created
deployment.apps/rancher created
ingress.extensions/rancher created
service/rancher created
serviceaccount/rancher created
查看安装进度及状态:
[ec2-user@ip-10-6-217-126 toolket]$ kubectl -n cattle-system rollout status deploy/rancher
Waiting for deployment "rancher" rollout to finish: 0 of 3 updated replicas are available...
Waiting for deployment "rancher" rollout to finish: 1 of 3 updated replicas are available...
Waiting for deployment spec update to be observed...
Waiting for deployment "rancher" rollout to finish: 1 of 3 updated replicas are available...
Waiting for deployment "rancher" rollout to finish: 2 of 3 updated replicas are available...
deployment "rancher" successfully rolled out
安装Gitlab:
gitlab/gitlab-ce:latest
新建数据卷
/var/log/gitlab
/var/opt/gitlab
/etc/gitlab
映射80端口用于外网访问,添加以下参数解决413错误:
metadata:
annotations:
nginx.ingress.kubernetes.io/proxy-body-size: 1024m
nginx.ingress.kubernetes.io/proxy-connect-timeout: "30"
nginx.ingress.kubernetes.io/proxy-read-timeout: "1800"
nginx.ingress.kubernetes.io/proxy-send-timeout: "1800"
最好绑定运行主机。
Jenkins安装:
jenkins/jenkins:lts
映射数据卷:
/var/jenkins_home
映射宿主机的 docker.sock,容器内需要其访问权限,可执行 sudo chmod o+rw /var/run/docker.sock 赋权:
/var/run/docker.sock
复制 docker命令到jenkins_home工作目录用于在容器中使用该命令
sudo cp /usr/bin/docker /opt/jenkins-home/tools/hudson.tasks.Maven_MavenInstallation/maven3/bin/docker
复制宿主机的 .docker/config.json授权文件,用于容器内连接私有镜像仓库
sudo cp .docker/config.json /opt/jenkins-home/.docker/config.json
Jenkins配置
1. 环境变量
PATH+EXTRA /var/jenkins_home/tools/hudson.tasks.Maven_MavenInstallation/maven3/bin
2. 配置JDK
/usr/local/openjdk-8
映射8080端口到公网访问
最好绑定运行主机。