文章目录
1 备份证书文件
本环境使用kubeasz工具安装,CNI插件为kube-ovn
# 查看原始k8s证书目录
tree /etc/kubernetes/
/etc/kubernetes/
├── kube-controller-manager.kubeconfig
├── kubelet.kubeconfig
├── kube-proxy.kubeconfig
├── kube-scheduler.kubeconfig
└── ssl
├── aggregator-proxy-key.pem
├── aggregator-proxy.pem
├── ca-key.pem
├── ca.pem
├── etcd-key.pem
├── etcd.pem
├── kubernetes-key.pem
└── kubernetes.pem
# kubeasz脚本安装的证书文件则在/etc/kubeasz/clusters/k8s-01/ssl/这个目录下
2 恢复etcd集群
2.1 生成证书
mkdir /root/ca && cd /root/ca
# Generate the cfssl CA signing policy.
# FIX: the original file declared "profiles" twice inside "signing".
# Duplicate JSON keys are silently collapsed by most parsers (the last one
# wins), which would drop the "kubernetes" profile used by every later
# `cfssl gencert -profile=kubernetes` invocation in this guide.
# Both profiles now live under a single "profiles" object.
cat > ca-config.json << EOF
{
  "signing": {
    "default": {
      "expiry": "438000h"
    },
    "profiles": {
      "kubernetes": {
        "usages": [
          "signing",
          "key encipherment",
          "server auth",
          "client auth"
        ],
        "expiry": "438000h"
      },
      "kcfg": {
        "usages": [
          "signing",
          "key encipherment",
          "client auth"
        ],
        "expiry": "438000h"
      }
    }
  }
}
EOF
# CA certificate signing request. "expiry": 876000h (~100 years) applies to
# the root CA certificate itself; leaf certs use the profile expiry above.
cat > ca-csr.json << EOF
{
"CN": "kubernetes",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "k8s",
"OU": "System"
}
],
"ca": {
"expiry": "876000h"
}
}
EOF
# Create the self-signed root CA: produces ca.pem, ca-key.pem and ca.csr.
cfssl gencert -initca ca-csr.json | cfssljson -bare ca
# Install the new CA into the cluster certificate directory.
mkdir -p /etc/kubernetes/ssl
cp ca*pem /etc/kubernetes/ssl
# Placeholder: set to this node's NEW IP address before running.
export NODE_IP=XXXX
# etcd server cert CSR; the unquoted heredoc deliberately expands ${NODE_IP}
# so the IP ends up in the certificate SANs ("hosts").
cat > etcd-csr.json <<EOF
{
"CN": "etcd",
"hosts": [
"${NODE_IP}",
"127.0.0.1"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "k8s",
"OU": "System"
}
]
}
EOF
# Sign the etcd cert with the new CA using the "kubernetes" profile
# (must exist in ca-config.json).
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes etcd-csr.json | cfssljson -bare etcd
# Install etcd.pem / etcd-key.pem where etcd.service expects them.
cp etcd*pem /etc/kubernetes/ssl/
2.2 修改etcd的服务配置文件
# 修改配置文件中的IP地址
vim /etc/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
Documentation=https://github.com/coreos
[Service]
Type=notify
WorkingDirectory=/var/lib/etcd/
ExecStart=/opt/kube/bin/etcd \
--name=etcd-XXXX \
--cert-file=/etc/kubernetes/ssl/etcd.pem \
--key-file=/etc/kubernetes/ssl/etcd-key.pem \
--peer-cert-file=/etc/kubernetes/ssl/etcd.pem \
--peer-key-file=/etc/kubernetes/ssl/etcd-key.pem \
--trusted-ca-file=/etc/kubernetes/ssl/ca.pem \
--peer-trusted-ca-file=/etc/kubernetes/ssl/ca.pem \
--initial-advertise-peer-urls=https://XXXX:2380 \
--listen-peer-urls=https://XXXX:2380 \
--listen-client-urls=https://XXXX:2379,http://127.0.0.1:2379 \
--advertise-client-urls=https://XXXX:2379 \
--initial-cluster-token=etcd-cluster-0 \
--initial-cluster=etcd-XXXX=https://XXXX:2380 \
--initial-cluster-state=new \
--data-dir=/var/lib/etcd \
--snapshot-count=50000 \
--auto-compaction-retention=1 \
--max-request-bytes=10485760 \
--auto-compaction-mode=periodic \
--quota-backend-bytes=8589934592
Restart=always
RestartSec=15
LimitNOFILE=65536
OOMScoreAdjust=-999
[Install]
WantedBy=multi-user.target
2.3 重启etcd服务
systemctl daemon-reload
systemctl restart etcd
# 服务启动正常
systemctl status etcd
● etcd.service - Etcd Server
Loaded: loaded (/etc/systemd/system/etcd.service; enabled; vendor preset: enabled)
Active: active (running) since Wed 2021-10-27 15:25:09 CST; 36min ago
Docs: https://github.com/coreos
Main PID: 124559 (etcd)
Tasks: 23 (limit: 19101)
Memory: 117.3M
CGroup: /system.slice/etcd.service
3 生成kubeconfig(~/.kube/config)证书
cd /root/ca
# Admin client cert CSR. O=system:masters maps to the cluster-admin RBAC
# group, so this cert grants full cluster access.
cat > admin-csr.json <<EOF
{
"CN": "admin",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "system:masters",
"OU": "System"
}
]
}
EOF
cfssl gencert \
-ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
admin-csr.json | cfssljson -bare admin
# Placeholder: replace XXXX with the apiserver's new address.
APISERVER="https://XXXX:6443"
# Build admin.kubeconfig: cluster (CA + server URL), credentials
# (client cert/key), context, then select the context.
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${APISERVER} \
--kubeconfig=admin.kubeconfig
kubectl config set-credentials admin \
--client-certificate=admin.pem \
--client-key=admin-key.pem \
--embed-certs=true \
--kubeconfig=admin.kubeconfig
kubectl config set-context default \
--cluster=kubernetes \
--user=admin \
--kubeconfig=admin.kubeconfig
kubectl config use-context default --kubeconfig=admin.kubeconfig
# Make it the default kubeconfig for kubectl on this host.
cp admin.kubeconfig ~/.kube/config
4 修复kube-apiserver
4.1 生成证书
# apiserver serving cert CSR. "hosts" (SANs) must cover every address clients
# use: loopback, the node IP (XXXX placeholder), the first IP of the service
# CIDR (10.68.0.1), extra cluster IPs/domains, and the in-cluster DNS names.
cat > kubernetes-csr.json <<EOF
{
"CN": "kubernetes",
"hosts": [
"127.0.0.1",
"XXXX",
"10.68.0.1",
"172.20.0.1",
"10.1.1.1",
"k8s.test.io",
"kubernetes",
"kubernetes.default",
"kubernetes.default.svc",
"kubernetes.default.svc.cluster",
"kubernetes.default.svc.cluster.local"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "k8s",
"OU": "System"
}
]
}
EOF
cfssl gencert \
-ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
kubernetes-csr.json | cfssljson -bare kubernetes
cp kubernetes-key.pem kubernetes.pem /etc/kubernetes/ssl/
# Aggregation-layer proxy client cert. CN=aggregator must match the
# apiserver's --requestheader-allowed-names setting.
cat > aggregator-proxy-csr.json <<EOF
{
"CN": "aggregator",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "k8s",
"OU": "System"
}
]
}
EOF
cfssl gencert \
-ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
aggregator-proxy-csr.json | cfssljson -bare aggregator-proxy
# Install both cert pairs where kube-apiserver.service expects them.
cp kubernetes.pem kubernetes-key.pem aggregator-proxy.pem aggregator-proxy-key.pem /etc/kubernetes/ssl
4.2 修改kube-apiserver服务配置文件
# 将IP地址修改为现在的IP
vim /etc/systemd/system/kube-apiserver.service
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
After=network.target
[Service]
ExecStart=/opt/kube/bin/kube-apiserver \
--advertise-address=XXXX \
--allow-privileged=true \
--anonymous-auth=false \
--api-audiences=api,istio-ca \
--authorization-mode=Node,RBAC \
--bind-address=XXXX \
--client-ca-file=/etc/kubernetes/ssl/ca.pem \
--endpoint-reconciler-type=lease \
--etcd-cafile=/etc/kubernetes/ssl/ca.pem \
--etcd-certfile=/etc/kubernetes/ssl/kubernetes.pem \
--etcd-keyfile=/etc/kubernetes/ssl/kubernetes-key.pem \
--etcd-servers=https://XXXX:2379 \
--kubelet-certificate-authority=/etc/kubernetes/ssl/ca.pem \
--kubelet-client-certificate=/etc/kubernetes/ssl/kubernetes.pem \
--kubelet-client-key=/etc/kubernetes/ssl/kubernetes-key.pem \
--service-account-issuer=kubernetes.default.svc \
--service-account-signing-key-file=/etc/kubernetes/ssl/ca-key.pem \
--service-account-key-file=/etc/kubernetes/ssl/ca.pem \
--service-cluster-ip-range=10.68.0.0/16 \
--service-node-port-range=30000-32767 \
--tls-cert-file=/etc/kubernetes/ssl/kubernetes.pem \
--tls-private-key-file=/etc/kubernetes/ssl/kubernetes-key.pem \
--requestheader-client-ca-file=/etc/kubernetes/ssl/ca.pem \
--requestheader-allowed-names=aggregator \
--requestheader-extra-headers-prefix=X-Remote-Extra- \
--requestheader-group-headers=X-Remote-Group \
--requestheader-username-headers=X-Remote-User \
--proxy-client-cert-file=/etc/kubernetes/ssl/aggregator-proxy.pem \
--proxy-client-key-file=/etc/kubernetes/ssl/aggregator-proxy-key.pem \
--enable-aggregator-routing=true \
--v=2
Restart=always
RestartSec=5
Type=notify
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
4.3 重启服务
systemctl daemon-reload
systemctl restart kube-apiserver
5 修复kube-controller-manager
5.1 生成证书
# controller-manager client cert. CN/O must be
# "system:kube-controller-manager" so the built-in RBAC role binding applies.
cat > kube-controller-manager-csr.json <<EOF
{
"CN": "system:kube-controller-manager",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "system:kube-controller-manager",
"OU": "System"
}
]
}
EOF
cfssl gencert \
-ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
kube-controller-manager-csr.json | cfssljson -bare kube-controller-manager
# Assemble the kubeconfig (relies on $APISERVER set earlier in this guide).
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${APISERVER} \
--kubeconfig=kube-controller-manager.kubeconfig
kubectl config set-credentials system:kube-controller-manager \
--client-certificate=kube-controller-manager.pem \
--client-key=kube-controller-manager-key.pem \
--embed-certs=true \
--kubeconfig=kube-controller-manager.kubeconfig
kubectl config set-context default \
--cluster=kubernetes \
--user=system:kube-controller-manager \
--kubeconfig=kube-controller-manager.kubeconfig
kubectl config use-context default --kubeconfig=kube-controller-manager.kubeconfig
cp kube-controller-manager.kubeconfig /etc/kubernetes
5.2 修改systemd文件
# ip替换
vim /etc/systemd/system/kube-controller-manager.service
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
[Service]
ExecStart=/opt/kube/bin/kube-controller-manager \
--bind-address=XXXX \
--allocate-node-cidrs=true \
--cluster-cidr=172.20.0.0/16 \
--cluster-name=kubernetes \
--cluster-signing-cert-file=/etc/kubernetes/ssl/ca.pem \
--cluster-signing-key-file=/etc/kubernetes/ssl/ca-key.pem \
--kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \
--leader-elect=true \
--node-cidr-mask-size=24 \
--root-ca-file=/etc/kubernetes/ssl/ca.pem \
--service-account-private-key-file=/etc/kubernetes/ssl/ca-key.pem \
--service-cluster-ip-range=10.68.0.0/16 \
--use-service-account-credentials=true \
--v=2
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
5.3 重启服务
systemctl daemon-reload
systemctl restart kube-controller-manager
6 修复kube-scheduler
6.1 生成证书
# scheduler client cert. CN/O "system:kube-scheduler" matches the built-in
# RBAC role binding for the scheduler.
cat > kube-scheduler-csr.json <<EOF
{
"CN": "system:kube-scheduler",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "system:kube-scheduler",
"OU": "System"
}
]
}
EOF
cfssl gencert \
-ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
kube-scheduler-csr.json | cfssljson -bare kube-scheduler
# Assemble the kubeconfig (relies on $APISERVER set earlier in this guide).
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${APISERVER} \
--kubeconfig=kube-scheduler.kubeconfig
kubectl config set-credentials system:kube-scheduler \
--client-certificate=kube-scheduler.pem \
--client-key=kube-scheduler-key.pem \
--embed-certs=true \
--kubeconfig=kube-scheduler.kubeconfig
kubectl config set-context default \
--cluster=kubernetes \
--user=system:kube-scheduler \
--kubeconfig=kube-scheduler.kubeconfig
kubectl config use-context default --kubeconfig=kube-scheduler.kubeconfig
cp kube-scheduler.kubeconfig /etc/kubernetes
6.2 修改systemd
# 修改ip
vim /etc/systemd/system/kube-scheduler.service
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
[Service]
ExecStart=/opt/kube/bin/kube-scheduler \
--bind-address=XXXX \
--kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig \
--leader-elect=true \
--v=2
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
6.3 重启服务
systemctl daemon-reload
systemctl restart kube-scheduler
7 kube-proxy修复
7.1 生成证书
# kube-proxy client cert. CN "system:kube-proxy" matches the built-in
# node-proxier RBAC role binding.
cat > kube-proxy-csr.json <<EOF
{
"CN": "system:kube-proxy",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "k8s",
"OU": "System"
}
]
}
EOF
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes kube-proxy-csr.json | cfssljson -bare kube-proxy
cp kube-proxy*.pem /etc/kubernetes/ssl/
# Assemble the kubeconfig (relies on $APISERVER set earlier in this guide).
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/ssl/ca.pem \
--embed-certs=true \
--server=${APISERVER} \
--kubeconfig=kube-proxy.kubeconfig
kubectl config set-credentials kube-proxy \
--client-certificate=/etc/kubernetes/ssl/kube-proxy.pem \
--client-key=/etc/kubernetes/ssl/kube-proxy-key.pem \
--embed-certs=true \
--kubeconfig=kube-proxy.kubeconfig
kubectl config set-context default \
--cluster=kubernetes \
--user=kube-proxy \
--kubeconfig=kube-proxy.kubeconfig
kubectl config use-context default --kubeconfig=kube-proxy.kubeconfig
cp kube-proxy.kubeconfig /etc/kubernetes/
7.2 修改systemd
# 修改IP
vim /etc/systemd/system/kube-proxy.service
[Unit]
Description=Kubernetes Kube-Proxy Server
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
After=network.target
[Service]
# kube-proxy 根据 --cluster-cidr 判断集群内部和外部流量,指定 --cluster-cidr 或 --masquerade-all 选项后,kube-proxy 会对访问 Service IP 的请求做 SNAT
WorkingDirectory=/var/lib/kube-proxy
ExecStart=/opt/kube/bin/kube-proxy \
--config=/var/lib/kube-proxy/kube-proxy-config.yaml
Restart=always
RestartSec=5
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
# 以下文件也需修改
vim /var/lib/kube-proxy/kube-proxy-config.yaml
7.3 重启服务
systemctl daemon-reload
systemctl restart kube-proxy
8 kubelet修复
8.1 生成证书
# kubelet client/serving cert. Placeholders: replace every XXXX with this
# node's IP. CN must be "system:node:<name>" and O "system:nodes" for the
# Node authorizer to accept the kubelet.
cat > kubelet-csr.json <<EOF
{
"CN": "system:node:XXXX",
"hosts": [
"127.0.0.1",
"XXXX"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "HangZhou",
"L": "XS",
"O": "system:nodes",
"OU": "System"
}
]
}
EOF
cfssl gencert \
-ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
kubelet-csr.json | cfssljson -bare kubelet
# Assemble the kubeconfig (relies on $APISERVER set earlier in this guide).
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${APISERVER} \
--kubeconfig=kubelet.kubeconfig
kubectl config set-credentials system:node:XXXX \
--client-certificate=kubelet.pem \
--client-key=kubelet-key.pem \
--embed-certs=true \
--kubeconfig=kubelet.kubeconfig
kubectl config set-context default \
--cluster=kubernetes \
--user=system:node:XXXX \
--kubeconfig=kubelet.kubeconfig
kubectl config use-context default --kubeconfig=kubelet.kubeconfig
cp kubelet-key.pem /etc/kubernetes/ssl/
cp kubelet.pem /etc/kubernetes/ssl/
cp kubelet.kubeconfig /etc/kubernetes
8.2 修改systemd
# 修改ip
vim /etc/systemd/system/kubelet.service
[Unit]
Description=Kubernetes Kubelet
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
[Service]
WorkingDirectory=/var/lib/kubelet
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpu/podruntime.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuacct/podruntime.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuset/podruntime.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/memory/podruntime.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/pids/podruntime.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/systemd/podruntime.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpu/system.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuacct/system.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuset/system.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/memory/system.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/pids/system.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/systemd/system.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/hugetlb/podruntime.slice
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/hugetlb/system.slice
ExecStart=/opt/kube/bin/kubelet \
--config=/var/lib/kubelet/config.yaml \
--cni-bin-dir=/opt/kube/bin \
--cni-conf-dir=/etc/cni/net.d \
--hostname-override=XXXX \
--image-pull-progress-deadline=5m \
--kubeconfig=/etc/kubernetes/kubelet.kubeconfig \
--network-plugin=cni \
--pod-infra-container-image=easzlab/pause-amd64:3.2 \
--root-dir=/var/lib/kubelet \
--v=2
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
# 此文件的IP也需修改
vim /var/lib/kubelet/config.yaml
8.3 重启服务
systemctl daemon-reload
systemctl restart kubelet
8.4 发现节点
# 删除之前的节点
kubectl get nodes
NAME STATUS ROLES AGE VERSION
172.27.125.193 NotReady master 8d v1.20.2
kubectl delete nodes 172.27.125.193
# 查看新nodes
root@ddddd:~/ca# kubectl get nodes
NAME STATUS ROLES AGE VERSION
172.18.30.170 NotReady <none> 9s v1.20.2
root@ddddd:~/ca# kubectl get nodes
NAME STATUS ROLES AGE VERSION
172.18.30.170 Ready <none> 101s v1.20.2
注意
kube-ovn 需要给nodes重新添加标签
kubectl label nodes 172.18.30.170 kube-ovn/role=master
问题
kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-5787695b7f-crws6 0/1 ContainerCreating 0 12h
dashboard-metrics-scraper-79c5968bdc-h258p 0/1 ContainerCreating 0 12h
kube-ovn-cni-nz4s8 0/1 CrashLoopBackOff 197 11h
kube-ovn-controller-5b5c995f45-tz4q9 0/1 Running 103 12h
kube-ovn-pinger-szlww 0/1 ContainerCreating 0 11h
kubernetes-dashboard-c4c6566d6-xf7lf 0/1 ContainerCreating 0 12h
metrics-server-8568cf894b-lgghc 0/1 ContainerCreating 0 12h
node-local-dns-5tqwc 1/1 Running 0 11h
ovn-central-64d9dd94f9-s2vpb 0/1 Running 0 12h
ovs-ovn-pdcf9 0/1 CrashLoopBackOff 164 11h
api server报错
10月 28 09:39:41 aaaa kube-apiserver[390595]: E1028 09:39:41.194334 390595 authentication.go:53] Unable to authenticate the request due to an error: [invalid bearer token, square/go-jose: error in c>
10月 28 09:39:41 aaaa kube-apiserver[390595]: E1028 09:39:41.302237 390595 authentication.go:53] Unable to authenticate the request due to an error: [invalid bearer token, square/go-jose: error in c>
10月 28 09:39:41 aaaa kube-apiserver[390595]: E1028 09:39:41.428900 390595 authentication.go:53] Unable to authenticate the request due to an error: [invalid bearer token, square/go-jose: error in c>
解决思路
由于我们重新生成了k8s的证书,所以之前的secrets都失效了,因此需删除之前的secrets,生成新的
kube-system命名空间重新生成secrets
kubectl get secrets -n kube-system | grep kubernetes.io/service-account-token | awk -F' ' '{print $1}' | xargs -I {} kubectl delete secrets {} -n kube-system
修改secrets之后,CNI pod可以启动成功,K8s服务可以正常使用!!!
kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-5787695b7f-crws6 0/1 Running 0 13h
dashboard-metrics-scraper-79c5968bdc-h258p 1/1 Running 0 13h
kube-ovn-cni-vmkhr 1/1 Running 1 9m18s
kube-ovn-controller-5b5c995f45-zl8n7 1/1 Running 0 10m
kube-ovn-pinger-ss6xv 1/1 Running 0 9m48s
kubernetes-dashboard-c4c6566d6-gtv8g 1/1 Running 0 83s
metrics-server-8568cf894b-pmnc6 1/1 Running 0 75s
node-local-dns-5tqwc 1/1 Running 0 13h
ovn-central-64d9dd94f9-kh4q5 1/1 Running 0 10m
ovs-ovn-wrwj9 1/1 Running 1 9m28s