1、備份
yum -y install etcd
a、手动备份
ETCDCTL_API=3 etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key snapshot save /backup/etcd-snapshot-$(date +%Y-%m-%d_%H:%M:%S_%Z).db
或者
ETCDCTL_API=3 etcdctl --endpoints=https://[127.0.0.1]:2379 --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key --cacert=/etc/kubernetes/pki/etcd/ca.crt snapshot save caserver.db
scp -r .kube 10.194.28.110:/nfs/104
scp -r /etc/kubernetes/ 10.194.28.110:/nfs/104
scp -r /var/lib/kubelet/ 10.194.28.110:/nfs/104
scp etcd-*.db 10.194.28.110:/nfs/104
b、自动备份
1> 创建cronjob
# CronJob that runs `etcdctl snapshot save` once a day on a master node and
# writes the snapshot to an NFS export mounted at /backup.
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: etcdbackup
  namespace: kube-system
spec:
  schedule: "0 0 * * *"  # back up every day at midnight
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: backup
            # Same image as in /etc/kubernetes/manifests/etcd.yaml
            image: k8s.gcr.io/etcd:3.3.10  # match the etcd image version listed by `docker images` on this host
            env:
            - name: ETCDCTL_API
              value: "3"
            command: ["/bin/sh"]
            # Run through `sh -c` so that $(date ...) is expanded when the job runs.
            args: ["-c", "etcdctl --endpoints=https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key snapshot save /backup/etcd-snapshot-$(date +%Y-%m-%d_%H:%M:%S_%Z).db"]
            volumeMounts:
            - mountPath: /etc/kubernetes/pki/etcd
              name: etcd-certs
              readOnly: true
            - mountPath: /backup
              name: backup
            - mountPath: /etc/localtime
              name: localtime
          restartPolicy: OnFailure
          # Schedule onto a master node; the endpoint above is 127.0.0.1 and
          # the pod uses the host network (hostNetwork below).
          nodeSelector:
            node-role.kubernetes.io/master: ""
          tolerations:
          - key: "node-role.kubernetes.io/master"
            effect: "NoSchedule"
          hostNetwork: true
          volumes:
          - name: etcd-certs
            hostPath:
              path: /etc/kubernetes/pki/etcd
              type: DirectoryOrCreate
          - name: backup
            nfs:
              server: 10.192.30.121  # backup destination (NFS server)
              path: /nfs/etcd_backup/
              # type: DirectoryOrCreate
          - name: localtime
            hostPath:
              path: /usr/share/zoneinfo/Asia/Shanghai
2> 創建cron腳本(同步到nfs)
cat etcd.sh
#!/bin/bash
# Copy the kubeconfig, /etc/kubernetes, kubelet and etcd directories to /nfs
# and save an etcd snapshot there. Each artifact is suffixed with a timestamp.
#
# Every step is attempted even if an earlier one fails; the script exits
# non-zero if any step failed so the failure is visible to the caller.

# Take the timestamp once so all artifacts of a single run share one suffix.
ts=$(date +%Y-%m-%d_%H%M%S_%Z)
rc=0

cp -r /root/.kube "/nfs/kube/kube-${ts}" || rc=1
cp -r /etc/kubernetes "/nfs/kubernetes/kubernetes-${ts}" || rc=1
cp -r /var/lib/kubelet "/nfs/kubelet/kubelet-${ts}" || rc=1
cp -r /var/lib/etcd "/nfs/etcd/etcd-${ts}" || rc=1

ETCDCTL_API=3 etcdctl \
  --endpoints=https://[127.0.0.1]:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
  --key=/etc/kubernetes/pki/etcd/healthcheck-client.key \
  snapshot save "/nfs/etcd_db/etcd_${ts}.db" || rc=1

exit "$rc"
#crontab -e
0 0 * * * /home/etcd.sh
# chmod u+x /home/etcd.sh
#systemctl restart crond
2、恢復備份
a、etcd損壞
修改/etc/kubernetes/manifests/kube-apiserver.yaml& etcd.yaml鏡像版本使其不可用
systemctl stop docker kubelet.service
rm -rf /var/lib/etcd
scp etcd-*.db 10.194.28.104:/root  # 複製etcd備份到master節點
ETCDCTL_API=3 etcdctl snapshot restore /backup/etcd-snapshot-2020-05-05_14\:21\:49_CST.db --data-dir=/var/lib/etcd
恢復/etc/kubernetes/manifests/kube-apiserver.yaml& etcd.yaml鏡像版本
systemctl start docker
systemctl start kubelet.service
etcd kube-apiserver pod 等啟動正常。
b、master節點損壞
修改hosts文件,host文件名,ip,安裝docker&kubelet kubeadm kubectl…
(使其恢復至初始化之前)
systemctl start docker
systemctl stop kubelet.service
ETCDCTL_API=3 etcdctl snapshot restore caserver.db --data-dir=/var/lib/etcd
分別再還原備份的三個目錄
scp -r .kube 10.194.28.104:/root
scp -r kubelet 10.194.28.104:/var/lib/
scp -r kubernetes/ 10.194.28.104:/etc/
systemctl start kubelet.service
如主節點網絡異常,刪除master節點的flannel和kube-proxy,自動重建成功。