etcd cluster backup and restore in practice

With the data in hand, the world is mine.

...I'm no good with words and don't know what else to write, so let's just go with a record of the operations.

Backup

Create the etcd cluster

Create an etcd cluster with 3 members.

The etcd cluster is created from the YAML manifest below: a headless Service (giving each member a stable DNS name) plus a 3-replica StatefulSet.

apiVersion: v1
kind: Service
metadata:
  name: etcd-headless
  labels:
    component: etcd-headless
spec:
  clusterIP: None
  ports:
  - port: 2379
    name: client
  - port: 2380
    name: peer
  selector:
    component: etcd
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: etcd
  labels:
    component: etcd
spec:
  selector:
    matchLabels:
      component: etcd
  serviceName: etcd-headless
  replicas: 3
  template:
    metadata:
      labels:
        component: etcd
    spec:
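      # Hard anti-affinity: schedule at most one etcd member per node.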
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: component
                operator: In
                values: ["etcd"]
            topologyKey: "kubernetes.io/hostname"
      containers:
      - name: etcd
        securityContext:
          privileged: true
          capabilities:
            add:
            - IPC_LOCK
        image: quay.io/coreos/etcd:v3.5.0
        imagePullPolicy: IfNotPresent
        env:
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: NETWORK_HOST
          value: "0.0.0.0"
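        # Static bootstrap list of every member and its peer URL; fed to -initial-cluster below.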
        - name: ETCD_CLUSTER
          value: "etcd-0=http://etcd-0.etcd-headless:2380,etcd-1=http://etcd-1.etcd-headless:2380,etcd-2=http://etcd-2.etcd-headless:2380"
        command: ["sh", "-c", "etcd -name $POD_NAME -data-dir /var/lib/etcd -advertise-client-urls http://$POD_NAME.etcd-headless:2379 -listen-client-urls http://0.0.0.0:2379 -listen-peer-urls http://0.0.0.0:2380 -initial-advertise-peer-urls http://$POD_NAME.etcd-headless:2380 -initial-cluster-token etcd-cluster -initial-cluster $ETCD_CLUSTER -initial-cluster-state new"]
        resources:
          requests:
            memory: "2048Mi"
            cpu: "1000m"
        ports:
        - containerPort: 2379
          name: service-port
        - containerPort: 2380
          name: peer-port
        livenessProbe:
          tcpSocket:
            port: service-port
          initialDelaySeconds: 20
          periodSeconds: 10
        volumeMounts:
        - name: data
          mountPath: /var/lib/etcd
  volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      storageClassName: rook-cephfs
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 32Gi

Apply the manifest:

root@node1:/# kubectl -n restore apply -f etcd-cluster-sts.yaml
service/etcd-headless created
statefulset.apps/etcd created
root@node1:/# kubectl -n restore get pods
NAME     READY   STATUS              RESTARTS   AGE
etcd-0   1/1     Running             0          15s
etcd-1   0/1     ContainerCreating   0          10s

Check the cluster state

root@node1:/# kubectl -n restore get pods
NAME     READY   STATUS    RESTARTS   AGE
etcd-0   1/1     Running   0          54s
etcd-1   1/1     Running   0          49s
etcd-2   1/1     Running   0          38s
root@node1:/# kubectl -n restore exec -it etcd-0  -- etcdctl member list -w table
+------------------+---------+--------+----------------------------------+----------------------------------+------------+
|        ID        | STATUS  |  NAME  |            PEER ADDRS            |           CLIENT ADDRS           | IS LEARNER |
+------------------+---------+--------+----------------------------------+----------------------------------+------------+
| 3f54ac025181a433 | started | etcd-1 | http://etcd-1.etcd-headless:2380 | http://etcd-1.etcd-headless:2379 |      false |
| 7c5422fe4922f16f | started | etcd-2 | http://etcd-2.etcd-headless:2380 | http://etcd-2.etcd-headless:2379 |      false |
| e4ad7553eba80d25 | started | etcd-0 | http://etcd-0.etcd-headless:2380 | http://etcd-0.etcd-headless:2379 |      false |
+------------------+---------+--------+----------------------------------+----------------------------------+------------+
root@node1:/# kubectl -n restore exec -it etcd-0  -- etcdctl --endpoints="http://etcd-1.etcd-headless:2379,http://etcd-2.etcd-headless:2379,http://etcd-0.etcd-headless:2379" endpoint health -w table
+----------------------------------+--------+------------+-------+
|             ENDPOINT             | HEALTH |    TOOK    | ERROR |
+----------------------------------+--------+------------+-------+
| http://etcd-0.etcd-headless:2379 |   true | 4.038685ms |       |
| http://etcd-1.etcd-headless:2379 |   true | 4.582809ms |       |
| http://etcd-2.etcd-headless:2379 |   true | 4.543312ms |       |
+----------------------------------+--------+------------+-------+
root@node1:/# kubectl -n restore exec -it etcd-0  -- etcdctl --endpoints="http://etcd-1.etcd-headless:2379,http://etcd-2.etcd-headless:2379,http://etcd-0.etcd-headless:2379" endpoint status -w table
+----------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
|             ENDPOINT             |        ID        | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+----------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| http://etcd-1.etcd-headless:2379 | 3f54ac025181a433 |   3.5.0 |   25 kB |     false |      false |         2 |         11 |                 11 |        |
| http://etcd-2.etcd-headless:2379 | 7c5422fe4922f16f |   3.5.0 |   20 kB |     false |      false |         2 |         11 |                 11 |        |
| http://etcd-0.etcd-headless:2379 | e4ad7553eba80d25 |   3.5.0 |   20 kB |      true |      false |         2 |         11 |                 11 |        |
+----------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+

Write some dummy data

root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl put foo bar
OK
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl put foo1 bar1
OK
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl put foo2 bar2
OK
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl put foo3 bar3
OK
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl put foo4 bar4
OK
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl put foo5 bar5
OK
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl get --prefix foo
foo
bar
foo1
bar1
foo2
bar2
foo3
bar3
foo4
bar4
foo5
bar5

Take the snapshot

Back up with the etcdctl snapshot command, then copy the snapshot file out of the pod.

root@node1:/# kubectl -n restore exec -it etcd-0  -- etcdctl --endpoints=http://127.0.0.1:2379  snapshot save /tmp/etcd-snapshot.db
{"level":"info","ts":1631784628.0328338,"caller":"snapshot/v3_snapshot.go:68","msg":"created temporary db file","path":"/tmp/etcd-snapshot.db.part"}
{"level":"info","ts":1631784628.0337093,"logger":"client","caller":"v3/maintenance.go:211","msg":"opened snapshot stream; downloading"}
{"level":"info","ts":1631784628.0337427,"caller":"snapshot/v3_snapshot.go:76","msg":"fetching snapshot","endpoint":"http://127.0.0.1:2379"}
{"level":"info","ts":1631784628.0348232,"logger":"client","caller":"v3/maintenance.go:219","msg":"completed snapshot read; closing"}
{"level":"info","ts":1631784628.0354915,"caller":"snapshot/v3_snapshot.go:91","msg":"fetched snapshot","endpoint":"http://127.0.0.1:2379","size":"20 kB","took":"now"}
{"level":"info","ts":1631784628.0355432,"caller":"snapshot/v3_snapshot.go:100","msg":"saved","path":"/tmp/etcd-snapshot.db"}
Snapshot saved at /tmp/etcd-snapshot.db
root@node1:/# kubectl -n restore exec -it etcd-0  -- etcdctl --endpoints=http://127.0.0.1:2379  snapshot status /tmp/etcd-snapshot.db -w table
Deprecated: Use `etcdutl snapshot status` instead.

+----------+----------+------------+------------+
|   HASH   | REVISION | TOTAL KEYS | TOTAL SIZE |
+----------+----------+------------+------------+
| 4b08d5e1 |        7 |         14 |      20 kB |
+----------+----------+------------+------------+
root@node1:/# kubectl -n restore cp etcd-0:/tmp/etcd-snapshot.db etcd-snapshot.db
tar: Removing leading `/' from member names
root@node1:/# ls -l etcd-snapshot.db
-rw-r--r-- 1 root root 20512 Sep 16 16:51 etcd-snapshot.db
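
In practice you would snapshot on a schedule rather than by hand. A minimal sketch of a periodic backup, assuming the same namespace and pod name as above and a hypothetical /backup directory on the node:

#!/bin/sh
# Hypothetical cron script: snapshot one member, copy the file out, clean up.
TS=$(date +%Y%m%d-%H%M%S)
kubectl -n restore exec etcd-0 -- \
  etcdctl --endpoints=http://127.0.0.1:2379 snapshot save /tmp/etcd-snapshot-$TS.db
kubectl -n restore cp etcd-0:/tmp/etcd-snapshot-$TS.db /backup/etcd-snapshot-$TS.db
kubectl -n restore exec etcd-0 -- rm -f /tmp/etcd-snapshot-$TS.db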

Destroy the cluster

Ha, now for the fun part. Wreck the cluster however you like, just remember to delete the PVCs along with it.

root@node1:/# kubectl -n restore delete -f etcd-cluster-sts.yaml
service "etcd-headless" deleted
statefulset.apps "etcd" deleted
root@node1:/# kubectl -n restore get pods
NAME     READY   STATUS        RESTARTS   AGE
etcd-0   1/1     Terminating   0          5m50s
etcd-1   1/1     Terminating   0          5m45s
etcd-2   1/1     Terminating   0          5m34s
root@node1:/# kubectl -n restore get pods
No resources found in restore namespace.
root@node1:/# kubectl -n restore get pvc
NAME          STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
data-etcd-0   Bound    pvc-4890a3ff-8794-4181-bbb5-83b433a64ef2   32Gi       RWO            rook-cephfs    6m39s
data-etcd-1   Bound    pvc-82571561-688f-43d6-85ca-49ea6c7068ad   32Gi       RWO            rook-cephfs    6m34s
data-etcd-2   Bound    pvc-86981a8e-db4e-4b63-8d8a-850e98a19aa9   32Gi       RWO            rook-cephfs    6m23s
root@node1:/# kubectl -n restore delete pvc --all
persistentvolumeclaim "data-etcd-0" deleted
persistentvolumeclaim "data-etcd-1" deleted
persistentvolumeclaim "data-etcd-2" deleted
root@node1:/# kubectl -n restore get pvc
No resources found in restore namespace.

Restore

Create a new cluster

Create it the same way as above. It comes up as a brand-new cluster with no data:

root@node1:/# kubectl -n restore apply -f etcd-cluster-sts.yaml
service/etcd-headless created
statefulset.apps/etcd created
...
...
...
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl get --prefix foo
root@node1:/#

Stop the cluster

Just set replicas to 0; the PVCs are left in place:

root@node1:/# kubectl -n restore patch statefulsets.apps etcd -p  '{"spec":{"replicas":0}}'
statefulset.apps/etcd patched
...
...
...
root@node1:/# kubectl -n restore get pods
No resources found in restore namespace.
root@node1:/# kubectl -n restore get pvc
NAME          STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
data-etcd-0   Bound    pvc-6cd53023-8fcf-4602-b7a3-d69239f78016   32Gi       RWO            rook-cephfs    3m24s
data-etcd-1   Bound    pvc-86853c83-88a6-465d-9cfd-37877a2ab2ac   32Gi       RWO            rook-cephfs    3m4s
data-etcd-2   Bound    pvc-519249f9-027c-4179-9812-9b6fce9ff77a   32Gi       RWO            rook-cephfs    2m53s
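
The same thing can be done with kubectl -n restore scale statefulset etcd --replicas=0.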

Create a pod for restoring the data

Don't forget to mount the PVCs from above into the pod:

apiVersion: v1
kind: Pod
metadata:
  name: restore-pod
  labels:
    app: restore-etcd
spec:
  containers:
  - name: restore
    image: quay.io/coreos/etcd:v3.5.0
    command: ['sh', '-c', 'sleep inf']
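    # "sleep inf" just keeps the container alive so we can exec in and restore by hand.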
    volumeMounts:
    - mountPath: "/var/lib/etcd0"
      name: etcd-0
    - mountPath: "/var/lib/etcd1"
      name: etcd-1
    - mountPath: "/var/lib/etcd2"
      name: etcd-2
  volumes:
    - name: etcd-0
      persistentVolumeClaim:
        claimName: data-etcd-0
    - name: etcd-1
      persistentVolumeClaim:
        claimName: data-etcd-1
    - name: etcd-2
      persistentVolumeClaim:
        claimName: data-etcd-2

Apply it:

root@node1:/# kubectl -n restore apply -f restore.yaml
pod/restore-pod created
root@node1:/# kubectl -n restore get pods
NAME          READY   STATUS    RESTARTS   AGE
restore-pod   1/1     Running   0          39s

Run the restore

root@node1:/# kubectl -n restore cp etcd-snapshot.db restore-pod:/tmp/
root@node1:/# kubectl -n restore exec -it restore-pod -- etcdctl snapshot status /tmp/etcd-snapshot.db -w table
Deprecated: Use `etcdutl snapshot status` instead.

+----------+----------+------------+------------+
|   HASH   | REVISION | TOTAL KEYS | TOTAL SIZE |
+----------+----------+------------+------------+
| 4b08d5e1 |        7 |         14 |      20 kB |
+----------+----------+------------+------------+
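
A snapshot restore writes out a fresh data directory stamped with the member identity given by --name and the --initial-* flags, so it has to be run once per member. Inside the pod, first map the three headless-service hostnames to loopback so they resolve in this standalone pod, then run one restore per member and move each result onto the matching PVC: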
root@node1:/# kubectl -n restore exec -it restore-pod -- sh
# echo 127.0.0.1 etcd-0.etcd-headless >>/etc/hosts
# echo 127.0.0.1 etcd-1.etcd-headless >>/etc/hosts
# echo 127.0.0.1 etcd-2.etcd-headless >>/etc/hosts
# cat /etc/hosts
# Kubernetes-managed hosts file.
...
127.0.0.1 etcd-0.etcd-headless
127.0.0.1 etcd-1.etcd-headless
127.0.0.1 etcd-2.etcd-headless
### wipe any existing data on the PVCs
# rm -rf /var/lib/etcd0/* /var/lib/etcd1/* /var/lib/etcd2/*
# etcdctl snapshot restore /tmp/etcd-snapshot.db \
  --name etcd-0 \
  --initial-cluster etcd-0=http://etcd-0.etcd-headless:2380,etcd-1=http://etcd-1.etcd-headless:2380,etcd-2=http://etcd-2.etcd-headless:2380 \
  --initial-advertise-peer-urls http://etcd-0.etcd-headless:2380
# etcdctl snapshot restore /tmp/etcd-snapshot.db \
  --name etcd-1 \
  --initial-cluster etcd-0=http://etcd-0.etcd-headless:2380,etcd-1=http://etcd-1.etcd-headless:2380,etcd-2=http://etcd-2.etcd-headless:2380 \
  --initial-advertise-peer-urls http://etcd-1.etcd-headless:2380
# etcdctl snapshot restore /tmp/etcd-snapshot.db \
  --name etcd-2 \
  --initial-cluster etcd-0=http://etcd-0.etcd-headless:2380,etcd-1=http://etcd-1.etcd-headless:2380,etcd-2=http://etcd-2.etcd-headless:2380 \
  --initial-advertise-peer-urls http://etcd-2.etcd-headless:2380
# mv etcd-0.etcd/member /var/lib/etcd0
# mv etcd-1.etcd/member /var/lib/etcd1
# mv etcd-2.etcd/member /var/lib/etcd2
# exit
root@node1:/# kubectl -n restore delete -f restore.yaml
pod "restore-pod" deleted
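
The three restore invocations differ only in the member name, so the session above can be collapsed into a loop. A minimal sketch, assuming the same names and paths:

CLUSTER=etcd-0=http://etcd-0.etcd-headless:2380,etcd-1=http://etcd-1.etcd-headless:2380,etcd-2=http://etcd-2.etcd-headless:2380
for i in 0 1 2; do
  # Restore a data dir stamped as member etcd-$i, then move it onto that member's PVC.
  etcdctl snapshot restore /tmp/etcd-snapshot.db \
    --name etcd-$i \
    --initial-cluster $CLUSTER \
    --initial-advertise-peer-urls http://etcd-$i.etcd-headless:2380
  mv etcd-$i.etcd/member /var/lib/etcd$i
done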

Start the cluster

Scale the StatefulSet back up to 3 and verify the members, health, and data:

root@node1:/# kubectl -n restore patch statefulsets.apps etcd -p  '{"spec":{"replicas":3}}'
statefulset.apps/etcd patched
root@node1:/# kubectl -n restore get pods
NAME     READY   STATUS              RESTARTS   AGE
etcd-0   1/1     Running             0          12s
etcd-1   0/1     ContainerCreating   0          4s
root@node1:/# kubectl -n restore get pods
NAME     READY   STATUS    RESTARTS   AGE
etcd-0   1/1     Running   0          32s
etcd-1   1/1     Running   0          24s
etcd-2   1/1     Running   0          7s
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl member list -w table
+------------------+---------+--------+----------------------------------+----------------------------------+------------+
|        ID        | STATUS  |  NAME  |            PEER ADDRS            |           CLIENT ADDRS           | IS LEARNER |
+------------------+---------+--------+----------------------------------+----------------------------------+------------+
| 3f54ac025181a433 | started | etcd-1 | http://etcd-1.etcd-headless:2380 | http://etcd-1.etcd-headless:2379 |      false |
| 7c5422fe4922f16f | started | etcd-2 | http://etcd-2.etcd-headless:2380 | http://etcd-2.etcd-headless:2379 |      false |
| e4ad7553eba80d25 | started | etcd-0 | http://etcd-0.etcd-headless:2380 | http://etcd-0.etcd-headless:2379 |      false |
+------------------+---------+--------+----------------------------------+----------------------------------+------------+
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl --endpoints="http://etcd-1.etcd-headless:2379,http://etcd-2.etcd-headless:2379,http://etcd-0.etcd-headless:2379" endpoint status -w table
+----------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
|             ENDPOINT             |        ID        | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+----------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| http://etcd-1.etcd-headless:2379 | 3f54ac025181a433 |   3.5.0 |   20 kB |     false |      false |         2 |          7 |                  7 |        |
| http://etcd-2.etcd-headless:2379 | 7c5422fe4922f16f |   3.5.0 |   20 kB |     false |      false |         2 |          7 |                  7 |        |
| http://etcd-0.etcd-headless:2379 | e4ad7553eba80d25 |   3.5.0 |   20 kB |      true |      false |         2 |          7 |                  7 |        |
+----------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl --endpoints="http://etcd-1.etcd-headless:2379,http://etcd-2.etcd-headless:2379,http://etcd-0.etcd-headless:2379" endpoint health -w table
+----------------------------------+--------+------------+-------+
|             ENDPOINT             | HEALTH |    TOOK    | ERROR |
+----------------------------------+--------+------------+-------+
| http://etcd-0.etcd-headless:2379 |   true | 3.659275ms |       |
| http://etcd-1.etcd-headless:2379 |   true |  3.89955ms |       |
| http://etcd-2.etcd-headless:2379 |   true | 3.797005ms |       |
+----------------------------------+--------+------------+-------+
root@node1:/# kubectl -n restore exec -it etcd-0 -- etcdctl get --prefix foo
foo
bar
foo1
bar1
foo2
bar2
foo3
bar3
foo4
bar4
foo5
bar5
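
All six keys are back and the member list matches the original cluster: the restore is complete.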
