1. Summary of etcd cluster leader election
etcd uses the Raft algorithm to elect roles.
Each node in the cluster is in exactly one of three states: leader, follower, or candidate.
Follower: follows the leader.
Candidate: candidate node; exists only briefly during an election.
Leader: the primary node.
The term ID starts as an integer with a default value of 0. After startup, nodes vote for each other based on the term ID; every time a node becomes the new leader the cluster enters a new term, and each node increments its term ID by 1.
Election flow
Initial election:
1.1 Each etcd node starts as a follower with a default term ID of 0. If it finds that the cluster has no leader, it switches to the candidate role and starts an election.
1.2 The candidate sends vote requests to the other candidate nodes and votes for itself by default.
1.3 When the candidates receive each other's vote requests, each compares the request's log with its own. If the other log is more up to date, the node grants its vote to that candidate and replies with a message containing its own latest log information. For example, if C's log is the most up to date, it receives the votes of A, B, and C and wins unanimously; if B is down, C still wins with the votes of A and C.
1.4 C then sends leader heartbeats to the other nodes to maintain its leadership (heartbeat-interval, 100 ms by default).
1.5 The other nodes switch to the follower role and synchronize data from the leader.
1.6 If the election times out, a new election is held; if two leaders end up elected, only the one that received votes from more than half of the cluster takes effect.
Subsequent elections:
1.7 When a follower does not hear from the leader within the configured time, it switches to the candidate state, sends vote requests (carrying its term ID and latest log information) to the other nodes, and waits for their responses. If that candidate's log is the most up to date, it wins a majority of the votes and becomes the new leader.
1.8 The new leader increments its term ID by 1 and announces it to the other nodes.
1.9 If the old leader recovers and finds that a new leader already exists, it rejoins the cluster under the existing leader and updates its term ID to match; within a single term, every node has the same term ID.
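A quick way to observe the current leader and term is to query each member with etcdctl endpoint status (the endpoints and certificate paths below are the ones used later in this document; adjust them to your environment). The IS LEADER column should show true on exactly one member, and RAFT TERM should be identical on all members:
# export NODE_IPS="10.0.6.4 10.0.6.5 10.0.6.6"
# for ip in ${NODE_IPS}; do
etcdctl --endpoints=https://${ip}:2379 \
--cacert=/etc/kubernetes/ssl/ca.pem \
--cert=/etc/kubernetes/ssl/etcd.pem \
--key=/etc/kubernetes/ssl/etcd-key.pem \
--write-out=table endpoint status; done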
2. Creating, reading, updating, and deleting etcd data with etcdctl
Query data
List all keys, path-style:
# etcdctl get / --prefix --keys-only
Show the namespace-related keys:
# etcdctl get / --prefix --keys-only | grep namespace
The data Kubernetes stores in etcd is protobuf-encoded, so the values come out garbled unless they are decoded with the auger tool.
Upload the auger binary and make it executable: chmod +x auger
# mv auger /usr/local/bin/
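With auger in place, a key's value can be decoded before display; a minimal example (the key below assumes the standard Kubernetes /registry layout):
# etcdctl get /registry/namespaces/default --print-value-only | auger decode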
Add data (updating data simply overwrites the existing value):
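A minimal sketch (the key /testkey is only an example):
# etcdctl put /testkey "value1"
# etcdctl get /testkey
# etcdctl put /testkey "value2"   # writing the same key again overwrites the old value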
Delete data:
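For example, deleting the test key created above (the /testdir prefix is also just an example):
# etcdctl del /testkey
# etcdctl del /testdir --prefix   # delete every key under a prefix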
The watch mechanism
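etcdctl watch blocks and prints every change event for a key (or prefix); a minimal example, again using a throw-away key:
# etcdctl watch /testkey              # terminal 1: prints PUT/DELETE events as they happen
# etcdctl put /testkey "value3"       # terminal 2: the watcher in terminal 1 reports this write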
Data backup
etcd uses a WAL (write-ahead log) mechanism: before performing the actual write, it first appends a record to the log.
The WAL stores these pre-write records; its main purpose is to record the complete history of data changes. In etcd, every data modification must be written to the WAL before it is committed.
Single-node backup
root@etcd01:~# etcdctl snapshot save /tmp/etcd_`date +%F`.db
{"level":"info","ts":"2023-05-15T19:31:57.294+0800","caller":"snapshot/v3_snapshot.go:65","msg":"created temporary db file","path":"/tmp/etcd_2023-05-15.db.part"}
{"level":"info","ts":"2023-05-15T19:31:57.296+0800","logger":"client","caller":"v3/maintenance.go:211","msg":"opened snapshot stream; downloading"}
{"level":"info","ts":"2023-05-15T19:31:57.296+0800","caller":"snapshot/v3_snapshot.go:73","msg":"fetching snapshot","endpoint":"127.0.0.1:2379"}
{"level":"info","ts":"2023-05-15T19:31:57.355+0800","logger":"client","caller":"v3/maintenance.go:219","msg":"completed snapshot read; closing"}
{"level":"info","ts":"2023-05-15T19:31:57.362+0800","caller":"snapshot/v3_snapshot.go:88","msg":"fetched snapshot","endpoint":"127.0.0.1:2379","size":"4.0 MB","took":"now"}
{"level":"info","ts":"2023-05-15T19:31:57.362+0800","caller":"snapshot/v3_snapshot.go:97","msg":"saved","path":"/tmp/etcd_2023-05-15.db"}
Snapshot saved at /tmp/etcd_2023-05-15.db
Single-node restore
The directory being restored into must be empty.
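A minimal single-node restore sketch, assuming the data directory is /var/lib/etcd (adjust to your etcd configuration) and using the snapshot saved above:
# systemctl stop etcd
# mv /var/lib/etcd /var/lib/etcd.bak              # the restore target must not contain old data
# etcdctl snapshot restore /tmp/etcd_2023-05-15.db --data-dir=/var/lib/etcd
# systemctl start etcd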
3. Back up and restore etcd data with kubeasz, and verify that the etcd cluster has one leader and multiple followers
root@deploy:/etc/kubeasz# ezctl backup k8s-01
ansible-playbook -i clusters/k8s-01/hosts -e @clusters/k8s-01/config.yml playbooks/94.backup.yml
2023-05-15 19:49:04 INFO cluster:k8s-01 backup begins in 5s, press any key to abort:
PLAY [localhost] *********************************************************************************************************************************
TASK [Gathering Facts] ***************************************************************************************************************************
ok: [localhost]
TASK [set NODE_IPS of the etcd cluster] **********************************************************************************************************
ok: [localhost]
TASK [get etcd cluster status] *******************************************************************************************************************
changed: [localhost]
TASK [debug] *************************************************************************************************************************************
ok: [localhost] => {
"ETCD_CLUSTER_STATUS": {
"changed": true,
"cmd": "for ip in 10.0.6.4 10.0.6.5 10.0.6.6 ;do ETCDCTL_API=3 /etc/kubeasz/bin/etcdctl --endpoints=https://\"$ip\":2379 --cacert=/etc/kubeasz/clusters/k8s-01/ssl/ca.pem --cert=/etc/kubeasz/clusters/k8s-01/ssl/etcd.pem --key=/etc/kubeasz/clusters/k8s-01/ssl/etcd-key.pem endpoint health; done",
"delta": "0:00:00.329028",
"end": "2023-05-15 19:49:11.767497",
"failed": false,
"rc": 0,
"start": "2023-05-15 19:49:11.438469",
"stderr": "",
"stderr_lines": [],
"stdout": "https://10.0.6.4:2379 is healthy: successfully committed proposal: took = 27.625627ms\nhttps://10.0.6.5:2379 is healthy: successfully committed proposal: took = 39.04102ms\nhttps://10.0.6.6:2379 is healthy: successfully committed proposal: took = 22.754143ms",
"stdout_lines": [
"https://10.0.6.4:2379 is healthy: successfully committed proposal: took = 27.625627ms",
"https://10.0.6.5:2379 is healthy: successfully committed proposal: took = 39.04102ms",
"https://10.0.6.6:2379 is healthy: successfully committed proposal: took = 22.754143ms"
]
}
}
TASK [get a running ectd node] *******************************************************************************************************************
changed: [localhost]
TASK [debug] *************************************************************************************************************************************
ok: [localhost] => {
"RUNNING_NODE.stdout": "10.0.6.4"
}
TASK [get current time] **************************************************************************************************************************
changed: [localhost]
TASK [make a backup on the etcd node] ************************************************************************************************************
changed: [localhost]
TASK [update the latest backup] ******************************************************************************************************************
changed: [localhost]
PLAY RECAP ***************************************************************************************************************************************
localhost : ok=9 changed=5 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
root@deploy:/etc/kubeasz# ll /etc/kubeasz/clusters/k8s-01/backup/
total 7840
drwxr-xr-x 2 root root 4096 May 15 19:49 ./
drwxr-xr-x 5 root root 4096 Apr 26 16:04 ../
-rw------- 1 root root 4005920 May 15 19:49 snapshot.db
-rw------- 1 root root 4005920 May 15 19:49 snapshot_202305151949.db
If you are running kubeasz version 3.5.2, remember to replace the following file with the 3.5.3 version:
https://github.com/easzlab/kubeasz/tree/master/roles/cluster-restore/tasks
/etc/kubeasz/roles/cluster-restore/tasks/main.yml
root@deploy:/etc/kubeasz# vim roles/cluster-restore/tasks/main.yml
- name: 停止ectd 服务
  service: name=etcd state=stopped
- name: 生成备份目录
  file: name=/etcd_backup state=directory
- name: 准备指定的备份etcd 数据
  copy:
    src: "{{ cluster_dir }}/backup/{{ db_to_restore }}"
    dest: "/etcd_backup/snapshot.db"
- name: etcd 数据恢复
  shell: "cd /etcd_backup && \
        ETCDCTL_API=3 {{ bin_dir }}/etcdctl snapshot restore snapshot.db \
        --name etcd-{{ inventory_hostname }} \
        --initial-cluster {{ ETCD_NODES }} \
        --initial-cluster-token etcd-cluster-0 \
        --initial-advertise-peer-urls https://{{ inventory_hostname }}:2380"
- name: 恢复数据至etcd 数据目录
  shell: "cp -rf /etcd_backup/etcd-{{ inventory_hostname }}.etcd/member {{ ETCD_DATA_DIR }}/"
- name: 重启etcd 服务
  service: name=etcd state=restarted
- name: 以轮询的方式等待服务同步完成
  shell: "systemctl is-active etcd.service"
  register: etcd_status
  until: '"active" in etcd_status.stdout'
  retries: 8
  delay: 8
Restore the etcd data with ezctl:
root@deploy:/etc/kubeasz# ezctl restore k8s-01
Verify:
root@etcd01:~# export NODE_IPS="10.0.6.4 10.0.6.5 10.0.6.6"
root@etcd01:~# for ip in ${NODE_IPS}; do
ETCDCTL_API=3 etcdctl \
--endpoints=https://${ip}:2379 \
--cacert=/etc/kubernetes/ssl/ca.pem \
--cert=/etc/kubernetes/ssl/etcd.pem \
--key=/etc/kubernetes/ssl/etcd-key.pem \
--write-out=table endpoint status; done
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://10.0.6.4:2379 | 39afe66105f20c66 | 3.5.5 | 4.0 MB | false | false | 2 | 1164 | 1164 | |
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://10.0.6.5:2379 | 653ea1f465e0984e | 3.5.5 | 4.0 MB | true | false | 2 | 1164 | 1164 | |
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://10.0.6.6:2379 | 76da05605422a48b | 3.5.5 | 4.0 MB | false | false | 2 | 1164 | 1164 | |
+-----------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
Summary
etcd data recovery workflow
When more than half of the etcd members are down, the cluster loses quorum and stops working.
The recovery procedure is as follows:
1. Recover the server operating systems
2. Redeploy the etcd cluster
3. Stop kube-apiserver / kube-controller-manager / kubelet / kube-scheduler / kube-proxy
4. Stop the etcd cluster
5. Restore the same backup data on every node
6. Start each node and verify the etcd cluster
7. Start kube-apiserver / kube-controller-manager / kubelet / kube-proxy / kube-scheduler
8. Verify the master status and Pod data
4. Summary of how Pods resolve domain names through CoreDNS
root@master01:~# kubectl get svc -n kube-system
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kube-dns ClusterIP 10.100.0.2 <none> 53/UDP,53/TCP,9153/TCP 19d
root@master01:~# kubectl get ep -n kube-system
NAME ENDPOINTS AGE
kube-dns 10.200.186.193:53,10.200.196.134:53,10.200.186.193:53 + 3 more... 19d
kubectl run centos01 --image=harbor.20180622.xyz/baseimages/centos:7.9 -- sleep 360000
kubectl run centos02 --image=harbor.20180622.xyz/baseimages/centos:7.9 -- sleep 360000
[root@centos02 /]# nslookup kubernetes.default.svc.cluster.local
Server: 10.100.0.2
Address: 10.100.0.2#53
Name: kubernetes.default.svc.cluster.local
Address: 10.100.0.1
To resolve a Service in another namespace, include the namespace in the name:
[root@centos02 /]# nslookup kube-dns.kube-system.svc.cluster.local
Server: 10.100.0.2
Address: 10.100.0.2#53
Name: kube-dns.kube-system.svc.cluster.local
Address: 10.100.0.2
[root@centos02 /]# cat /etc/resolv.conf
search default.svc.cluster.local svc.cluster.local cluster.local
nameserver 10.100.0.2
options ndots:5
The FQDN pattern for a Service is <service>.<namespace>.svc.cluster.local;
cluster.local is the cluster domain configured by default at installation time.
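Because of the search list and ndots:5 shown above, short names are expanded automatically before being sent to CoreDNS, so the following also resolve from inside the test pod:
[root@centos02 /]# nslookup kubernetes                  # same namespace: expanded via default.svc.cluster.local
[root@centos02 /]# nslookup kube-dns.kube-system        # other namespace: expanded via svc.cluster.local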
5. Summary of the RC, RS, and Deployment controllers
Tip: when a Pod is made up of two containers (for example an nginx container and a php container), use -c to choose which container to enter:
kubectl exec -it <pod> -c php -- bash
RC controller
The Pod names are generated from the prefix ng-rc plus a random suffix.
https://v1-26.docs.kubernetes.io/zh-cn/docs/reference/kubernetes-api/workload-resources/replication-controller-v1/
# cat 1-rc.yml
apiVersion: v1
kind: ReplicationController
metadata:
  name: ng-rc
spec:
  replicas: 2
  selector:
    app: ng-rc-80
  template:
    metadata:
      labels:
        app: ng-rc-80
    spec:
      containers:
      - name: ng-rc-80
        image: nginx
        ports:
        - containerPort: 80
RS controller
https://v1-26.docs.kubernetes.io/zh-cn/docs/reference/kubernetes-api/workload-resources/replica-set-v1/
cat 2-rs.yml
#apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: ng-rs
spec:
  replicas: 2
  selector:
    matchExpressions:
    - {key: app, operator: In, values: [ng-rs-80,ng-rs-81]}
  template:
    metadata:
      labels:
        app: ng-rs-80
    spec:
      containers:
      - name: ng-rs-80
        image: nginx
        ports:
        - containerPort: 80
Deployment controller
cat 3-deployment.yml
#apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  replicas: 2
  selector:
    #app: ng-deploy-80 #rc
    matchLabels: #rs or deployment
      app: ng-deploy-80
    # matchExpressions:
    # - {key: app, operator: In, values: [ng-deploy-80,ng-rs-81]}
  template:
    metadata:
      labels:
        app: ng-deploy-80
    spec:
      containers:
      - name: ng-deploy-80
        image: nginx
        ports:
        - containerPort: 80
Summary: all three are controllers that manage the number of Pod replicas.
Differences:
RC is the first-generation controller; its label selector supports only = and !=.
RS is the second-generation controller; it adds support for the in and notin operators.
Deployment is the third-generation controller; on top of the RS features it also supports rolling updates, rollbacks, and so on.
RC and RS control the Pod replica count directly.
A Deployment controls an RS, and the RS in turn controls the Pod replica count.
During a Deployment rolling update, when the spec is changed, new Pods are started and old Pods removed one at a time; the old Pods are only fully deleted after all of the new Pods are running.
# kubectl rollout history deployment -n default
deployment.apps/nginx-deployment
REVISION CHANGE-CAUSE
1 <none>
2 <none>
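A rolling update can be triggered and then rolled back with kubectl; a short sketch against the Deployment above (the new image tag is only an example):
# kubectl set image deployment/nginx-deployment ng-deploy-80=nginx:1.20.2
# kubectl rollout status deployment/nginx-deployment
# kubectl rollout undo deployment/nginx-deployment --to-revision=1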
6. Summary of the NodePort Service access flow (with diagram)
The different Service types:
ClusterIP: reachable only from inside the cluster.
NodePort: exposes the service running in the Pods directly on every node's IP and a fixed port.
LoadBalancer: on public clouds, combines an SLB with NodePort/ClusterIP so that requests from the Internet can be forwarded to services running inside Kubernetes.
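Access flow for a NodePort Service: client -> nodeIP:nodePort on any node -> kube-proxy rules (iptables or IPVS) on that node -> DNAT to one of the Service's Pod endpoints (forwarded across nodes if the Pod runs elsewhere). A quick way to verify this on a node (the node IP is a placeholder; 30016 is the nodePort used in the examples below):
# curl http://<node-ip>:30016/
# ipvsadm -Ln | grep -A3 30016        # IPVS proxy mode: virtual server for the NodePort and its Pod backends
# iptables -t nat -S | grep 30016     # iptables proxy mode: the KUBE-NODEPORTS DNAT rules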
7. Mounting NFS volumes in a Pod
https://kubernetes.io/docs/concepts/storage/volumes/#nfs
If mounting the NFS volume fails, install the nfs-common package on the nodes.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ng-deploy-80
  template:
    metadata:
      labels:
        app: ng-deploy-80
    spec:
      containers:
      - name: ng-deploy-80
        image: nginx
        ports:
        - containerPort: 80
        volumeMounts:
        - mountPath: /usr/share/nginx/html/mysite
          name: my-nfs-volume
      volumes:
      - name: my-nfs-volume
        nfs:
          server: 10.0.6.7
          path: /data/k8sdata
---
apiVersion: v1
kind: Service
metadata:
  name: ng-deploy-80
spec:
  ports:
  - name: http
    port: 81
    targetPort: 80
    nodePort: 30016
    protocol: TCP
  type: NodePort
  selector:
    app: ng-deploy-80
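A simple way to verify the NFS mount end to end (pod name and node IP are placeholders):
# echo "nfs test page" > /data/k8sdata/index.html        # on the NFS server 10.0.6.7
# kubectl exec -it <nginx-pod> -- ls /usr/share/nginx/html/mysite
# curl http://<node-ip>:30016/mysite/index.html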
8. Summary of static PV/PVC backed by NFS
RWO (ReadWriteOnce): for stateful services such as a MySQL primary/replica setup or a Redis cluster.
ROX (ReadOnlyMany): for multiple Pods that need to read the same data, e.g. nginx serving static files.
RWX (ReadWriteMany): for multiple Pods mounting, reading, and writing at the same time, e.g. Java applications.
Create the PV (first create the export directory on the NFS server):
mkdir -p /data/k8sdata/myserver/myappdata
apiVersion: v1
kind: PersistentVolume
metadata:
  name: myserver-myapp-static-pv
  namespace: myserver
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteMany
  nfs:
    path: /data/k8sdata/myserver/myappdata
    server: 10.0.6.7
Create the PVC
kubectl create namespace myserver
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: myserver-myapp-static-pvc
  namespace: myserver
spec:
  volumeName: myserver-myapp-static-pv
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
Create the Deployment and the Service
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: myserver-myapp
  name: myserver-myapp-deployment-name
  namespace: myserver
spec:
  replicas: 1
  selector:
    matchLabels:
      app: myserver-myapp-frontend
  template:
    metadata:
      labels:
        app: myserver-myapp-frontend
    spec:
      containers:
      - name: myserver-myapp-container
        image: nginx:1.18
        #imagePullPolicy: Always
        volumeMounts:
        - mountPath: "/usr/share/nginx/html/statics"
          name: statics-datadir
      volumes:
      - name: statics-datadir
        persistentVolumeClaim:
          claimName: myserver-myapp-static-pvc
---
kind: Service
apiVersion: v1
metadata:
  labels:
    app: myserver-myapp-service
  name: myserver-myapp-service-name
  namespace: myserver
spec:
  type: NodePort
  ports:
  - name: http
    port: 80
    targetPort: 80
    nodePort: 30016
  selector:
    app: myserver-myapp-frontend
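After applying the manifests, confirm that the PVC is bound to the PV and that the Pod is running:
# kubectl get pv myserver-myapp-static-pv
# kubectl get pvc myserver-myapp-static-pvc -n myserver    # STATUS should be Bound
# kubectl get pods -n myserver -o wide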
9. Summary of dynamic PVC provisioning with NFS
1. Create the namespace, ServiceAccount, and RBAC rules
cat 1-rbac.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: nfs
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: nfs
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: nfs
roleRef:
  kind: ClusterRole
  name: nfs-client-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: nfs
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: nfs
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: nfs
roleRef:
  kind: Role
  name: leader-locking-nfs-client-provisioner
  apiGroup: rbac.authorization.k8s.io
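Apply the RBAC manifest and confirm the ServiceAccount exists (the file name follows the cat command above):
# kubectl apply -f 1-rbac.yaml
# kubectl get sa nfs-client-provisioner -n nfs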
2. Create the StorageClass
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: managed-nfs-storage
provisioner: k8s-sigs.io/nfs-subdir-external-provisioner # or choose another name, must match deployment's env PROVISIONER_NAME
reclaimPolicy: Retain # PV reclaim policy; the default Delete removes the data on the NFS server as soon as the PV is deleted
mountOptions:
  #- vers=4.1     # some mount options behave abnormally with containerd
  #- noresvport   # tell the NFS client to use a new TCP source port when re-establishing the connection
  - noatime       # do not update the access timestamp in the file's inode; improves performance under high concurrency
parameters:
  #mountOptions: "vers=4.1,noresvport,noatime"
  archiveOnDelete: "true" # keep (archive) the data when the claim is deleted; when set to false, the data is not retained
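Apply the StorageClass and check it (the file name 2-storageclass.yaml is only an example):
# kubectl apply -f 2-storageclass.yaml
# kubectl get storageclass managed-nfs-storage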
3. Create the NFS provisioner
On the NFS server, create the export directory and export it (entry in /etc/exports):
mkdir -p /data/volumes
/data/volumes 10.0.6.0/20(rw,sync,no_root_squash,no_subtree_check)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-client-provisioner
  labels:
    app: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: nfs
spec:
  replicas: 1
  strategy: # deployment strategy
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          #image: k8s.gcr.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2
          image: harbor.20180622.xyz/baseimages/nfs-subdir-external-provisioner:v4.0.2
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: k8s-sigs.io/nfs-subdir-external-provisioner
            - name: NFS_SERVER
              value: 10.0.6.7
            - name: NFS_PATH
              value: /data/volumes
      volumes:
        - name: nfs-client-root
          nfs:
            server: 10.0.6.7
            path: /data/volumes
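Apply the provisioner Deployment and make sure the Pod reaches Running (the file name is only an example):
# kubectl apply -f 3-nfs-provisioner.yaml
# kubectl get pods -n nfs -l app=nfs-client-provisioner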
4. Create the PVC
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: myserver-myapp-dynamic-pvc
  namespace: myserver
spec:
  storageClassName: managed-nfs-storage # name of the StorageClass to use
  accessModes:
    - ReadWriteMany # access mode
  resources:
    requests:
      storage: 500Mi # requested capacity
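Once the PVC is applied, the provisioner should create a matching PV automatically (the file name is only an example):
# kubectl apply -f 4-pvc.yaml
# kubectl get pvc myserver-myapp-dynamic-pvc -n myserver    # STATUS should change to Bound
# kubectl get pv | grep myserver-myapp-dynamic-pvc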
5. Create the web service
kind: Deployment
#apiVersion: extensions/v1beta1
apiVersion: apps/v1
metadata:
  labels:
    app: myserver-myapp
  name: myserver-myapp-deployment-name
  namespace: myserver
spec:
  replicas: 3
  selector:
    matchLabels:
      app: myserver-myapp-frontend
  template:
    metadata:
      labels:
        app: myserver-myapp-frontend
    spec:
      containers:
      - name: myserver-myapp-container
        image: nginx:1.20.0
        #imagePullPolicy: Always
        volumeMounts:
        - mountPath: "/usr/share/nginx/html/statics"
          name: statics-datadir
      volumes:
      - name: statics-datadir
        persistentVolumeClaim:
          claimName: myserver-myapp-dynamic-pvc
---
kind: Service
apiVersion: v1
metadata:
  labels:
    app: myserver-myapp-service
  name: myserver-myapp-service-name
  namespace: myserver
spec:
  type: NodePort
  ports:
  - name: http
    port: 80
    targetPort: 80
    nodePort: 30016
  selector:
    app: myserver-myapp-frontend
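To verify the dynamic volume end to end: the provisioner creates a subdirectory under /data/volumes on the NFS server (named roughly <namespace>-<pvc-name>-<pv-name>); drop a file into it and fetch it through the NodePort (directory name and node IP are placeholders):
# ls /data/volumes/                                            # on the NFS server
# echo "dynamic pvc test" > /data/volumes/<subdir>/index.html
# curl http://<node-ip>:30016/statics/index.html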