Using leaderelection in Kubernetes for Component High Availability

https://blog.csdn.net/RA681t58CJxsgCkJ31/article/details/104386126/


In Kubernetes, kube-scheduler and kube-controller-manager are normally deployed with multiple replicas for high availability, yet only one instance is actually doing the work at any given time.

This is where the leader election mechanism of the leaderelection package comes in: it guarantees that the leader is the instance doing the work, and that when the leader dies, a new leader is elected from the other nodes so the component keeps running. This article looks at how to use this package and how it is implemented internally.

Basic usage

Below is a simple usage example. After building it, start several processes at the same time: only one of them does any work. When the leader process is killed, a new leader is elected and takes over, i.e. executes the run method:

 
```go
/*
This example comes from the example package in client-go.
*/
package main

import (
	"context"
	"flag"
	"os"
	"os/signal"
	"syscall"
	"time"

	"github.com/google/uuid"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/tools/leaderelection"
	"k8s.io/client-go/tools/leaderelection/resourcelock"
	"k8s.io/klog"
)

func buildConfig(kubeconfig string) (*rest.Config, error) {
	if kubeconfig != "" {
		cfg, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
		if err != nil {
			return nil, err
		}
		return cfg, nil
	}

	cfg, err := rest.InClusterConfig()
	if err != nil {
		return nil, err
	}
	return cfg, nil
}

func main() {
	klog.InitFlags(nil)

	var kubeconfig string
	var leaseLockName string
	var leaseLockNamespace string
	var id string

	flag.StringVar(&kubeconfig, "kubeconfig", "", "absolute path to the kubeconfig file")
	flag.StringVar(&id, "id", uuid.New().String(), "the holder identity name")
	flag.StringVar(&leaseLockName, "lease-lock-name", "", "the lease lock resource name")
	flag.StringVar(&leaseLockNamespace, "lease-lock-namespace", "", "the lease lock resource namespace")
	flag.Parse()

	if leaseLockName == "" {
		klog.Fatal("unable to get lease lock resource name (missing lease-lock-name flag).")
	}
	if leaseLockNamespace == "" {
		klog.Fatal("unable to get lease lock resource namespace (missing lease-lock-namespace flag).")
	}

	// leader election uses the Kubernetes API by writing to a
	// lock object, which can be a LeaseLock object (preferred),
	// a ConfigMap, or an Endpoints (deprecated) object.
	// Conflicting writes are detected and each client handles those actions
	// independently.
	config, err := buildConfig(kubeconfig)
	if err != nil {
		klog.Fatal(err)
	}
	client := clientset.NewForConfigOrDie(config)

	run := func(ctx context.Context) {
		// complete your controller loop here
		klog.Info("Controller loop...")

		select {}
	}

	// use a Go context so we can tell the leaderelection code when we
	// want to step down
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// listen for interrupts or the Linux SIGTERM signal and cancel
	// our context, which the leader election code will observe and
	// step down
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-ch
		klog.Info("Received termination, signaling shutdown")
		cancel()
	}()

	// we use the Lease lock type since edits to Leases are less common
	// and fewer objects in the cluster watch "all Leases".
	// This specifies the resource object backing the lock; a Lease is used
	// here, but configmap, endpoints, and multilock (a combination of two
	// types) are also supported.
	lock := &resourcelock.LeaseLock{
		LeaseMeta: metav1.ObjectMeta{
			Name:      leaseLockName,
			Namespace: leaseLockNamespace,
		},
		Client: client.CoordinationV1(),
		LockConfig: resourcelock.ResourceLockConfig{
			Identity: id,
		},
	}

	// start the leader election code loop
	leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
		Lock: lock,
		// IMPORTANT: you MUST ensure that any code you have that
		// is protected by the lease must terminate **before**
		// you call cancel. Otherwise, you could have a background
		// loop still running and another process could
		// get elected before your background loop finished, violating
		// the stated goal of the lease.
		ReleaseOnCancel: true,
		LeaseDuration:   60 * time.Second, // duration of the lease
		RenewDeadline:   15 * time.Second, // deadline for renewing the lease
		RetryPeriod:     5 * time.Second,  // retry interval for non-leader candidates
		Callbacks: leaderelection.LeaderCallbacks{
			OnStartedLeading: func(ctx context.Context) {
				// business code to run once this instance becomes leader.
				// we're notified when we start - this is where you would
				// usually put your code
				run(ctx)
			},
			OnStoppedLeading: func() {
				// the process exits here.
				// we can do cleanup here
				klog.Infof("leader lost: %s", id)
				os.Exit(0)
			},
			OnNewLeader: func(identity string) {
				// called after a new leader has been elected.
				// we're notified when new leader elected
				if identity == id {
					// I just got the lock
					return
				}
				klog.Infof("new leader elected: %s", identity)
			},
		},
	})
}
```

Key command-line flags:

 
- kubeconfig: path to the kubeconfig file
- lease-lock-name: name of the lock
- lease-lock-namespace: namespace the lock is stored in
- id: an identifier the example uses to tell instances apart
- logtostderr: a klog flag that sends log output to the console
- v: log verbosity level

Start two processes at the same time.

Start process 1:

 
```shell
go run main.go -kubeconfig=/Users/silenceper/.kube/config -logtostderr=true -lease-lock-name=example -lease-lock-namespace=default -id=1 -v=4
I0215 19:56:37.049658 48045 leaderelection.go:242] attempting to acquire leader lease default/example...
I0215 19:56:37.080368 48045 leaderelection.go:252] successfully acquired lease default/example
I0215 19:56:37.080437 48045 main.go:87] Controller loop...
```

Start process 2:

 
```shell
➜ leaderelection git:(master) ✗ go run main.go -kubeconfig=/Users/silenceper/.kube/config -logtostderr=true -lease-lock-name=example -lease-lock-namespace=default -id=2 -v=4
I0215 19:57:35.870051 48791 leaderelection.go:242] attempting to acquire leader lease default/example...
I0215 19:57:35.894735 48791 leaderelection.go:352] lock is held by 1 and has not yet expired
I0215 19:57:35.894769 48791 leaderelection.go:247] failed to acquire lease default/example
I0215 19:57:35.894790 48791 main.go:151] new leader elected: 1
I0215 19:57:44.532991 48791 leaderelection.go:352] lock is held by 1 and has not yet expired
I0215 19:57:44.533028 48791 leaderelection.go:247] failed to acquire lease default/example
```

You can see that the id=1 process holds the lock and runs the program, while the id=2 process cannot acquire the lock and keeps retrying.

Now kill the id=1 process. After the lock is released (which takes up to LeaseDuration), the id=2 process becomes the leader and starts working:

 
```shell
I0215 20:01:41.489300 48791 leaderelection.go:252] successfully acquired lease default/example
I0215 20:01:41.489577 48791 main.go:87] Controller loop...
```

Going deeper

The basic principle is to implement a distributed lock on top of a Kubernetes configmap, endpoints, or lease resource. The node that acquires the lock becomes the leader and renews the lease periodically. The other processes keep trying to acquire the lock as well; when they fail, they wait for the next retry cycle. Once the leader dies and its lease expires, another node takes over as the new leader.

Entry point

Everything starts from leaderelection.RunOrDie:

 
```go
func RunOrDie(ctx context.Context, lec LeaderElectionConfig) {
	le, err := NewLeaderElector(lec)
	if err != nil {
		panic(err)
	}
	if lec.WatchDog != nil {
		lec.WatchDog.SetLeaderElection(le)
	}
	le.Run(ctx)
}
```

It takes a LeaderElectionConfig parameter:

 
```go
type LeaderElectionConfig struct {
	// the type of the lock
	Lock rl.Interface
	// how long the lock is held
	LeaseDuration time.Duration
	// timeout for renewing the lease
	RenewDeadline time.Duration
	// interval between attempts to acquire the lock
	RetryPeriod time.Duration
	// functions executed on state changes; three are supported:
	// 1. OnStartedLeading: business code run when leadership starts
	// 2. OnStoppedLeading: run when this instance stops leading
	// 3. OnNewLeader: run after a new leader has been elected
	Callbacks LeaderCallbacks

	// health checking
	// WatchDog is the associated health checker
	// WatchDog may be null if its not needed/configured.
	WatchDog *HealthzAdaptor
	// whether to release the lock when the leader steps down
	ReleaseOnCancel bool

	// Name is the name of the resource lock for debugging
	Name string
}
```
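The three durations are not independent: LeaseDuration must be longer than RenewDeadline, which in turn must be longer than the retry interval scaled by a jitter factor, otherwise a healthy leader could lose its own lease between renewal attempts. Below is a minimal sanity-check sketch mirroring the kind of validation NewLeaderElector performs; the exact constant and error messages are assumptions for illustration:

```go
package sketch

import (
	"fmt"
	"time"
)

// validateTimings sketches the ordering the election timings must satisfy.
func validateTimings(leaseDuration, renewDeadline, retryPeriod time.Duration) error {
	const jitterFactor = 1.2 // assumed value, for illustration only
	if leaseDuration <= renewDeadline {
		return fmt.Errorf("leaseDuration must be greater than renewDeadline")
	}
	if renewDeadline <= time.Duration(jitterFactor*float64(retryPeriod)) {
		return fmt.Errorf("renewDeadline must be greater than retryPeriod*jitterFactor")
	}
	return nil
}
```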

LeaderElectionConfig.Lock supports storing the lock in three resource types:
configmap
endpoints
lease
The package also provides a multilock, which combines two of these: when writing to the first fails, the second is used.
This can be seen in interface.go:

 
```go
switch lockType {
case EndpointsResourceLock: // stored in endpoints
	return endpointsLock, nil
case ConfigMapsResourceLock: // stored in configmaps
	return configmapLock, nil
case LeasesResourceLock: // stored in leases
	return leaseLock, nil
case EndpointsLeasesResourceLock: // tries endpoints first, falls back to lease
	return &MultiLock{
		Primary:   endpointsLock,
		Secondary: leaseLock,
	}, nil
case ConfigMapsLeasesResourceLock: // tries configmap first, falls back to lease
	return &MultiLock{
		Primary:   configmapLock,
		Secondary: leaseLock,
	}, nil
default:
	return nil, fmt.Errorf("Invalid lock-type %s", lockType)
}
```
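Instead of constructing a LeaseLock literal as in the example earlier, a lock can also be built through the resourcelock.New factory, which is where this switch lives. A minimal sketch, assuming the factory signature from the client-go version this article targets:

```go
package sketch

import (
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/leaderelection/resourcelock"
)

// newLeaseLock builds a lease-based lock through the factory; the lock type
// argument selects one of the cases in the switch above.
func newLeaseLock(client clientset.Interface, ns, name, id string) (resourcelock.Interface, error) {
	return resourcelock.New(
		resourcelock.LeasesResourceLock,
		ns,   // namespace of the lock object
		name, // name of the lock object
		client.CoreV1(),         // used by configmap/endpoints locks
		client.CoordinationV1(), // used by lease locks
		resourcelock.ResourceLockConfig{Identity: id},
	)
}
```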

Taking the lease resource as an example, you can inspect what is stored in it:

 
```yaml
➜ ~ kubectl get lease example -n default -o yaml
apiVersion: coordination.k8s.io/v1
kind: Lease
metadata:
  creationTimestamp: "2020-02-15T11:56:37Z"
  name: example
  namespace: default
  resourceVersion: "210675"
  selfLink: /apis/coordination.k8s.io/v1/namespaces/default/leases/example
  uid: a3470a06-6fc3-42dc-8242-9d6cebdf5315
spec:
  acquireTime: "2020-02-15T12:01:41.476971Z"  # when the lock was acquired
  holderIdentity: "2"                         # identity of the lock holder
  leaseDurationSeconds: 60                    # lease duration in seconds
  leaseTransitions: 1                         # number of leader changes
  renewTime: "2020-02-15T12:05:37.134655Z"    # when the lease was last renewed
```

Note the annotated fields under spec; they map to the following struct:

 
```go
type LeaderElectionRecord struct {
	HolderIdentity       string      `json:"holderIdentity"`       // identity of the lock holder, often the hostname
	LeaseDurationSeconds int         `json:"leaseDurationSeconds"` // lease duration of the lock
	AcquireTime          metav1.Time `json:"acquireTime"`          // when the lock was acquired
	RenewTime            metav1.Time `json:"renewTime"`            // when the lease was last renewed
	LeaderTransitions    int         `json:"leaderTransitions"`    // number of leader changes
}
```

Acquiring and renewing the lock

The Run method contains the entry points for both acquiring and renewing the lock:

 
```go
// Run starts the leader election loop
func (le *LeaderElector) Run(ctx context.Context) {
	defer func() {
		// clean up on the way out
		runtime.HandleCrash()
		// run the callback for losing leadership
		le.config.Callbacks.OnStoppedLeading()
	}()
	// keep trying to acquire the lock; on success continue below,
	// otherwise retry until the context is cancelled
	if !le.acquire(ctx) {
		return // ctx signalled done
	}
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	// the lock was acquired and this process is now the leader:
	// run the business code from the callback
	go le.config.Callbacks.OnStartedLeading(ctx)
	// renew the lease in a loop so the lock stays held by this process
	le.renew(ctx)
}
```

Both le.acquire and le.renew call the le.tryAcquireOrRenew function internally; they only differ in how they handle its result.

le.acquire returns once le.tryAcquireOrRenew succeeds and keeps retrying on failure.

le.renew is the opposite: it keeps going on success and returns on failure. A simplified sketch of the two loops follows.
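The sketch below makes that symmetry concrete. It mirrors the shape of the client-go implementation rather than reproducing its exact source; the standalone function signatures and the jitter factor of 1.2 are assumptions for illustration:

```go
package sketch

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// acquire keeps calling try every retryPeriod until it succeeds or ctx is done.
func acquire(ctx context.Context, try func() bool, retryPeriod time.Duration) bool {
	succeeded := false
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	wait.JitterUntil(func() {
		if succeeded = try(); succeeded {
			cancel() // lock acquired: stop retrying
		}
	}, retryPeriod, 1.2, true, ctx.Done())
	return succeeded
}

// renew keeps calling try every retryPeriod; it returns as soon as one
// renewal round fails to succeed within renewDeadline.
func renew(ctx context.Context, try func() bool, retryPeriod, renewDeadline time.Duration) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	wait.Until(func() {
		timeoutCtx, timeoutCancel := context.WithTimeout(ctx, renewDeadline)
		defer timeoutCancel()
		if err := wait.PollImmediateUntil(retryPeriod, func() (bool, error) {
			return try(), nil
		}, timeoutCtx.Done()); err != nil {
			cancel() // renewal failed: step down
		}
	}, retryPeriod, ctx.Done())
}
```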

Let's look at the tryAcquireOrRenew method:

 
```go
func (le *LeaderElector) tryAcquireOrRenew() bool {
	now := metav1.Now()
	// the record to store in the lock resource object
	leaderElectionRecord := rl.LeaderElectionRecord{
		HolderIdentity:       le.config.Lock.Identity(), // unique identity
		LeaseDurationSeconds: int(le.config.LeaseDuration / time.Second),
		RenewTime:            now,
		AcquireTime:          now,
	}

	// 1. obtain or create the ElectionRecord
	// step 1: fetch the existing lock from the Kubernetes resource
	oldLeaderElectionRecord, oldLeaderElectionRawRecord, err := le.config.Lock.Get()
	if err != nil {
		if !errors.IsNotFound(err) {
			klog.Errorf("error retrieving resource lock %v: %v", le.config.Lock.Describe(), err)
			return false
		}
		// the resource object does not exist yet, so create the lock
		if err = le.config.Lock.Create(leaderElectionRecord); err != nil {
			klog.Errorf("error initially creating leader election record: %v", err)
			return false
		}
		le.observedRecord = leaderElectionRecord
		le.observedTime = le.clock.Now()
		return true
	}

	// 2. Record obtained, check the Identity & Time
	// step 2: compare the lock stored in Kubernetes with the one observed last time
	if !bytes.Equal(le.observedRawRecord, oldLeaderElectionRawRecord) {
		le.observedRecord = *oldLeaderElectionRecord
		le.observedRawRecord = oldLeaderElectionRawRecord
		le.observedTime = le.clock.Now()
	}
	// check whether the lock has expired and whether it is held by this process
	if len(oldLeaderElectionRecord.HolderIdentity) > 0 &&
		le.observedTime.Add(le.config.LeaseDuration).After(now.Time) &&
		!le.IsLeader() {
		klog.V(4).Infof("lock is held by %v and has not yet expired", oldLeaderElectionRecord.HolderIdentity)
		return false
	}

	// 3. We're going to try to update. The leaderElectionRecord is set to it's default
	// here. Let's correct it before updating.
	// step 3: this process is now the leader, but there are two cases:
	// it was already the leader, or it has just become the leader
	if le.IsLeader() {
		// already the leader: AcquireTime and LeaderTransitions stay unchanged
		leaderElectionRecord.AcquireTime = oldLeaderElectionRecord.AcquireTime
		leaderElectionRecord.LeaderTransitions = oldLeaderElectionRecord.LeaderTransitions
	} else {
		// just became the leader: bump the leader transition count
		leaderElectionRecord.LeaderTransitions = oldLeaderElectionRecord.LeaderTransitions + 1
	}

	// update the lock itself; if the resource changed between Get and Update,
	// this update will fail
	if err = le.config.Lock.Update(leaderElectionRecord); err != nil {
		klog.Errorf("Failed to update lock: %v", err)
		return false
	}

	le.observedRecord = leaderElectionRecord
	le.observedTime = le.clock.Now()
	return true
}
```

What happens if concurrent operations hit this step?

The crucial point here is the atomicity of Kubernetes API operations:

le.config.Lock.Get() returns the lock object, which carries a resourceVersion field that identifies the internal version of the resource object; every update operation changes its value. If an update request includes the resourceVersion field, the apiserver validates that it matches the current value on the object, ensuring that no other update took place within this update cycle, which makes the update atomic.
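To illustrate this optimistic-concurrency behavior outside of the leaderelection package, the sketch below gets a Lease, mutates it, and writes it back. tryTakeOver is a hypothetical helper, and the calls use the context-free client-go signatures of the era this article targets:

```go
package sketch

import (
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
)

// tryTakeOver tries to overwrite the holder of a Lease. The object returned
// by Get carries its resourceVersion, so the Update below is rejected with a
// Conflict error if anyone else modified the Lease in between.
func tryTakeOver(client clientset.Interface, ns, name, id string) (bool, error) {
	lease, err := client.CoordinationV1().Leases(ns).Get(name, metav1.GetOptions{})
	if err != nil {
		return false, err
	}
	lease.Spec.HolderIdentity = &id
	if _, err = client.CoordinationV1().Leases(ns).Update(lease); err != nil {
		if apierrors.IsConflict(err) {
			// a concurrent write won the race; our stale resourceVersion lost
			fmt.Println("conflicting write detected, retry next cycle")
			return false, nil
		}
		return false, err
	}
	return true, nil
}
```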

Summary

In short, leaderelection uses the atomicity of Kubernetes API operations to implement a distributed lock, and elects a leader through continuous competition for that lock.

Only the process elected as leader executes the actual business code. This pattern is very common in Kubernetes, and the package makes it easy to build highly available components: for example, deploy the component as a multi-replica Deployment; when the leader pod exits it is restarted, and in the meantime the lock may be acquired by another pod, which continues the work. A small identity helper for that setup is sketched below.
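For such a Deployment, a convenient holder identity is the pod name, which Kubernetes exposes as the container's hostname. A minimal sketch; the helper name is hypothetical:

```go
package sketch

import (
	"os"

	"github.com/google/uuid"
)

// podIdentity derives a leader-election identity from the pod name; each
// replica in a Deployment gets a unique pod name, so identities do not collide.
func podIdentity() string {
	hostname, err := os.Hostname()
	if err != nil {
		// fall back to a random identity if the hostname is unavailable
		return uuid.New().String()
	}
	return hostname
}
```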

