Kube Controller Manager - resourcequota source code reading (informer)

Background

       KCM handles events asynchronously around the informer framework, which lives in the client-go package; the main entry file is shared_informer.go. There is not much material about it online, but this write-up is a useful reference:

      https://www.jianshu.com/p/d17f70369c35

       

Below is my own summary of how KCM interacts with the API server and processes events:

DeltaFIFO: stores the resource objects watched from the apiserver. It is a queue, backed by a string slice and a map (see the sketch after this list).

Reflector: the client that talks to the api server, providing the list/watch mechanism.

HandleDeltas: the method that processes the data in the DeltaFIFO.

sharedProcessor: a struct whose main job is to manage multiple processorListeners.

processorListener: a struct; each resource type has its own event callbacks, and inside the informer framework they are further wrapped into a listener.
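
To make the DeltaFIFO description above concrete, here is a minimal sketch of that shape. The field names are illustrative only, not the real client-go implementation:

package main

import "fmt"

// deltaFIFOSketch mirrors the shape described above: a FIFO of keys plus a
// map from key to the object (really its accumulated deltas).
type deltaFIFOSketch struct {
   queue []string
   items map[string]interface{}
}

func main() {
   f := deltaFIFOSketch{items: map[string]interface{}{}}
   key := "mynamespace/mypod" // the namespace/name key a KeyFunc produces
   f.queue = append(f.queue, key)
   f.items[key] = "the full object would live here"
   fmt.Println(f.queue, f.items)
}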


For work I needed to understand how resourcequota usage is calculated in K8s, so the goals of this article are:

     1. Understand the code structure of Kube Controller Manager.

     2. Figure out how resource quota is calculated.

Main flow (a minimal client-go sketch follows the list):

  1. Start KCM and connect to the api server.
  2. After the connection succeeds, do an initial full sync of the target resources (resource quotas).
  3. Watch for resource changes (backed by etcd); watched changes are pushed into a queue and 5 worker goroutines are started to drain it.
  4. Reconcile the resources against the local cache.
  5. Write the updated resources back to the api-server.
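
To see that flow from the client-go side, a much simplified sketch might look like the following. The kubeconfig path, the queue name and the handler wiring are assumptions for the example, not KCM's actual code:

package main

import (
   "time"

   "k8s.io/client-go/informers"
   "k8s.io/client-go/kubernetes"
   "k8s.io/client-go/tools/cache"
   "k8s.io/client-go/tools/clientcmd"
   "k8s.io/client-go/util/workqueue"
)

func main() {
   // Assumption: a kubeconfig at this path; error handling trimmed for brevity.
   config, err := clientcmd.BuildConfigFromFlags("", "/root/.kube/config")
   if err != nil {
      panic(err)
   }
   client := kubernetes.NewForConfigOrDie(config)

   // Steps 1-3: the factory's Reflector does the initial full list, then watches.
   factory := informers.NewSharedInformerFactory(client, 30*time.Second)
   podInformer := factory.Core().V1().Pods().Informer()

   // Step 3: watched changes are enqueued by key into a rate-limited work queue.
   queue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "example")
   enqueue := func(obj interface{}) {
      if key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj); err == nil {
         queue.Add(key)
      }
   }
   podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
      AddFunc:    enqueue,
      UpdateFunc: func(old, cur interface{}) { enqueue(cur) },
      DeleteFunc: enqueue,
   })

   stopCh := make(chan struct{})
   factory.Start(stopCh)
   // Step 4: wait for the local cache to be in sync before the workers start.
   cache.WaitForCacheSync(stopCh, podInformer.HasSynced)

   // Steps 3-5: worker goroutines would drain `queue` here and write results
   // back to the api-server (see the worker sketch after ResourceQuotaController.Run).
   select {} // block forever, like a controller process
}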

controller-manager.go / main()

package main

 

func main() {

   rand.Seed(time.Now().UTC().UnixNano())

   

   command := app.NewControllerManagerCommand()

 

   // TODO: once we switch everything over to Cobra commands, we can go back to calling

   // utilflag.InitFlags() (by removing its pflag.Parse() call). For now, we have to set the

   // normalize func and add the go flag set by hand.

   pflag.CommandLine.SetNormalizeFunc(utilflag.WordSepNormalizeFunc)

   pflag.CommandLine.AddGoFlagSet(goflag.CommandLine)

   // utilflag.InitFlags()

   logs.InitLogs()

   defer logs.FlushLogs()

 

   if err := command.Execute(); err != nil {

      fmt.Fprintf(os.Stderr, "%v\n", err)

      os.Exit(1)

   }

}

 

     

app.NewControllerManagerCommand()

 

func NewControllerManagerCommand() *cobra.Command {

   s := options.NewKubeControllerManagerOptions()

   cmd := &cobra.Command{

      Use: "kube-controller-manager",

      Long: `The Kubernetes controller manager is a daemon that embeds

the core control loops shipped with Kubernetes. In applications of robotics and

automation, a control loop is a non-terminating loop that regulates the state of

the system. In Kubernetes, a controller is a control loop that watches the shared

state of the cluster through the apiserver and makes changes attempting to move the

current state towards the desired state. Examples of controllers that ship with

Kubernetes today are the replication controller, endpoints controller, namespace

controller, and serviceaccounts controller.`,

      Run: func(cmd *cobra.Command, args []string) {

         verflag.PrintAndExitIfRequested()

         utilflag.PrintFlags(cmd.Flags())

         // 1. Resolve the controllers KCM knows about and pass their names to the functions below.

         // 2. Build a new config struct and copy the default option values into it.

         // 3. Build a new KubeConfig (restclient config).

         c, err := s.Config(KnownControllers(), ControllersDisabledByDefault.List())

         if err != nil {

            fmt.Fprintf(os.Stderr, "%v\n", err)

            os.Exit(1)

         }

 

         if err := Run(c.Complete()); err != nil {

            fmt.Fprintf(os.Stderr, "%v\n", err)

            os.Exit(1)

         }

      },

   }

   s.AddFlags(cmd.Flags(), KnownControllers(), ControllersDisabledByDefault.List())

 

   return cmd

}

 

options.NewKubeControllerManagerOptions()

// In k8s, command-line parameters are collectively called options. This mainly initializes configuration such as listening ports: here the secure port 443 and the insecure port 10252.

func NewKubeControllerManagerOptions() *KubeControllerManagerOptions {

   componentConfig := cmoptions.NewDefaultControllerManagerComponentConfig(ports.InsecureKubeControllerManagerPort)

   s := KubeControllerManagerOptions{

      // The common/default are kept in 'cmd/kube-controller-manager/app/options/util.go'.

      // Please make common changes there but put anything kube-controller specific here.

      Generic: cmoptions.NewGenericControllerManagerOptions(componentConfig),

   }

   s.Generic.SecureServing.ServerCert.CertDirectory = "/var/run/kubernetes"

   s.Generic.SecureServing.ServerCert.PairName = "kube-controller-manager"

   gcIgnoredResources := make([]componentconfig.GroupResource, 0, len(garbagecollector.DefaultIgnoredResources()))

   for r := range garbagecollector.DefaultIgnoredResources() {

      gcIgnoredResources = append(gcIgnoredResources, componentconfig.GroupResource{Group: r.Group, Resource: r.Resource})

   }

   s.Generic.ComponentConfig.GCIgnoredResources = gcIgnoredResources

   // Enable leader election by default; only one KCM instance in a cluster can be active (the leader) at a time.

   s.Generic.ComponentConfig.LeaderElection.LeaderElect = true

   return &s

}

 

controllermanager.go / Run():

func Run(c *config.CompletedConfig) error {

   // To help debugging, immediately log version

   glog.Infof("Version: %+v", version.Get())

 

   // Save the incoming config c into a newly created configz entry.

   if cfgz, err := configz.New("componentconfig"); err == nil {

      cfgz.Set(c.Generic.ComponentConfig)

   } else {

      glog.Errorf("unable to register configz: %c", err)

   }

 

   // Start the controller manager HTTP server (HTTP/2, TLS 1.2).

   stopCh := make(chan struct{})

   if c.Generic.SecureServing != nil {

      if err := genericcontrollerconfig.Serve(&c.Generic, c.Generic.SecureServing.Serve, stopCh); err != nil {

         return err

      }

   }

   if c.Generic.InsecureServing != nil {

      if err := genericcontrollerconfig.Serve(&c.Generic, c.Generic.InsecureServing.Serve, stopCh); err != nil {

         return err

      }

   }

   

   // The run closure below is the focus of our analysis.

   run := func(stop <-chan struct{}) {

      rootClientBuilder := controller.SimpleControllerClientBuilder{

         ClientConfig: c.Generic.Kubeconfig,

      }

      var clientBuilder controller.ControllerClientBuilder

      if c.Generic.ComponentConfig.UseServiceAccountCredentials {

         if len(c.Generic.ComponentConfig.ServiceAccountKeyFile) == 0 {

            // It's possible another controller process is creating the tokens for us.

            // If one isn't, we'll timeout and exit when our client builder is unable to create the tokens.

            glog.Warningf("--use-service-account-credentials was specified without providing a --service-account-private-key-file")

         }

         clientBuilder = controller.SAControllerClientBuilder{

            ClientConfig:         restclient.AnonymousClientConfig(c.Generic.Kubeconfig),

            CoreClient:           c.Generic.Client.CoreV1(),

            AuthenticationClient: c.Generic.Client.AuthenticationV1(),

            Namespace:            "kube-system",

         }

      } else {

         clientBuilder = rootClientBuilder

      }

      ctx, err := CreateControllerContext(c, rootClientBuilder, clientBuilder, stop)

      if err != nil {

         glog.Fatalf("error building controller context: %v", err)

      }

      saTokenControllerInitFunc := serviceAccountTokenControllerStarter{rootClientBuilder: rootClientBuilder}.startServiceAccountTokenController

 

      // Core call: run each controller.

      if err := StartControllers(ctx, saTokenControllerInitFunc, NewControllerInitializers(ctx.LoopMode)); err != nil {

         glog.Fatalf("error starting controllers: %v", err)

      }

      ctx.InformerFactory.Start(ctx.Stop)

      close(ctx.InformersStarted)

      select {}

   }

 

   // Run the closure: directly if leader election is disabled, otherwise via leader election below.

   if !c.Generic.ComponentConfig.LeaderElection.LeaderElect {

      run(wait.NeverStop)

      panic("unreachable")

   }

   id, err := os.Hostname()

   if err != nil {

      return err

   }

 

   // add a uniquifier so that two processes on the same host don't accidentally both become active

   id = id + "_" + string(uuid.NewUUID())

   rl, err := resourcelock.New(c.Generic.ComponentConfig.LeaderElection.ResourceLock,

      "kube-system",

      "kube-controller-manager",

      c.Generic.LeaderElectionClient.CoreV1(),

      resourcelock.ResourceLockConfig{

         Identity:      id,

         EventRecorder: c.Generic.EventRecorder,

      })

   if err != nil {

      glog.Fatalf("error creating lock: %v", err)

   }

 

   leaderelection.RunOrDie(leaderelection.LeaderElectionConfig{

      Lock:          rl,

      LeaseDuration: c.Generic.ComponentConfig.LeaderElection.LeaseDuration.Duration,

      RenewDeadline: c.Generic.ComponentConfig.LeaderElection.RenewDeadline.Duration,

      RetryPeriod:   c.Generic.ComponentConfig.LeaderElection.RetryPeriod.Duration,

      Callbacks: leaderelection.LeaderCallbacks{

         OnStartedLeading: run,

         OnStoppedLeading: func() {

            glog.Fatalf("leaderelection lost")

         },

      },

   })

   panic("unreachable")

}

The run closure (listed again with more notes)

run := func(stop <-chan struct{}) {

     rootClientBuilder := controller.SimpleControllerClientBuilder{

        ClientConfig: c.Generic.Kubeconfig,

     }

     var clientBuilder controller.ControllerClientBuilder

     if c.Generic.ComponentConfig.UseServiceAccountCredentials {

        if len(c.Generic.ComponentConfig.ServiceAccountKeyFile) == 0 {

           // It's possible another controller process is creating the tokens for us.

           // If one isn't, we'll timeout and exit when our client builder is unable to create the tokens.

           glog.Warningf("--use-service-account-credentials was specified without providing a --service-account-private-key-file")

        }

        clientBuilder = controller.SAControllerClientBuilder{

           ClientConfig:         restclient.AnonymousClientConfig(c.Generic.Kubeconfig),

           CoreClient:           c.Generic.Client.CoreV1(),

           AuthenticationClient: c.Generic.Client.AuthenticationV1(),

           Namespace:            "kube-system",

        }

     } else {

        clientBuilder = rootClientBuilder

     }

     // 1. The ctx here is shared by all controllers.

     // 2. The most important part is constructing the SharedInformerFactory.

     ctx, err := CreateControllerContext(c, rootClientBuilder, clientBuilder, stop)

     if err != nil {

        glog.Fatalf("error building controller context: %v", err)

     }

     saTokenControllerInitFunc := serviceAccountTokenControllerStarter{rootClientBuilder: rootClientBuilder}.startServiceAccountTokenController

 

     // 1. NewControllerInitializers returns a map of each controller's start function.

     // 2. Run each controller's start function.

     if err := StartControllers(ctx, saTokenControllerInitFunc, NewControllerInitializers(ctx.LoopMode)); err != nil {

        glog.Fatalf("error starting controllers: %v", err)

     }

     ctx.InformerFactory.Start(ctx.Stop)

     close(ctx.InformersStarted)

     select {}

  }

StartControllers():

func StartControllers(ctx ControllerContext, startSATokenController InitFunc, controllers map[string]InitFunc) error {

   // Always start the SA token controller first using a full-power client, since it needs to mint tokens for the rest

   // If this fails, just return here and fail since other controllers won't be able to get credentials.

   if _, err := startSATokenController(ctx); err != nil {

      return err

   }

   // Initialize the cloud provider with a reference to the clientBuilder only after token controller

   // has started in case the cloud provider uses the client builder.

   if ctx.Cloud != nil {

      ctx.Cloud.Initialize(ctx.ClientBuilder)

   }

   // Iterate over the controllers' start functions; here we only follow the resourcequota one.

   for controllerName, initFn := range controllers {

      if !ctx.IsControllerEnabled(controllerName) {

         glog.Warningf("%q is disabled", controllerName)

         continue

      }

      time.Sleep(wait.Jitter(ctx.ComponentConfig.ControllerStartInterval.Duration, ControllerStartJitter))

 

      glog.V(1).Infof("Starting %q", controllerName)

      // Run each start function, e.g. startResourceQuotaController.

      started, err := initFn(ctx)

      if err != nil {

         glog.Errorf("Error starting %q", controllerName)

         return err

      }

      if !started {

         glog.Warningf("Skipping %q", controllerName)

         continue

      }

      glog.Infof("Started %q", controllerName)

   }

 

   return nil

}

Stepping into startResourceQuotaController()

func startResourceQuotaController(ctx ControllerContext) (bool, error) {

   // Build a REST client for resourcequota to talk to the api server.

   // ctx.ClientBuilder is a SimpleControllerClientBuilder.

   // Under the hood this is just an HTTP client, wrapped into different structs per request API.

   resourceQuotaControllerClient := ctx.ClientBuilder.ClientOrDie("resourcequota-controller")

   // This decides how resources are discovered for quota: the default namespaced discovery, ServerPreferredNamespacedResources.

   // Note that resourceQuotaControllerClient.Discovery() returns a DiscoveryClient, and this DiscoveryClient

   // implements the ServerResourcesInterface interface, which includes ServerPreferredNamespacedResources.

   discoveryFunc := resourceQuotaControllerClient.Discovery().ServerPreferredNamespacedResources

 

   // This implementation lives in generic.go, not factory.go.

   listerFuncForResource := generic.ListerFuncForResourceFunc(ctx.InformerFactory.ForResource)

   // Resource quota: seven resource types are loaded for tracking: configmaps, resourcequotas, replication controllers, secrets, pods, services, persistentvolumeclaims.

   // Each of these resources implements its own usage calculation; collectively these are the evaluators.

   quotaConfiguration := quotainstall.NewQuotaConfigurationForControllers(listerFuncForResource)

 

   resourceQuotaControllerOptions := &resourcequotacontroller.ResourceQuotaControllerOptions{

      QuotaClient:               resourceQuotaControllerClient.CoreV1(),

      // Returns the resource-quota informer for the current version; it comes from the SharedInformerFactory in the context passed in earlier.

      ResourceQuotaInformer:     ctx.InformerFactory.Core().V1().ResourceQuotas(),

      ResyncPeriod:              controller.StaticResyncPeriodFunc(ctx.ComponentConfig.ResourceQuotaSyncPeriod.Duration),

      InformerFactory:           ctx.InformerFactory, // sharedInformerFactory

      ReplenishmentResyncPeriod: ctx.ResyncPeriod,

      DiscoveryFunc:             discoveryFunc,

      IgnoredResourcesFunc:      quotaConfiguration.IgnoredResources,

      InformersStarted:          ctx.InformersStarted,

      // ===========================================================================

      // Register the evaluators, wrapping them in a new registry (simpleRegistry).

      Registry:                  generic.NewRegistry(quotaConfiguration.Evaluators()),

   }

 

   // As I understand it, this is for rate limiting: it registers a metric that tracks rate limiter usage.

   if resourceQuotaControllerClient.CoreV1().RESTClient().GetRateLimiter() != nil {

      if err := metrics.RegisterMetricAndTrackRateLimiterUsage("resource_quota_controller", resourceQuotaControllerClient.CoreV1().RESTClient().GetRateLimiter()); err != nil {

         return true, err

      }

   }

   // The core of this function is this call.

   resourceQuotaController, err := resourcequotacontroller.NewResourceQuotaController(resourceQuotaControllerOptions)

   if err != nil {

      return false, err

   }

   // Start ConcurrentResourceQuotaSyncs workers (default 5) to do the sync work.

   go resourceQuotaController.Run(int(ctx.ComponentConfig.ConcurrentResourceQuotaSyncs), ctx.Stop)

 

   // Periodically the quota controller to detect new resource types

   go resourceQuotaController.Sync(discoveryFunc, 30*time.Second, ctx.Stop)

 

   return true, nil

}

Analyzing NewResourceQuotaController()

func NewResourceQuotaController(options *ResourceQuotaControllerOptions) (*ResourceQuotaController, error) {

   // build the resource quota controller

   rq := &ResourceQuotaController{

      // used to talk to the api-server

      rqClient:            options.QuotaClient,

      rqLister:            options.ResourceQuotaInformer.Lister(), // sharedIndexInformer

      informerSyncedFuncs: []cache.InformerSynced{options.ResourceQuotaInformer.Informer().HasSynced},

      // Data synced from the apiserver is put into this queue for asynchronous processing.

      queue:               workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "resourcequota_primary"),

      // Also a queue, drained the same way; it holds quotas whose usage has not been calculated yet.

      missingUsageQueue:   workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "resourcequota_priority"),

      resyncPeriod:        options.ResyncPeriod,

      registry:            options.Registry, // the evaluators registered above

   }

   // set the synchronization handler

   rq.syncHandler = rq.syncResourceQuotaFromKey

 

   // Register event callbacks for resourcequota; they fire when a resource change is watched from the apiserver.

   // This call is also central: inside the informer framework the handlers are wrapped into a processorListener,

   // which is then managed by the sharedProcessor. Not expanded here; see shared_informer.go if interested.

   options.ResourceQuotaInformer.Informer().AddEventHandlerWithResyncPeriod(

      cache.ResourceEventHandlerFuncs{

         AddFunc: rq.addQuota,

         UpdateFunc: func(old, cur interface{}) {

            // We are only interested in observing updates to quota.spec to drive updates to quota.status.

            // We ignore all updates to quota.Status because they are all driven by this controller.

            // IMPORTANT:

            // We do not use this function to queue up a full quota recalculation.  To do so, would require

            // us to enqueue all quota.Status updates, and since quota.Status updates involve additional queries

            // that cannot be backed by a cache and result in a full query of a namespace's content, we do not

            // want to pay the price on spurious status updates.  As a result, we have a separate routine that is

            // responsible for enqueue of all resource quotas when doing a full resync (enqueueAll)

            oldResourceQuota := old.(*v1.ResourceQuota)

            curResourceQuota := cur.(*v1.ResourceQuota)

            if quota.V1Equals(oldResourceQuota.Spec.Hard, curResourceQuota.Spec.Hard) {

               return

            }

            rq.addQuota(curResourceQuota)

         },

         // This will enter the sync loop and no-op, because the controller has been deleted from the store.

         // Note that deleting a controller immediately after scaling it to 0 will not work. The recommended

         // way of achieving this is by performing a `stop` operation on the controller.

         DeleteFunc: rq.enqueueResourceQuota,

      },

      rq.resyncPeriod(),

   )

 

   if options.DiscoveryFunc != nil {

      // 1. qm watches changes to other resources and then invokes the corresponding callback, here rq.replenishQuota.

      // 2. It mainly watches pod and service changes and triggers quota recalculation.

      qm := &QuotaMonitor{

         informersStarted:  options.InformersStarted,  // a channel

         informerFactory:   options.InformerFactory,   // sharedInformerFactory

         ignoredResources:  options.IgnoredResourcesFunc(), // resources that should not be monitored

         resourceChanges:   workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "resource_quota_controller_resource_changes"),

         resyncPeriod:      options.ReplenishmentResyncPeriod,

 

         replenishmentFunc: rq.replenishQuota,

         registry:          rq.registry,

      }

 

      rq.quotaMonitor = qm

 

      // do initial quota monitor setup

      // GetQuotableResources: synchronously fetch from the API server the resources whose quota can be computed; the criterion is support for the verbs "create", "list", "watch", "delete" (see the sketch after this function).

      resources, err := GetQuotableResources(options.DiscoveryFunc)

      if err != nil {

         return nil, err

      }

 

      // 1. Register event callbacks for each resource.

      // 2. Only update events of pods and services are watched.

      // 3. Delete events of all resources are watched.

      if err = qm.SyncMonitors(resources); err != nil {

         utilruntime.HandleError(fmt.Errorf("initial monitor sync has error: %v", err))

      }

 

      // only start quota once all informers synced

      rq.informerSyncedFuncs = append(rq.informerSyncedFuncs, qm.IsSynced)

   }

 

   return rq, nil

}
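
As a rough illustration of the verb filter GetQuotableResources is described as applying, here is a sketch over hard-coded discovery data; the function name quotable and the sample resource list are made up for the example (the real code consumes the DiscoveryFunc results and returns GroupVersionResources):

package main

import (
   "fmt"

   metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
   "k8s.io/apimachinery/pkg/util/sets"
)

// quotable keeps only the resources whose supported verbs include
// create/list/watch/delete, which is the criterion described above.
func quotable(lists []*metav1.APIResourceList) []string {
   var out []string
   for _, list := range lists {
      for _, r := range list.APIResources {
         if sets.NewString(r.Verbs...).HasAll("create", "list", "watch", "delete") {
            out = append(out, list.GroupVersion+"/"+r.Name)
         }
      }
   }
   return out
}

func main() {
   // Hard-coded discovery data for the example only.
   lists := []*metav1.APIResourceList{{
      GroupVersion: "v1",
      APIResources: []metav1.APIResource{
         {Name: "pods", Verbs: metav1.Verbs{"create", "delete", "get", "list", "watch"}},
         {Name: "bindings", Verbs: metav1.Verbs{"create"}}, // filtered out
      },
   }}
   fmt.Println(quotable(lists)) // [v1/pods]
}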

Stepping into SyncMonitors()

// SyncMonitors rebuilds the monitor set according to the supplied resources,

// creating or deleting monitors as necessary. It will return any error

// encountered, but will make an attempt to create a monitor for each resource

// instead of immediately exiting on an error. It may be called before or after

// Run. Monitors are NOT started as part of the sync. To ensure all existing

// monitors are started, call StartMonitors.

func (qm *QuotaMonitor) SyncMonitors(resources map[schema.GroupVersionResource]struct{}) error {

   qm.monitorLock.Lock()

   defer qm.monitorLock.Unlock()

 

   toRemove := qm.monitors

   if toRemove == nil {

      toRemove = monitors{}

   }

   current := monitors{}

   errs := []error{}

   kept := 0

   added := 0

   for resource := range resources {

      if _, ok := qm.ignoredResources[resource.GroupResource()]; ok {

         continue

      }

      if m, ok := toRemove[resource]; ok {

         current[resource] = m

         delete(toRemove, resource)

         kept++

         continue

      }

      // Get a cache.Controller wired up with handlers for this resource.

      // Another very central call: every resource type has its own controller (resourcequotacontroller, podcontroller, and so on);

      // here we iterate over each resource and register its callbacks.

      c, err := qm.controllerFor(resource)

      if err != nil {

         errs = append(errs, fmt.Errorf("couldn't start monitor for resource %q: %v", resource, err))

         continue

      }

 

      // check if we need to create an evaluator for this resource (if none previously registered)

      // A few special resources have their own evaluators; anything without one falls back to an ObjectCountEvaluator.

      evaluator := qm.registry.Get(resource.GroupResource())

      if evaluator == nil {

         listerFunc := generic.ListerFuncForResourceFunc(qm.informerFactory.ForResource)

         listResourceFunc := generic.ListResourceUsingListerFunc(listerFunc, resource)

         evaluator = generic.NewObjectCountEvaluator(false, resource.GroupResource(), listResourceFunc, "")

         qm.registry.Add(evaluator)

         glog.Infof("QuotaMonitor created object count evaluator for %s", resource.GroupResource())

      }

 

      // track the monitor

      // This c is actually a dummyController; controller.Run() is invoked on it later, but that Run is essentially a no-op. A somewhat surprising design.

      current[resource] = &monitor{controller: c}

      added++

   }

   qm.monitors = current

 

   for _, monitor := range toRemove {

      if monitor.stopCh != nil {

         close(monitor.stopCh)

      }

   }

 

   glog.V(4).Infof("quota synced monitors; added %d, kept %d, removed %d", added, kept, len(toRemove))

   // NewAggregate returns nil if errs is 0-length

   return utilerrors.NewAggregate(errs)

}

 

controllerFor()

func (qm *QuotaMonitor) controllerFor(resource schema.GroupVersionResource) (cache.Controller, error) {

   // TODO: pass this down

   clock := clock.RealClock{}

   handlers := cache.ResourceEventHandlerFuncs{

      UpdateFunc: func(oldObj, newObj interface{}) {

         // TODO: leaky abstraction!  live w/ it for now, but should pass down an update filter func.

         // we only want to queue the updates we care about though as too much noise will overwhelm queue.

         notifyUpdate := false

         switch resource.GroupResource() {

         case schema.GroupResource{Resource: "pods"}:

            oldPod := oldObj.(*v1.Pod)

            newPod := newObj.(*v1.Pod)

            // Notify only when the old pod still counted toward quota but the new one no longer does (it has terminated), so the cached usage can be recalculated via the event queue.

            notifyUpdate = core.QuotaV1Pod(oldPod, clock) && !core.QuotaV1Pod(newPod, clock)

         case schema.GroupResource{Resource: "services"}:

            oldService := oldObj.(*v1.Service)

            newService := newObj.(*v1.Service)

            notifyUpdate = core.GetQuotaServiceType(oldService) != core.GetQuotaServiceType(newService)

         }

         if notifyUpdate {

            event := &event{

               eventType: updateEvent,

               obj:       newObj,

               oldObj:    oldObj,

               gvr:       resource,

            }

            qm.resourceChanges.Add(event) // enqueue the event

         }

      },

      DeleteFunc: func(obj interface{}) {

         // delta fifo may wrap the object in a cache.DeletedFinalStateUnknown, unwrap it

         if deletedFinalStateUnknown, ok := obj.(cache.DeletedFinalStateUnknown); ok {

            obj = deletedFinalStateUnknown.Obj

         }

         event := &event{

            eventType: deleteEvent,

            obj:       obj,

            gvr:       resource,

         }

         qm.resourceChanges.Add(event)

      },

   }

   // The informerFactory returns a different sharedIndexInformer for each resource, and the event callbacks are added to it.

   // Implemented in generic.go: it creates a sharedIndexInformer per resource and stores it in the factory, or returns the existing one.

   // Per this logic, delete events of all resources are handled, plus update events of pods and services.

   shared, err := qm.informerFactory.ForResource(resource) 

   if err == nil {

      glog.V(4).Infof("QuotaMonitor using a shared informer for resource %q", resource.String())

      // shared.Informer() is the cache.SharedIndexInformer for this resource.

      // Register the resource event callbacks on it in the same way.

      shared.Informer().AddEventHandlerWithResyncPeriod(handlers, qm.resyncPeriod())

      return shared.Informer().GetController(), nil

   }

   glog.V(4).Infof("QuotaMonitor unable to use a shared informer for resource %q: %v", resource.String(), err)

 

   // TODO: if we can share storage with garbage collector, it may make sense to support other resources

   // until that time, aggregated api servers will have to run their own controller to reconcile their own quota.

   return nil, fmt.Errorf("unable to monitor quota for resource %q", resource.String())

}

 

ResourceQuotaController.Run()

func (rq *ResourceQuotaController) Run(workers int, stopCh <-chan struct{}) {

   defer utilruntime.HandleCrash()

   defer rq.queue.ShutDown()

 

   glog.Infof("Starting resource quota controller")

   defer glog.Infof("Shutting down resource quota controller")

 

   if rq.quotaMonitor != nil {

      go rq.quotaMonitor.Run(stopCh) // this ends up starting the Run method of each sharedIndexInformer in the informer factory

   }

 

   if !controller.WaitForCacheSync("resource quota", stopCh, rq.informerSyncedFuncs...) {

      return

   }

 

   // the workers that chug through the quota calculation backlog

   for i := 0; i < workers; i++ {

      // With workers=5 this starts 10 goroutines in total: 5 draining rq.queue and 5 draining missingUsageQueue. The processing itself is not expanded here; see the callbacks registered earlier and the worker sketch below.

      go wait.Until(rq.worker(rq.queue), time.Second, stopCh)

      go wait.Until(rq.worker(rq.missingUsageQueue), time.Second, stopCh)

   }

   // the timer for how often we do a full recalculation across all quotas

   go wait.Until(func() { rq.enqueueAll() }, rq.resyncPeriod(), stopCh)

   <-stopCh

}
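
For reference, a typical worker loop over such a rate-limited queue looks roughly like the sketch below. This is a generic illustration rather than the real rq.worker; syncKey stands in for the registered syncHandler (syncResourceQuotaFromKey):

package main

import (
   "fmt"
   "time"

   "k8s.io/apimachinery/pkg/util/wait"
   "k8s.io/client-go/util/workqueue"
)

// syncKey is an illustrative stand-in for the real sync handler, which
// recalculates usage for one quota identified by its namespace/name key.
func syncKey(key string) error {
   fmt.Println("syncing", key)
   return nil
}

// worker mirrors the shape of rq.worker(queue): drain one key at a time,
// retry with backoff on failure, and forget the key on success.
func worker(queue workqueue.RateLimitingInterface) func() {
   return func() {
      for {
         item, shutdown := queue.Get()
         if shutdown {
            return
         }
         func() {
            defer queue.Done(item)
            if err := syncKey(item.(string)); err != nil {
               queue.AddRateLimited(item) // requeue with rate limiting
               return
            }
            queue.Forget(item) // clear rate-limiter history on success
         }()
      }
   }
}

func main() {
   queue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "resourcequota_primary")
   stopCh := make(chan struct{})
   for i := 0; i < 5; i++ { // ConcurrentResourceQuotaSyncs workers
      go wait.Until(worker(queue), time.Second, stopCh)
   }
   queue.Add("mynamespace/myquota")
   time.Sleep(500 * time.Millisecond)
   queue.ShutDown()
   close(stopCh)
}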

A closer look at rq.quotaMonitor.Run()

func (qm *QuotaMonitor) Run(stopCh <-chan struct{}) {

   glog.Infof("QuotaMonitor running")

   defer glog.Infof("QuotaMonitor stopping")

 

   // Set up the stop channel.

   qm.monitorLock.Lock()

   qm.stopCh = stopCh

   qm.running = true

   qm.monitorLock.Unlock()

 

   // Start monitors and begin change processing until the stop channel is

   // closed.

   // the entry point

   qm.StartMonitors()

   wait.Until(qm.runProcessResourceChanges, 1*time.Second, stopCh)

 

   // Stop any running monitors.

   qm.monitorLock.Lock()

   defer qm.monitorLock.Unlock()

   monitors := qm.monitors

   stopped := 0

   for _, monitor := range monitors {

      if monitor.stopCh != nil {

         stopped++

         close(monitor.stopCh)

      }

   }

   glog.Infof("QuotaMonitor stopped %d of %d monitors", stopped, len(monitors))

}

qm.StartMonitors()

func (qm *QuotaMonitor) StartMonitors() {

   qm.monitorLock.Lock()

   defer qm.monitorLock.Unlock()

 

   if !qm.running {

      return

   }

 

   // we're waiting until after the informer start that happens once all the controllers are initialized.  This ensures

   // that they don't get unexpected events on their work queues.

   <-qm.informersStarted // block until all controllers have been started

 

   monitors := qm.monitors

   started := 0

   for _, monitor := range monitors {

      if monitor.stopCh == nil {

         monitor.stopCh = make(chan struct{})

         // At this point the sharedIndexInformers held by the sharedInformerFactory are actually started.

         qm.informerFactory.Start(qm.stopCh)

         go monitor.Run() // this Run can be ignored (it is essentially a no-op)

         started++

      }

   }

   glog.V(4).Infof("QuotaMonitor started %d new monitors, %d currently running", started, len(monitors))

}

qm.informerFactory.Start(qm.stopCh)

// Start initializes all requested informers.

func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) {

   f.lock.Lock()

   defer f.lock.Unlock()

 

   for informerType, informer := range f.informers {

      if !f.startedInformers[informerType] {

         // Ultimately each sharedIndexInformer is started here; a factory pattern.

         go informer.Run(stopCh)

         f.startedInformers[informerType] = true

      }

   }

}




Most of the code below is from shared_informer.go, i.e. the informer framework itself.

sharedIndexInformer.Run()

func (s *sharedIndexInformer) Run(stopCh <-chan struct{}) {

   defer utilruntime.HandleCrash()

 

   // MetaNamespaceKeyFunc returns the object's namespace/name key, e.g. "mynamespace/mypod".

   // As mentioned at the start, data watched from the apiserver lands in this DeltaFIFO first; it is created here.

   // The DeltaFIFO struct holds a string slice and a map: the slice entries correspond to the map keys, both in namespace/name format.

   // The map values are the full objects (wrapped as deltas); not expanded here.

   fifo := NewDeltaFIFO(MetaNamespaceKeyFunc, s.indexer)

 

   cfg := &Config{

      // pass in the DeltaFIFO

      Queue:            fifo,

      // passed through from the sharedIndexInformer

      ListerWatcher:    s.listerWatcher,

      ObjectType:       s.objectType,

      FullResyncPeriod: s.resyncCheckPeriod,

      RetryOnError:     false,

      ShouldResync:     s.processor.shouldResync,

      // This field is especially important: it processes the data popped from the Queue.

      // Here at last is the HandleDeltas function mentioned at the beginning.

      Process: s.HandleDeltas,

   }

 

   func() {

      s.startedLock.Lock()

      defer s.startedLock.Unlock()

      // Aside: the many "controllers" in k8s are easy to confuse. This controller's main job is to create a Reflector, the component that watches for changes.

      // cfg is the Config struct built above, passed in here.

      s.controller = New(cfg)

      s.controller.(*controller).clock = s.clock

      s.started = true

   }()

 

   // Separate stop channel because Processor should be stopped strictly after controller

   processorStopCh := make(chan struct{})

   var wg wait.Group

   defer wg.Wait()              // Wait for Processor to stop

   defer close(processorStopCh) // Tell Processor to stop

   // Cache mutation detection; off by default and of no use here, can be ignored.

   wg.StartWithChannel(processorStopCh, s.cacheMutationDetector.Run)

   // run the processor, which drives the listeners

   wg.StartWithChannel(processorStopCh, s.processor.run)

 

   defer func() {

      s.startedLock.Lock()

      defer s.startedLock.Unlock()

      s.stopped = true // Don't want any new listeners

   }()

   s.controller.Run(stopCh)

}

Now a look at New(cfg) and s.controller.Run():

func New(c *Config) Controller {

   ctlr := &controller{

      config: *c,

      clock:  &clock.RealClock{},

   }

   return ctlr

}

 

—————————

s.controller.Run()

 

func (c *controller) Run(stopCh <-chan struct{}) {

   defer utilruntime.HandleCrash()

   go func() {

      <-stopCh

      c.config.Queue.Close()

   }()

   

   // Create a Reflector here; it list/watches resource changes from the apiserver.

   r := NewReflector(

      c.config.ListerWatcher,

      c.config.ObjectType,

      /*

       * The kube-controller-manager's cache here is the DeltaFIFO; this is crucial.

       */

      c.config.Queue,

      c.config.FullResyncPeriod,

   )

   r.ShouldResync = c.config.ShouldResync

   r.clock = c.clock

 

   c.reflectorMutex.Lock()

   c.reflector = r

   c.reflectorMutex.Unlock()

 

   var wg wait.Group

   defer wg.Wait()

 

   wg.StartWithChannel(stopCh, r.Run)

   

   // At first I also thought this ran the loop once per second, but it does not: processLoop is an endless loop,

   // so unless it returns on a fatal error it is never re-run periodically. See the logic of wait.Until (and the small example below).

   wait.Until(c.processLoop, time.Second, stopCh)

}
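
A tiny standalone example of wait.Until's semantics (using the k8s.io/apimachinery wait package): the function is re-invoked one period after it returns, which is why a never-returning processLoop is effectively invoked only once:

package main

import (
   "fmt"
   "time"

   "k8s.io/apimachinery/pkg/util/wait"
)

func main() {
   stopCh := make(chan struct{})
   go func() {
      time.Sleep(3 * time.Second)
      close(stopCh)
   }()

   // wait.Until re-invokes f one period after f returns; it is not a ticker
   // that fires regardless of how long f runs. processLoop never returns
   // (its for{} only exits on FIFOClosedError), so in practice it runs once.
   wait.Until(func() {
      fmt.Println("invoked at", time.Now().Format(time.RFC3339))
   }, time.Second, stopCh)
}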

 

c.processLoop()

func (c *controller) processLoop() {

   for {

      // This simply calls the DeltaFIFO's Pop and processes each item with the HandleDeltas function set in cfg.

      obj, err := c.config.Queue.Pop(PopProcessFunc(c.config.Process))

      if err != nil {

         if err == FIFOClosedError {

            return

         }

         if c.config.RetryOnError {

            // This is the safe way to re-enqueue.

            c.config.Queue.AddIfNotPresent(obj)

         }

      }

   }

}

DeltaFIFO.Pop()

func (f *DeltaFIFO) Pop(process PopProcessFunc) (interface{}, error) {

   f.lock.Lock()

   defer f.lock.Unlock()

   for {

      for len(f.queue) == 0 {

         // When the queue is empty, invocation of Pop() is blocked until new item is enqueued.

         // When Close() is called, the f.closed is set and the condition is broadcasted.

         // Which causes this loop to continue and return from the Pop().

         if f.IsClosed() {

            return nil, FIFOClosedError

         }

 

         f.cond.Wait()

      }

      // always take the head of the queue

      id := f.queue[0]

      // drop the head and assign the shortened slice back to f.queue

      f.queue = f.queue[1:]

      item, ok := f.items[id]

      if f.initialPopulationCount > 0 {

         f.initialPopulationCount--

      }

      if !ok {

         // Item may have been deleted subsequently.

         continue

      }

      delete(f.items, id)

      // process the item; in our case this is HandleDeltas

      err := process(item)

      if e, ok := err.(ErrRequeue); ok {

         // on ErrRequeue, re-add the item to the queue (only if it is not already present; note we still hold the lock)

         f.addIfNotPresent(id, item)

         err = e.Err

      }

      // Don't need to copyDeltas here, because we're transferring

      // ownership to the caller.

      return item, err

   }

}
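
The queue-plus-map structure and the blocking behavior of Pop can be reproduced in a few lines of plain Go. The sketch below is my own simplification (no deltas, no requeue handling), only meant to show the sync.Cond pattern:

package main

import (
   "fmt"
   "sync"
)

// blockingFIFO is a stripped-down sketch of the Pop pattern above:
// a key queue plus an item map, with cond.Wait blocking consumers while empty.
type blockingFIFO struct {
   lock  sync.Mutex
   cond  *sync.Cond
   queue []string
   items map[string]interface{}
}

func newBlockingFIFO() *blockingFIFO {
   f := &blockingFIFO{items: map[string]interface{}{}}
   f.cond = sync.NewCond(&f.lock)
   return f
}

func (f *blockingFIFO) Add(key string, obj interface{}) {
   f.lock.Lock()
   defer f.lock.Unlock()
   if _, exists := f.items[key]; !exists {
      f.queue = append(f.queue, key)
   }
   f.items[key] = obj
   f.cond.Broadcast() // wake a blocked Pop
}

func (f *blockingFIFO) Pop(process func(interface{}) error) (interface{}, error) {
   f.lock.Lock()
   defer f.lock.Unlock()
   for len(f.queue) == 0 {
      f.cond.Wait() // releases the lock while waiting, re-acquires on wake-up
   }
   key := f.queue[0]
   f.queue = f.queue[1:]
   item := f.items[key]
   delete(f.items, key)
   return item, process(item) // the real Pop calls HandleDeltas here
}

func main() {
   f := newBlockingFIFO()
   go f.Add("mynamespace/mypod", "pod-object")
   item, _ := f.Pop(func(obj interface{}) error { fmt.Println("processed", obj); return nil })
   fmt.Println("popped", item)
}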

sharedIndexInformer.HandleDeltas()

func (s *sharedIndexInformer) HandleDeltas(obj interface{}) error {

   s.blockDeltas.Lock()

   defer s.blockDeltas.Unlock()

 

   // from oldest to newest

   for _, d := range obj.(Deltas) {

      switch d.Type {

      case Sync, Added, Updated:

         isSync := d.Type == Sync

         // can be ignored; it is for mutation detection

         s.cacheMutationDetector.AddObject(d.Object)

         if old, exists, err := s.indexer.Get(d.Object); err == nil && exists {

            // every incoming object also updates the indexer cache

            if err := s.indexer.Update(d.Object); err != nil {

               return err

            }

            s.processor.distribute(updateNotification{oldObj: old, newObj: d.Object}, isSync)

         } else {

             // new objects update the indexer cache as well

            if err := s.indexer.Add(d.Object); err != nil {

               return err

            }

            s.processor.distribute(addNotification{newObj: d.Object}, isSync)

         }

      case Deleted:

          // deletions update the indexer cache too

         if err := s.indexer.Delete(d.Object); err != nil {

            return err

         }

         s.processor.distribute(deleteNotification{oldObj: d.Object}, false)

      }

   }

   return nil

}
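
The indexer that HandleDeltas keeps updated is essentially a thread-safe store keyed by namespace/name. A minimal standalone client-go illustration (assumed example code, not part of KCM):

package main

import (
   "fmt"

   v1 "k8s.io/api/core/v1"
   metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
   "k8s.io/client-go/tools/cache"
)

func main() {
   // A thread-safe store keyed by "namespace/name", the same kind of local
   // cache HandleDeltas keeps in sync for each resource type.
   store := cache.NewStore(cache.MetaNamespaceKeyFunc)

   pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "mynamespace", Name: "mypod"}}

   _ = store.Add(pod) // mirrors the indexer.Add branch for Added/Sync deltas
   obj, exists, _ := store.GetByKey("mynamespace/mypod")
   fmt.Println(exists, obj.(*v1.Pod).Name) // true mypod

   _ = store.Delete(pod) // mirrors the indexer.Delete branch for Deleted deltas
   _, exists, _ = store.GetByKey("mynamespace/mypod")
   fmt.Println(exists) // false
}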

s.processor.distribute(deleteNotification{oldObj: d.Object}, false)

func (p *sharedProcessor) distribute(obj interface{}, sync bool) {

   p.listenersLock.RLock()

   defer p.listenersLock.RUnlock()

 

   if sync {

      for _, listener := range p.syncingListeners {

         // this actually calls into the processorListener

         listener.add(obj)

      }

   } else {

      for _, listener := range p.listeners {

         // this actually calls into the processorListener

         listener.add(obj)

      }

   }

}

processorListener.add

// this method writes the notification into the add channel

func (p *processorListener) add(notification interface{}) {

   p.addCh <- notification

}

 

processorListener.pop() consumes the data from the add channel

func (p *processorListener) pop() {

   defer utilruntime.HandleCrash()

   defer close(p.nextCh) // Tell .run() to stop

 

   var nextCh chan<- interface{}

   var notification interface{}

   

   for {

      // A classic select-over-channels pattern: one select both receives from the add channel and forwards data onward (a standalone example follows this function).

      select {

      case nextCh <- notification:

         // Notification dispatched

         var ok bool

         notification, ok = p.pendingNotifications.ReadOne()

         if !ok { // Nothing to pop

            nextCh = nil // Disable this select case

         }

      case notificationToAdd, ok := <-p.addCh:

         if !ok {

            return

         }

         // notification == nil means nothing is currently waiting to be dispatched; send this one straight to the next channel.

         if notification == nil { // No notification to pop (and pendingNotifications is empty)

            // Optimize the case - skip adding to pendingNotifications

            notification = notificationToAdd

            nextCh = p.nextCh

         } else { // There is already a notification waiting to be dispatched

             // Reaching here means the previous notification has not been consumed yet, so buffer the new one in pendingNotifications, a ring buffer.

            p.pendingNotifications.WriteOne(notificationToAdd)

         }

      }

   }

}
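
The trick is that sending on a nil channel blocks forever, so setting nextCh to nil disables that select case until there is something to dispatch again. Below is a standalone illustration of the same pattern; the names forward/in/out are made up and shutdown handling is omitted:

package main

import "fmt"

// forward mirrors the pop() pattern: a nil channel disables its select case,
// and items that arrive while the consumer is busy are buffered.
func forward(in <-chan string, out chan<- string) {
   var next chan<- string // nil until there is something to dispatch
   var current string
   var pending []string

   for {
      select {
      case next <- current: // disabled while next == nil
         if len(pending) > 0 {
            current, pending = pending[0], pending[1:]
         } else {
            next = nil // nothing left; disable this case again
         }
      case v := <-in:
         if next == nil { // nothing in flight: dispatch immediately
            current, next = v, out
         } else { // consumer busy: buffer (pendingNotifications in the real code)
            pending = append(pending, v)
         }
      }
   }
}

func main() {
   in, out := make(chan string), make(chan string)
   go forward(in, out) // draining on shutdown omitted for brevity
   go func() {
      for _, v := range []string{"add", "update", "delete"} {
         in <- v
      }
   }()
   for i := 0; i < 3; i++ {
      fmt.Println(<-out)
   }
}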

processorListener.run()

 

func (p *processorListener) run() {

   // this call blocks until the channel is closed.  When a panic happens during the notification

   // we will catch it, **the offending item will be skipped!**, and after a short delay (one second)

   // the next notification will be attempted.  This is usually better than the alternative of never

   // delivering again.

   stopCh := make(chan struct{})

   wait.Until(func() {

      // this gives us a few quick retries before a long pause and then a few more quick retries

      err := wait.ExponentialBackoff(retry.DefaultRetry, func() (bool, error) {

         for next := range p.nextCh {

            switch notification := next.(type) {

            case updateNotification:

               // These are the event handlers each controller registered for itself; not analyzed further here.

               p.handler.OnUpdate(notification.oldObj, notification.newObj)

            case addNotification:

               p.handler.OnAdd(notification.newObj)

            case deleteNotification:

               p.handler.OnDelete(notification.oldObj)

            default:

               utilruntime.HandleError(fmt.Errorf("unrecognized notification: %#v", next))

            }

         }

         // the only way to get here is if the p.nextCh is empty and closed

         return true, nil

      })

 

      // the only way to get here is if the p.nextCh is empty and closed

      if err == nil {

         close(stopCh)

      }

   }, 1*time.Minute, stopCh)

}

Summary: the callbacks that end up firing are the event handlers added when each controller (e.g. the resourcequota controller) is constructed, and they are dispatched through the processorListener. To understand each controller's processing in detail, study the resourcequota controller's OnAdd/OnUpdate handlers and its worker methods.
