k8s Source Code Analysis ----- kubelet Pod Processing Flow

The source analyzed is the k8s v1.3.0 stable release.


In Go, cross-goroutine communication is done through channels. With that in mind, we first trace where the communicating channels and goroutines are created.


(1) Creation of the PodUpdate channel


The channel is created in the CreateAndInitKubelet function, which calls makePodSourceConfig:
k8s.io\kubernetes\cmd\kubelet\app\server.go:903

func makePodSourceConfig(kc *KubeletConfig) *config.PodConfig {
       // source of all configuration
       cfg := config.NewPodConfig(config.PodConfigNotificationIncremental, kc.Recorder)

       // define file config source
       if kc.ConfigFile != "" {
              glog.Infof("Adding manifest file: %v", kc.ConfigFile)
              config.NewSourceFile(kc.ConfigFile, kc.NodeName, kc.FileCheckFrequency, cfg.Channel(kubetypes.FileSource))
       }

       // define url config source
       if kc.ManifestURL != "" {
              glog.Errorf("Adding manifest url %q with HTTP header %v", kc.ManifestURL, kc.ManifestURLHeader)
              config.NewSourceURL(kc.ManifestURL, kc.ManifestURLHeader, kc.NodeName, kc.HTTPCheckFrequency, cfg.Channel(kubetypes.HTTPSource))
       }
       if kc.KubeClient != nil {
              glog.Errorf("Watching apiserver")
              config.NewSourceApiserver(kc.KubeClient, kc.NodeName, cfg.Channel(kubetypes.ApiserverSource))
       }
       return cfg
}

// NewPodConfig creates an object that can merge many configuration sources into a stream
// of normalized updates to a pod configuration.
func NewPodConfig(mode PodConfigNotificationMode, recorder record.EventRecorder) *PodConfig {
       updates := make(chan kubetypes.PodUpdate, 50)
       storage := newPodStorage(updates, mode, recorder)
       podConfig := &PodConfig{
              pods:    storage,
              mux:     config.NewMux(storage),
              updates: updates,
              sources: sets.String{},
       }
       return podConfig
}


// TODO: PodConfigNotificationMode could be handled by a listener to the updates channel
// in the future, especially with multiple listeners.
// TODO: allow initialization of the current state of the store with snapshotted version.
func newPodStorage(updates chan<- kubetypes.PodUpdate, mode PodConfigNotificationMode, recorder record.EventRecorder) *podStorage {
       return &podStorage{
              pods:        make(map[string]map[string]*api.Pod),
              mode:        mode,
              updates:     updates,
              sourcesSeen: sets.String{},
              recorder:    recorder,
       }
}

The call order is: makePodSourceConfig -> NewPodConfig -> newPodStorage

In NewPodConfig, an updates channel with a capacity of 50 is created. This channel is later handed to the kubelet's main loop for processing.


(2) Distribution of the pod channel

The distribution of the pod channel also happens in makePodSourceConfig: after the channel is created, its write side (obtained through cfg.Channel(source)) is handed to the configuration sources.

There are three writer functions:

NewSourceFile

NewSourceURL

NewSourceApiserver

The read side of the channel is passed to the main loop:

k, podCfg, err := builder(kcfg)
 
startKubelet(k, podCfg, kcfg)
 
 
go wait.Until(func() { k.Run(podCfg.Updates()) }, 0, wait.NeverStop)
 
 
// Run starts the kubelet reacting to config updates
func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) 
 
 
kl.syncLoop(updates, kl)
func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler)
if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
       break
}

The read end of the channel is thus passed, step by step, down to the syncLoopIteration function. The overall fan-in is summarized in the sketch below.
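
A small self-contained sketch of that fan-in (hypothetical names; this is not the actual Mux/podStorage implementation): each source gets its own write channel, everything is merged into one buffered updates channel, and the sync loop is the single consumer.

package main

import (
        "fmt"
        "time"
)

// podUpdate is a stand-in for kubetypes.PodUpdate.
type podUpdate struct {
        Source string
        Pods   []string
}

// channelFor mimics the role of cfg.Channel(source): it returns a per-source
// channel and starts a goroutine that forwards everything written to it into
// the merged updates channel, tagging each update with its source.
func channelFor(source string, merged chan<- podUpdate) chan<- podUpdate {
        in := make(chan podUpdate)
        go func() {
                for u := range in {
                        u.Source = source
                        merged <- u
                }
        }()
        return in
}

func main() {
        // Plays the role of the capacity-50 channel created in NewPodConfig.
        updates := make(chan podUpdate, 50)

        fileCh := channelFor("file", updates)
        apiCh := channelFor("api", updates)

        // Sources write independently...
        go func() { fileCh <- podUpdate{Pods: []string{"static-pod"}} }()
        go func() { apiCh <- podUpdate{Pods: []string{"nginx"}} }()

        // ...and a single consumer (the sync loop) reads the merged stream.
        for i := 0; i < 2; i++ {
                select {
                case u := <-updates:
                        fmt.Printf("sync loop got %v from %q\n", u.Pods, u.Source)
                case <-time.After(time.Second):
                        return
                }
        }
}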

(3) The three sources of pod update events

The SourceFile approach:

func NewSourceFile(path string, nodeName string, period time.Duration, updates chan<- interface{}) {
       config := &sourceFile{
              path:     path,
              nodeName: nodeName,
              updates:  updates,
       }
       glog.V(1).Infof("Watching path %q", path)
       go wait.Until(config.run, period, wait.NeverStop)
}

func (s *sourceFile) run() {
       if err := s.extractFromPath(); err != nil {
              glog.Errorf("Unable to read config path %q: %v", s.path, err)
       }
}
func (s *sourceFile) extractFromPath() error {
       path := s.path
       statInfo, err := os.Stat(path)
       if err != nil {
              if !os.IsNotExist(err) {
                     return err
              }
              // Emit an update with an empty PodList to allow FileSource to be marked as seen
              s.updates <- kubetypes.PodUpdate{Pods: []*api.Pod{}, Op: kubetypes.SET, Source: kubetypes.FileSource}
              return fmt.Errorf("path does not exist, ignoring")
       }

       switch {
       case statInfo.Mode().IsDir():
              pods, err := s.extractFromDir(path)
              if err != nil {
                     return err
              }
              s.updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.SET, Source: kubetypes.FileSource}

       case statInfo.Mode().IsRegular():
              pod, err := s.extractFromFile(path)
              if err != nil {
                     return err
              }
              s.updates <- kubetypes.PodUpdate{Pods: []*api.Pod{pod}, Op: kubetypes.SET, Source: kubetypes.FileSource}

       default:
              return fmt.Errorf("path is not a directory or file")
       }

       return nil
}



The SourceURL approach:
k8s.io\kubernetes\pkg\kubelet\config\http.go

func NewSourceURL(url string, header http.Header, nodeName string, period time.Duration, updates chan<- interface{}) {
       config := &sourceURL{
              url:      url,
              header:   header,
              nodeName: nodeName,
              updates:  updates,
              data:     nil,
              // Timing out requests leads to retries. This client is only used to
              // read the the manifest URL passed to kubelet.
              client: &http.Client{Timeout: 10 * time.Second},
       }
       glog.V(1).Infof("Watching URL %s", url)
       go wait.Until(config.run, period, wait.NeverStop)
}


func (s *sourceURL) run() {
       if err := s.extractFromURL(); err != nil {
              // Don't log this multiple times per minute. The first few entries should be
              // enough to get the point across.
              if s.failureLogs < 3 {
                     glog.Warningf("Failed to read pods from URL: %v", err)
              } else if s.failureLogs == 3 {
                     glog.Warningf("Failed to read pods from URL. Dropping verbosity of this message to V(4): %v", err)
              } else {
                     glog.V(4).Infof("Failed to read pods from URL: %v", err)
              }
              s.failureLogs++
       } else {
              if s.failureLogs > 0 {
                     glog.Info("Successfully read pods from URL.")
                     s.failureLogs = 0
              }
       }
}

func (s *sourceURL) extractFromURL() error {
       req, err := http.NewRequest("GET", s.url, nil)
       if err != nil {
              return err
       }
       req.Header = s.header
       resp, err := s.client.Do(req)
       if err != nil {
              return err
       }
       defer resp.Body.Close()
       data, err := ioutil.ReadAll(resp.Body)
       if err != nil {
              return err
       }
       if resp.StatusCode != 200 {
              return fmt.Errorf("%v: %v", s.url, resp.Status)
       }
       if len(data) == 0 {
              // Emit an update with an empty PodList to allow HTTPSource to be marked as seen
              s.updates <- kubetypes.PodUpdate{Pods: []*api.Pod{}, Op: kubetypes.SET, Source: kubetypes.HTTPSource}
              return fmt.Errorf("zero-length data received from %v", s.url)
       }
       // Short circuit if the data has not changed since the last time it was read.
       if bytes.Compare(data, s.data) == 0 {
              return nil
       }
       s.data = data

       // First try as it is a single pod.
       parsed, pod, singlePodErr := tryDecodeSinglePod(data, s.applyDefaults)
       if parsed {
              if singlePodErr != nil {
                     // It parsed but could not be used.
                     return singlePodErr
              }
              s.updates <- kubetypes.PodUpdate{Pods: []*api.Pod{pod}, Op: kubetypes.SET, Source: kubetypes.HTTPSource}
              return nil
       }

       // That didn't work, so try a list of pods.
       parsed, podList, multiPodErr := tryDecodePodList(data, s.applyDefaults)
       if parsed {
              if multiPodErr != nil {
                     // It parsed but could not be used.
                     return multiPodErr
              }
              pods := make([]*api.Pod, 0)
              for i := range podList.Items {
                     pods = append(pods, &podList.Items[i])
              }
              s.updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.SET, Source: kubetypes.HTTPSource}
              return nil
       }

       return fmt.Errorf("%v: received '%v', but couldn't parse as "+
              "single (%v) or multiple pods (%v).\n",
              s.url, string(data), singlePodErr, multiPodErr)
}


The SourceApiserver approach:

// NewSourceApiserver creates a config source that watches and pulls from the apiserver.
func NewSourceApiserver(c *clientset.Clientset, nodeName string, updates chan<- interface{}) {
       lw := cache.NewListWatchFromClient(c.CoreClient, "pods", api.NamespaceAll, fields.OneTermEqualSelector(api.PodHostField, nodeName))
       newSourceApiserverFromLW(lw, updates)
}

// newSourceApiserverFromLW holds creates a config source that watches and pulls from the apiserver.
func newSourceApiserverFromLW(lw cache.ListerWatcher, updates chan<- interface{}) {
       send := func(objs []interface{}) {
              var pods []*api.Pod
              for _, o := range objs {
                     pods = append(pods, o.(*api.Pod))
              }
              updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.SET, Source: kubetypes.ApiserverSource}
       }
       cache.NewReflector(lw, &api.Pod{}, cache.NewUndeltaStore(send, cache.MetaNamespaceKeyFunc), 0).Run()
}

// Run starts a watch and handles watch events. Will restart the watch if it is closed.
// Run starts a goroutine and returns immediately.
func (r *Reflector) Run() {
       glog.V(3).Infof("Starting reflector %v (%s) from %s", r.expectedType, r.resyncPeriod, r.name)
       go wait.Until(func() {
              if err := r.ListAndWatch(wait.NeverStop); err != nil {
                     utilruntime.HandleError(err)
              }
       }, r.period, wait.NeverStop)
}
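
One detail worth calling out: cache.NewUndeltaStore invokes the push function with a snapshot of the complete current state on every change, which is why the apiserver source always emits SET updates carrying the full pod list. A tiny standalone illustration of that idea (a toy, not the real cache.UndeltaStore):

package main

import "fmt"

// undeltaStore is a toy version of the pattern behind cache.NewUndeltaStore:
// every mutation calls pushFunc with the full current contents.
type undeltaStore struct {
        items    map[string]interface{}
        pushFunc func([]interface{})
}

func (u *undeltaStore) Add(key string, obj interface{}) {
        u.items[key] = obj
        u.push()
}

func (u *undeltaStore) Delete(key string) {
        delete(u.items, key)
        u.push()
}

func (u *undeltaStore) push() {
        snapshot := make([]interface{}, 0, len(u.items))
        for _, o := range u.items {
                snapshot = append(snapshot, o)
        }
        u.pushFunc(snapshot)
}

func main() {
        store := &undeltaStore{
                items: map[string]interface{}{},
                // In the kubelet, the callback wraps the pods into
                // PodUpdate{Op: SET, Source: ApiserverSource} and writes it
                // to the per-source channel (see the send func above).
                pushFunc: func(objs []interface{}) {
                        fmt.Printf("SET with %d pods\n", len(objs))
                },
        }
        store.Add("default/nginx", "pod-nginx") // SET with 1 pods
        store.Add("default/redis", "pod-redis") // SET with 2 pods
        store.Delete("default/nginx")           // SET with 1 pods
}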


(4) Processing in syncLoopIteration

func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handler SyncHandler,
       syncCh <-chan time.Time, housekeepingCh <-chan time.Time, plegCh <-chan *pleg.PodLifecycleEvent) bool {
       kl.syncLoopMonitor.Store(kl.clock.Now())
       select {
       case u, open := <-configCh:
              // Update from a config source; dispatch it to the right handler
              // callback.
              if !open {
                     glog.Errorf("Update channel is closed. Exiting the sync loop.")
                     return false
              }
              kl.sourcesReady.AddSource(u.Source)

              switch u.Op {
              case kubetypes.ADD:
                     glog.V(2).Infof("SyncLoop (ADD, %q): %q", u.Source, format.Pods(u.Pods))
                     // After restarting, kubelet will get all existing pods through
                     // ADD as if they are new pods. These pods will then go through the
                     // admission process and *may* be rejcted. This can be resolved
                     // once we have checkpointing.
                     handler.HandlePodAdditions(u.Pods)
              case kubetypes.UPDATE:
                     glog.V(2).Infof("SyncLoop (UPDATE, %q): %q", u.Source, format.PodsWithDeletiontimestamps(u.Pods))
                     handler.HandlePodUpdates(u.Pods)
              case kubetypes.REMOVE:
                     glog.V(2).Infof("SyncLoop (REMOVE, %q): %q", u.Source, format.Pods(u.Pods))
                     handler.HandlePodRemoves(u.Pods)
              case kubetypes.RECONCILE:
                     glog.V(4).Infof("SyncLoop (RECONCILE, %q): %q", u.Source, format.Pods(u.Pods))
                     handler.HandlePodReconcile(u.Pods)
              case kubetypes.DELETE:
                     glog.V(2).Infof("SyncLoop (DELETE, %q): %q", u.Source, format.Pods(u.Pods))
                     // DELETE is treated as a UPDATE because of graceful deletion.
                     handler.HandlePodUpdates(u.Pods)
              case kubetypes.SET:
                     // TODO: Do we want to support this?
                     glog.Errorf("Kubelet does not support snapshot update")

              }
       case e := <-plegCh:
              if isSyncPodWorthy(e) {
                     // PLEG event for a pod; sync it.
                     pod, ok := kl.podManager.GetPodByUID(e.ID)
                     if !ok {
                            // If the pod no longer exists, ignore the event.
                            glog.V(4).Infof("SyncLoop (PLEG): ignore irrelevant event: %#v", e)
                            break
                     }
                     glog.V(2).Infof("SyncLoop (PLEG): %q, event: %#v", format.Pod(pod), e)
                     handler.HandlePodSyncs([]*api.Pod{pod})
              }
       case <-syncCh:
              // Sync pods waiting for sync
              podsToSync := kl.getPodsToSync()
              if len(podsToSync) == 0 {
                     break
              }
              glog.V(4).Infof("SyncLoop (SYNC): %d pods; %s", len(podsToSync), format.Pods(podsToSync))
              kl.HandlePodSyncs(podsToSync)
       case update := <-kl.livenessManager.Updates():
              if update.Result == proberesults.Failure {
                     // The liveness manager detected a failure; sync the pod.

                     // We should not use the pod from livenessManager, because it is never updated after
                     // initialization.
                     pod, ok := kl.podManager.GetPodByUID(update.PodUID)
                     if !ok {
                            // If the pod no longer exists, ignore the update.
                            glog.V(4).Infof("SyncLoop (container unhealthy): ignore irrelevant update: %#v", update)
                            break
                     }
                     glog.V(1).Infof("SyncLoop (container unhealthy): %q", format.Pod(pod))
                     handler.HandlePodSyncs([]*api.Pod{pod})
              }
       case <-housekeepingCh:
              if !kl.sourcesReady.AllReady() {
                     // If the sources aren't ready, skip housekeeping, as we may
                     // accidentally delete pods from unready sources.
                     glog.V(4).Infof("SyncLoop (housekeeping, skipped): sources aren't ready yet.")
              } else {
                     glog.V(4).Infof("SyncLoop (housekeeping)")
                     if err := handler.HandlePodCleanups(); err != nil {
                            glog.Errorf("Failed cleaning pods: %v", err)
                     }
              }
       }
       kl.syncLoopMonitor.Store(kl.clock.Now())
       return true
}

syncLoopIteration dispatches each incoming PodUpdate to the appropriate handler according to its operation type; it can be thought of as a dispatch function.

The rough dispatch flow is (a simplified sketch follows below):

(kl *Kubelet) HandlePodUpdates -> dispatchWork
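
As a rough, self-contained sketch (toy types; not a verbatim quote of the v1.3 method bodies), the handler simply walks the pods carried by the update and hands each one to dispatchWork, which is shown in full in the next section:

package main

import (
        "fmt"
        "time"
)

type pod struct{ Name string }

type syncPodType string

const syncPodUpdate syncPodType = "SyncPodUpdate"

type kubelet struct{}

// dispatchWork stand-in: the real one forwards the pod to podWorkers.UpdatePod.
func (kl *kubelet) dispatchWork(p *pod, syncType syncPodType, start time.Time) {
        fmt.Printf("dispatchWork: pod=%s type=%s\n", p.Name, syncType)
}

// handlePodUpdates mirrors the shape of Kubelet.HandlePodUpdates:
// every pod in the update is dispatched to an asynchronous pod worker.
func (kl *kubelet) handlePodUpdates(pods []*pod) {
        start := time.Now()
        for _, p := range pods {
                kl.dispatchWork(p, syncPodUpdate, start)
        }
}

func main() {
        kl := &kubelet{}
        kl.handlePodUpdates([]*pod{{Name: "nginx"}, {Name: "redis"}})
}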

(5) Processing in dispatchWork


// dispatchWork starts the asynchronous sync of the pod in a pod worker.
// If the pod is terminated, dispatchWork
func (kl *Kubelet) dispatchWork(pod *api.Pod, syncType kubetypes.SyncPodType, mirrorPod *api.Pod, start time.Time) {
       if kl.podIsTerminated(pod) {
              if pod.DeletionTimestamp != nil {
                     // If the pod is in a terminated state, there is no pod worker to
                     // handle the work item. Check if the DeletionTimestamp has been
                     // set, and force a status update to trigger a pod deletion request
                     // to the apiserver.
                     kl.statusManager.TerminatePod(pod)
              }
              return
       }
       // Run the sync in an async worker.
       kl.podWorkers.UpdatePod(&UpdatePodOptions{
              Pod:        pod,
              MirrorPod:  mirrorPod,
              UpdateType: syncType,
              OnCompleteFunc: func(err error) {
                     if err != nil {
                            metrics.PodWorkerLatency.WithLabelValues(syncType.String()).Observe(metrics.SinceInMicroseconds(start))
                     }
              },
       })
       // Note the number of containers for new pods.
       if syncType == kubetypes.SyncPodCreate {
              metrics.ContainersPerPodCount.Observe(float64(len(pod.Spec.Containers)))
       }
}

The main flow here is that the kubelet uses its podWorkers object to perform the UpdatePod operation.

The podWorkers object used in dispatchWork is defined in:

k8s.io\kubernetes\pkg\kubelet\pod_workers.go


(6) Operations in UpdatePod

// Apply the new setting to the specified pod.
// If the options provide an OnCompleteFunc, the function is invoked if the update is accepted.
// Update requests are ignored if a kill pod request is pending.
func (p *podWorkers) UpdatePod(options *UpdatePodOptions) {
       pod := options.Pod
       uid := pod.UID
       var podUpdates chan UpdatePodOptions
       var exists bool

       p.podLock.Lock()
       defer p.podLock.Unlock()
       if podUpdates, exists = p.podUpdates[uid]; !exists {
              // We need to have a buffer here, because checkForUpdates() method that
              // puts an update into channel is called from the same goroutine where
              // the channel is consumed. However, it is guaranteed that in such case
              // the channel is empty, so buffer of size 1 is enough.
              podUpdates = make(chan UpdatePodOptions, 1)
              p.podUpdates[uid] = podUpdates

              // Creating a new pod worker either means this is a new pod, or that the
              // kubelet just restarted. In either case the kubelet is willing to believe
              // the status of the pod for the first pod worker sync. See corresponding
              // comment in syncPod.
              go func() {
                     defer runtime.HandleCrash()
                     p.managePodLoop(podUpdates)
              }()
       }
       if !p.isWorking[pod.UID] {
              p.isWorking[pod.UID] = true
              podUpdates <- *options
       } else {
              // if a request to kill a pod is pending, we do not let anything overwrite that request.
              update, found := p.lastUndeliveredWorkUpdate[pod.UID]
              if !found || update.UpdateType != kubetypes.SyncPodKill {
                     p.lastUndeliveredWorkUpdate[pod.UID] = *options
              }
       }
}


The main steps in UpdatePod are:

1. Look the pod up by UID in podUpdates. If there is no entry yet, create a buffered channel of size 1 for this pod and start a worker goroutine that enters managePodLoop (see the sketch after this list).

2. If a worker already exists: when it is idle, send the update options on its channel; when it is busy, store the options as lastUndeliveredWorkUpdate instead, unless a pending kill request would be overwritten.
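
A hypothetical, much-simplified version of the per-pod worker pattern: one goroutine plus one buffered channel of size 1 per pod UID, created lazily on the first update.

package main

import (
        "fmt"
        "sync"
        "time"
)

type updateOptions struct {
        PodUID string
        Reason string
}

type workers struct {
        mu         sync.Mutex
        podUpdates map[string]chan updateOptions
}

// updatePod lazily creates a worker goroutine (and its buffered channel of
// size 1) the first time a UID is seen, then forwards the update to it.
// The real podWorkers additionally tracks isWorking/lastUndeliveredWorkUpdate
// so that a busy worker is never blocked on; that part is omitted here.
func (w *workers) updatePod(opts updateOptions) {
        w.mu.Lock()
        ch, exists := w.podUpdates[opts.PodUID]
        if !exists {
                ch = make(chan updateOptions, 1)
                w.podUpdates[opts.PodUID] = ch
                go func() {
                        // Stand-in for managePodLoop: runs until the channel is closed.
                        for u := range ch {
                                fmt.Printf("worker %s syncing (%s)\n", u.PodUID, u.Reason)
                        }
                }()
        }
        w.mu.Unlock()
        ch <- opts
}

func main() {
        w := &workers{podUpdates: map[string]chan updateOptions{}}
        w.updatePod(updateOptions{PodUID: "uid-1", Reason: "create"})
        w.updatePod(updateOptions{PodUID: "uid-1", Reason: "update"})
        time.Sleep(100 * time.Millisecond) // let the worker print (demo only)
}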


(7) When does the goroutine started by UpdatePod exit?

Two code paths in the kubelet lead to a pod worker being removed:

One is: HandlePodRemoves -> deletePod -> kl.podWorkers.ForgetWorker(pod.UID) -> removeWorker

The other is: HandlePodCleanups -> ForgetNonExistingPodWorkers -> iterate over all known pod workers, dropping those whose pods no longer exist -> removeWorker (see the sketch below for why this terminates the worker goroutine)
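
The essential mechanism behind both paths is that removeWorker closes the per-pod channel and deletes its bookkeeping entries; ranging over a closed channel ends, so the goroutine running managePodLoop returns. A minimal sketch of that idea (assumed shape, not the exact v1.3 code):

package main

import (
        "fmt"
        "time"
)

func main() {
        // The per-pod update channel created by UpdatePod.
        updates := make(chan string, 1)

        // Worker goroutine, analogous to managePodLoop: it loops over the
        // channel and only returns once the channel is closed.
        done := make(chan struct{})
        go func() {
                defer close(done)
                for u := range updates {
                        fmt.Println("sync:", u)
                }
                fmt.Println("channel closed, worker goroutine exits")
        }()

        updates <- "update pod"
        time.Sleep(50 * time.Millisecond)

        // removeWorker's essential effect: closing the channel (and dropping
        // the bookkeeping entries) lets the per-pod goroutine terminate.
        close(updates)
        <-done
}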

 

(8) Processing in managePodLoop

 

Step 1: receive the update events sent over by UpdatePod and process them one after another.

Step 2: compare against the time of the last sync (the worker waits for a pod status newer than the previous sync before syncing again).

Step 3: perform the sync itself; the sync function is the kubelet's syncPod.

k8s.io\kubernetes\pkg\kubelet\kubelet.go:1703

Step 4: update the last sync time.

Step 5: invoke the completion callback, OnCompleteFunc.

Step 6: wrapUp schedules the next sync after a time interval (the exact workflow still needs to be confirmed).

A simplified sketch combining these six steps is given below.
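
The sketch is self-contained and uses hypothetical helpers; syncPodFn stands in for the kubelet's syncPod, and the status-cache wait and wrapUp requeueing are reduced to comments.

package main

import (
        "fmt"
        "time"
)

type updatePodOptions struct {
        PodName        string
        OnCompleteFunc func(error)
}

// managePodLoop sketch: consume updates for one pod until the channel closes.
func managePodLoop(podUpdates <-chan updatePodOptions, syncPodFn func(updatePodOptions) error) {
        var lastSyncTime time.Time
        for update := range podUpdates { // step 1: receive events sent by UpdatePod
                // step 2: the real worker first waits for a pod status newer
                // than lastSyncTime before syncing again.
                if !lastSyncTime.IsZero() {
                        fmt.Println("last sync was at", lastSyncTime.Format(time.RFC3339Nano))
                }
                err := syncPodFn(update)  // step 3: call the kubelet's syncPod
                lastSyncTime = time.Now() // step 4: record the last sync time
                if update.OnCompleteFunc != nil {
                        update.OnCompleteFunc(err) // step 5: completion callback
                }
                // step 6: wrapUp requeues the pod for the next resync after an
                // interval (omitted here).
        }
}

func main() {
        ch := make(chan updatePodOptions, 1)
        go managePodLoop(ch, func(u updatePodOptions) error {
                fmt.Println("syncing", u.PodName)
                return nil
        })
        ch <- updatePodOptions{
                PodName:        "nginx",
                OnCompleteFunc: func(err error) { fmt.Println("sync finished, err =", err) },
        }
        time.Sleep(50 * time.Millisecond)
        close(ch)
}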

 


Reference (the article below already analyzes kubelet pod management in great detail; this post only organizes and summarizes it):

【原创】基本k8s源码分析-----kubelet(8)pod管理

http://blog.csdn.net/screscent/article/details/51145382?locationNum=3

