The source code analyzed here is the Kubernetes v1.3.0 stable release.
In Go, cross-goroutine communication goes through channels, so we start by tracing where the communication channel and the goroutines around it are created.
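As a minimal, self-contained illustration of that pattern (plain Go, not kubelet code; the podUpdate type is a stand-in for kubetypes.PodUpdate), a producer goroutine pushes updates into a buffered channel that a consumer loop drains:

package main

import "fmt"

// podUpdate is a stand-in for kubetypes.PodUpdate, for illustration only.
type podUpdate struct {
    Source string
    Pods   []string
}

func main() {
    // Buffered channel, playing the role of the `updates` channel that
    // NewPodConfig creates below.
    updates := make(chan podUpdate, 50)

    // Producer goroutine: a config source pushing pod updates.
    go func() {
        updates <- podUpdate{Source: "file", Pods: []string{"static-pod"}}
        close(updates)
    }()

    // Consumer: the sync loop draining the channel.
    for u := range updates {
        fmt.Printf("update from %s: %v\n", u.Source, u.Pods)
    }
}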
(1) Creation of the PodUpdate channel
func makePodSourceConfig(kc *KubeletConfig) *config.PodConfig {
    // source of all configuration
    cfg := config.NewPodConfig(config.PodConfigNotificationIncremental, kc.Recorder)

    // define file config source
    if kc.ConfigFile != "" {
        glog.Infof("Adding manifest file: %v", kc.ConfigFile)
        config.NewSourceFile(kc.ConfigFile, kc.NodeName, kc.FileCheckFrequency, cfg.Channel(kubetypes.FileSource))
    }

    // define url config source
    if kc.ManifestURL != "" {
        glog.Errorf("Adding manifest url %q with HTTP header %v", kc.ManifestURL, kc.ManifestURLHeader)
        config.NewSourceURL(kc.ManifestURL, kc.ManifestURLHeader, kc.NodeName, kc.HTTPCheckFrequency, cfg.Channel(kubetypes.HTTPSource))
    }
    if kc.KubeClient != nil {
        glog.Errorf("Watching apiserver")
        config.NewSourceApiserver(kc.KubeClient, kc.NodeName, cfg.Channel(kubetypes.ApiserverSource))
    }
    return cfg
}
// NewPodConfig creates an object that can merge many configuration sources into a stream
// of normalized updates to a pod configuration.
func NewPodConfig(mode PodConfigNotificationMode, recorder record.EventRecorder) *PodConfig {
    updates := make(chan kubetypes.PodUpdate, 50)
    storage := newPodStorage(updates, mode, recorder)
    podConfig := &PodConfig{
        pods:    storage,
        mux:     config.NewMux(storage),
        updates: updates,
        sources: sets.String{},
    }
    return podConfig
}
// TODO: PodConfigNotificationMode could be handled by a listener to the updates channel
// in the future, especially with multiple listeners.
// TODO: allow initialization of the current state of the store with snapshotted version.
func newPodStorage(updates chan<- kubetypes.PodUpdate, mode PodConfigNotificationMode, recorder record.EventRecorder) *podStorage {
    return &podStorage{
        pods:        make(map[string]map[string]*api.Pod),
        mode:        mode,
        updates:     updates,
        sourcesSeen: sets.String{},
        recorder:    recorder,
    }
}
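PodConfig merges the file, HTTP and apiserver sources into that single updates channel: each source obtains its own channel via cfg.Channel(sourceName), and the mux merges whatever arrives there into podStorage, which emits normalized PodUpdates on updates. The fan-in sketch below is a heavily simplified model of that idea, not the actual Mux/podStorage implementation (no locking, no per-source bookkeeping, no diffing):

// A heavily simplified model of how PodConfig fans its sources into one stream:
// each call to channelFor gives a source its own inbound channel, and a
// goroutine per source tags what arrives and merges it into the shared
// updates channel.
package main

import "fmt"

type update struct {
    Source string
    Pods   []string
}

func channelFor(source string, merged chan<- update) chan<- update {
    in := make(chan update)
    go func() {
        for u := range in {
            u.Source = source // remember which source this came from
            merged <- u       // fan-in to the single updates stream
        }
    }()
    return in
}

func main() {
    merged := make(chan update, 50)
    fileCh := channelFor("file", merged)
    apiCh := channelFor("api", merged)

    fileCh <- update{Pods: []string{"static-pod"}}
    apiCh <- update{Pods: []string{"nginx"}}

    for i := 0; i < 2; i++ {
        fmt.Println(<-merged)
    }
}

In the real code, podStorage.Merge additionally diffs each source's new pod set against the previous one, so in incremental mode the downstream channel sees ADD/UPDATE/REMOVE (and related) operations rather than raw SET snapshots.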
(2) Handing the pod channel to the sync loop
// in RunKubelet (cmd/kubelet/app/server.go)
k, podCfg, err := builder(kcfg)
startKubelet(k, podCfg, kcfg)

// in startKubelet: the merged updates channel is fed into the kubelet's main loop
go wait.Until(func() { k.Run(podCfg.Updates()) }, 0, wait.NeverStop)
// Run starts the kubelet reacting to config updates
func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
    ...
    kl.syncLoop(updates, kl)
}

func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler) {
    ...
    if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
        break
    }
    ...
}
(3) The three sources of pod update events
func NewSourceFile(path string, nodeName string, period time.Duration, updates chan<- interface{}) {
    config := &sourceFile{
        path:     path,
        nodeName: nodeName,
        updates:  updates,
    }
    glog.V(1).Infof("Watching path %q", path)
    go wait.Until(config.run, period, wait.NeverStop)
}

func (s *sourceFile) run() {
    if err := s.extractFromPath(); err != nil {
        glog.Errorf("Unable to read config path %q: %v", s.path, err)
    }
}
func (s *sourceFile) extractFromPath() error {
    path := s.path
    statInfo, err := os.Stat(path)
    if err != nil {
        if !os.IsNotExist(err) {
            return err
        }
        // Emit an update with an empty PodList to allow FileSource to be marked as seen
        s.updates <- kubetypes.PodUpdate{Pods: []*api.Pod{}, Op: kubetypes.SET, Source: kubetypes.FileSource}
        return fmt.Errorf("path does not exist, ignoring")
    }

    switch {
    case statInfo.Mode().IsDir():
        pods, err := s.extractFromDir(path)
        if err != nil {
            return err
        }
        s.updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.SET, Source: kubetypes.FileSource}

    case statInfo.Mode().IsRegular():
        pod, err := s.extractFromFile(path)
        if err != nil {
            return err
        }
        s.updates <- kubetypes.PodUpdate{Pods: []*api.Pod{pod}, Op: kubetypes.SET, Source: kubetypes.FileSource}

    default:
        return fmt.Errorf("path is not a directory or file")
    }

    return nil
}
func NewSourceURL(url string, header http.Header, nodeName string, period time.Duration, updates chan<- interface{}) {
    config := &sourceURL{
        url:      url,
        header:   header,
        nodeName: nodeName,
        updates:  updates,
        data:     nil,
        // Timing out requests leads to retries. This client is only used to
        // read the the manifest URL passed to kubelet.
        client: &http.Client{Timeout: 10 * time.Second},
    }
    glog.V(1).Infof("Watching URL %s", url)
    go wait.Until(config.run, period, wait.NeverStop)
}
func (s *sourceURL) run() {
    if err := s.extractFromURL(); err != nil {
        // Don't log this multiple times per minute. The first few entries should be
        // enough to get the point across.
        if s.failureLogs < 3 {
            glog.Warningf("Failed to read pods from URL: %v", err)
        } else if s.failureLogs == 3 {
            glog.Warningf("Failed to read pods from URL. Dropping verbosity of this message to V(4): %v", err)
        } else {
            glog.V(4).Infof("Failed to read pods from URL: %v", err)
        }
        s.failureLogs++
    } else {
        if s.failureLogs > 0 {
            glog.Info("Successfully read pods from URL.")
            s.failureLogs = 0
        }
    }
}
func (s *sourceURL) extractFromURL() error {
    req, err := http.NewRequest("GET", s.url, nil)
    if err != nil {
        return err
    }
    req.Header = s.header
    resp, err := s.client.Do(req)
    if err != nil {
        return err
    }
    defer resp.Body.Close()
    data, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return err
    }
    if resp.StatusCode != 200 {
        return fmt.Errorf("%v: %v", s.url, resp.Status)
    }
    if len(data) == 0 {
        // Emit an update with an empty PodList to allow HTTPSource to be marked as seen
        s.updates <- kubetypes.PodUpdate{Pods: []*api.Pod{}, Op: kubetypes.SET, Source: kubetypes.HTTPSource}
        return fmt.Errorf("zero-length data received from %v", s.url)
    }
    // Short circuit if the data has not changed since the last time it was read.
    if bytes.Compare(data, s.data) == 0 {
        return nil
    }
    s.data = data

    // First try as it is a single pod.
    parsed, pod, singlePodErr := tryDecodeSinglePod(data, s.applyDefaults)
    if parsed {
        if singlePodErr != nil {
            // It parsed but could not be used.
            return singlePodErr
        }
        s.updates <- kubetypes.PodUpdate{Pods: []*api.Pod{pod}, Op: kubetypes.SET, Source: kubetypes.HTTPSource}
        return nil
    }

    // That didn't work, so try a list of pods.
    parsed, podList, multiPodErr := tryDecodePodList(data, s.applyDefaults)
    if parsed {
        if multiPodErr != nil {
            // It parsed but could not be used.
            return multiPodErr
        }
        pods := make([]*api.Pod, 0)
        for i := range podList.Items {
            pods = append(pods, &podList.Items[i])
        }
        s.updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.SET, Source: kubetypes.HTTPSource}
        return nil
    }

    return fmt.Errorf("%v: received '%v', but couldn't parse as "+
        "single (%v) or multiple pods (%v).\n",
        s.url, string(data), singlePodErr, multiPodErr)
}
// NewSourceApiserver creates a config source that watches and pulls from the apiserver.
func NewSourceApiserver(c *clientset.Clientset, nodeName string, updates chan<- interface{}) {
    lw := cache.NewListWatchFromClient(c.CoreClient, "pods", api.NamespaceAll, fields.OneTermEqualSelector(api.PodHostField, nodeName))
    newSourceApiserverFromLW(lw, updates)
}
// newSourceApiserverFromLW holds creates a config source that watches and pulls from the apiserver.
func newSourceApiserverFromLW(lw cache.ListerWatcher, updates chan<- interface{}) {
    send := func(objs []interface{}) {
        var pods []*api.Pod
        for _, o := range objs {
            pods = append(pods, o.(*api.Pod))
        }
        updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.SET, Source: kubetypes.ApiserverSource}
    }
    cache.NewReflector(lw, &api.Pod{}, cache.NewUndeltaStore(send, cache.MetaNamespaceKeyFunc), 0).Run()
}
// Run starts a watch and handles watch events. Will restart the watch if it is closed.
// Run starts a goroutine and returns immediately.
func (r *Reflector) Run() {
    glog.V(3).Infof("Starting reflector %v (%s) from %s", r.expectedType, r.resyncPeriod, r.name)
    go wait.Until(func() {
        if err := r.ListAndWatch(wait.NeverStop); err != nil {
            utilruntime.HandleError(err)
        }
    }, r.period, wait.NeverStop)
}
(4) Handling in syncLoopIteration
func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handler SyncHandler,
    syncCh <-chan time.Time, housekeepingCh <-chan time.Time, plegCh <-chan *pleg.PodLifecycleEvent) bool {
    kl.syncLoopMonitor.Store(kl.clock.Now())
    select {
    case u, open := <-configCh:
        // Update from a config source; dispatch it to the right handler
        // callback.
        if !open {
            glog.Errorf("Update channel is closed. Exiting the sync loop.")
            return false
        }
        kl.sourcesReady.AddSource(u.Source)

        switch u.Op {
        case kubetypes.ADD:
            glog.V(2).Infof("SyncLoop (ADD, %q): %q", u.Source, format.Pods(u.Pods))
            // After restarting, kubelet will get all existing pods through
            // ADD as if they are new pods. These pods will then go through the
            // admission process and *may* be rejcted. This can be resolved
            // once we have checkpointing.
            handler.HandlePodAdditions(u.Pods)
        case kubetypes.UPDATE:
            glog.V(2).Infof("SyncLoop (UPDATE, %q): %q", u.Source, format.PodsWithDeletiontimestamps(u.Pods))
            handler.HandlePodUpdates(u.Pods)
        case kubetypes.REMOVE:
            glog.V(2).Infof("SyncLoop (REMOVE, %q): %q", u.Source, format.Pods(u.Pods))
            handler.HandlePodRemoves(u.Pods)
        case kubetypes.RECONCILE:
            glog.V(4).Infof("SyncLoop (RECONCILE, %q): %q", u.Source, format.Pods(u.Pods))
            handler.HandlePodReconcile(u.Pods)
        case kubetypes.DELETE:
            glog.V(2).Infof("SyncLoop (DELETE, %q): %q", u.Source, format.Pods(u.Pods))
            // DELETE is treated as a UPDATE because of graceful deletion.
            handler.HandlePodUpdates(u.Pods)
        case kubetypes.SET:
            // TODO: Do we want to support this?
            glog.Errorf("Kubelet does not support snapshot update")
        }
    case e := <-plegCh:
        if isSyncPodWorthy(e) {
            // PLEG event for a pod; sync it.
            pod, ok := kl.podManager.GetPodByUID(e.ID)
            if !ok {
                // If the pod no longer exists, ignore the event.
                glog.V(4).Infof("SyncLoop (PLEG): ignore irrelevant event: %#v", e)
                break
            }
            glog.V(2).Infof("SyncLoop (PLEG): %q, event: %#v", format.Pod(pod), e)
            handler.HandlePodSyncs([]*api.Pod{pod})
        }
    case <-syncCh:
        // Sync pods waiting for sync
        podsToSync := kl.getPodsToSync()
        if len(podsToSync) == 0 {
            break
        }
        glog.V(4).Infof("SyncLoop (SYNC): %d pods; %s", len(podsToSync), format.Pods(podsToSync))
        kl.HandlePodSyncs(podsToSync)
    case update := <-kl.livenessManager.Updates():
        if update.Result == proberesults.Failure {
            // The liveness manager detected a failure; sync the pod.
            // We should not use the pod from livenessManager, because it is never updated after
            // initialization.
            pod, ok := kl.podManager.GetPodByUID(update.PodUID)
            if !ok {
                // If the pod no longer exists, ignore the update.
                glog.V(4).Infof("SyncLoop (container unhealthy): ignore irrelevant update: %#v", update)
                break
            }
            glog.V(1).Infof("SyncLoop (container unhealthy): %q", format.Pod(pod))
            handler.HandlePodSyncs([]*api.Pod{pod})
        }
    case <-housekeepingCh:
        if !kl.sourcesReady.AllReady() {
            // If the sources aren't ready, skip housekeeping, as we may
            // accidentally delete pods from unready sources.
            glog.V(4).Infof("SyncLoop (housekeeping, skipped): sources aren't ready yet.")
        } else {
            glog.V(4).Infof("SyncLoop (housekeeping)")
            if err := handler.HandlePodCleanups(); err != nil {
                glog.Errorf("Failed cleaning pods: %v", err)
            }
        }
    }
    kl.syncLoopMonitor.Store(kl.clock.Now())
    return true
}
The handlers above that actually need to sync a pod (for example HandlePodAdditions, HandlePodUpdates and HandlePodSyncs) end up calling dispatchWork for each pod.
(5) Handling in dispatchWork
// dispatchWork starts the asynchronous sync of the pod in a pod worker.
// If the pod is terminated, dispatchWork
func (kl *Kubelet) dispatchWork(pod *api.Pod, syncType kubetypes.SyncPodType, mirrorPod *api.Pod, start time.Time) {
    if kl.podIsTerminated(pod) {
        if pod.DeletionTimestamp != nil {
            // If the pod is in a terminated state, there is no pod worker to
            // handle the work item. Check if the DeletionTimestamp has been
            // set, and force a status update to trigger a pod deletion request
            // to the apiserver.
            kl.statusManager.TerminatePod(pod)
        }
        return
    }
    // Run the sync in an async worker.
    kl.podWorkers.UpdatePod(&UpdatePodOptions{
        Pod:        pod,
        MirrorPod:  mirrorPod,
        UpdateType: syncType,
        OnCompleteFunc: func(err error) {
            if err != nil {
                metrics.PodWorkerLatency.WithLabelValues(syncType.String()).Observe(metrics.SinceInMicroseconds(start))
            }
        },
    })
    // Note the number of containers for new pods.
    if syncType == kubetypes.SyncPodCreate {
        metrics.ContainersPerPodCount.Observe(float64(len(pod.Spec.Containers)))
    }
}
(6) Operations in UpdatePod
// Apply the new setting to the specified pod.
// If the options provide an OnCompleteFunc, the function is invoked if the update is accepted.
// Update requests are ignored if a kill pod request is pending.
func (p *podWorkers) UpdatePod(options *UpdatePodOptions) {
    pod := options.Pod
    uid := pod.UID
    var podUpdates chan UpdatePodOptions
    var exists bool

    p.podLock.Lock()
    defer p.podLock.Unlock()
    if podUpdates, exists = p.podUpdates[uid]; !exists {
        // We need to have a buffer here, because checkForUpdates() method that
        // puts an update into channel is called from the same goroutine where
        // the channel is consumed. However, it is guaranteed that in such case
        // the channel is empty, so buffer of size 1 is enough.
        podUpdates = make(chan UpdatePodOptions, 1)
        p.podUpdates[uid] = podUpdates

        // Creating a new pod worker either means this is a new pod, or that the
        // kubelet just restarted. In either case the kubelet is willing to believe
        // the status of the pod for the first pod worker sync. See corresponding
        // comment in syncPod.
        go func() {
            defer runtime.HandleCrash()
            p.managePodLoop(podUpdates)
        }()
    }
    if !p.isWorking[pod.UID] {
        p.isWorking[pod.UID] = true
        podUpdates <- *options
    } else {
        // if a request to kill a pod is pending, we do not let anything overwrite that request.
        update, found := p.lastUndeliveredWorkUpdate[pod.UID]
        if !found || update.UpdateType != kubetypes.SyncPodKill {
            p.lastUndeliveredWorkUpdate[pod.UID] = *options
        }
    }
}
(7) When does the goroutine started by UpdatePod exit?
Two code paths in the kubelet end up removing a pod worker (see the sketch below):
One is: HandlePodRemoves → deletePod → kl.podWorkers.ForgetWorker(pod.UID) → removeWorker
The other is: HandlePodCleanups → ForgetNonExistingPodWorkers → iterate over all known pods → removeWorker
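Both paths end in removeWorker, which closes the pod's update channel and deletes its map entries (including any lastUndeliveredWorkUpdate). Since managePodLoop ranges over that channel, closing it is what makes the per-pod goroutine return. A tiny self-contained illustration of this shutdown mechanism (plain Go, not kubelet code):

package main

import (
    "fmt"
    "time"
)

func main() {
    // podUpdates stands in for the per-pod channel that UpdatePod creates.
    podUpdates := make(chan string, 1)

    // Stand-in for managePodLoop: the goroutine lives exactly as long as the
    // channel stays open.
    go func() {
        for u := range podUpdates {
            fmt.Println("sync:", u)
        }
        fmt.Println("per-pod goroutine exiting")
    }()

    podUpdates <- "ADD"
    close(podUpdates) // what removeWorker does (besides deleting the map entries)
    time.Sleep(100 * time.Millisecond)
}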
(8) Handling in managePodLoop
step1: receive the update events that UpdatePod sends on the per-pod channel and process them in a loop
step2: compare against the last sync time
step3: perform the sync; the sync function is the kubelet's syncPod
k8s.io\kubernetes\pkg\kubelet\kubelet.go:1703
step4: update the last sync time
step5: invoke the completion callback OnCompleteFunc
step6: wrapUp sets a resync interval (the exact workflow still needs to be confirmed)
A simplified sketch of this loop follows below.
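Putting these steps together, managePodLoop is essentially the loop below. This is a simplified, self-contained sketch built from the steps above, not the verbatim v1.3.0 source: the stand-in types and the syncPodFn signature are invented for illustration, and error handling, the pod status cache and event recording are omitted.

package main

import (
    "fmt"
    "time"
)

// Simplified stand-ins for the kubelet's types; names are invented for illustration.
type updatePodOptions struct {
    PodUID         string
    OnCompleteFunc func(error)
}

type podWorkers struct {
    // syncPodFn plays the role of Kubelet.syncPod; minSyncTime mimics the real
    // worker only syncing against a pod status newer than the previous sync (step2).
    syncPodFn func(uid string, minSyncTime time.Time) error
}

// managePodLoop, reduced to the six steps listed above.
func (p *podWorkers) managePodLoop(podUpdates <-chan updatePodOptions) {
    var lastSyncTime time.Time
    for update := range podUpdates { // step1: drain the events sent by UpdatePod
        err := p.syncPodFn(update.PodUID, lastSyncTime) // step2+3: sync the pod
        lastSyncTime = time.Now()                       // step4: record the last sync time
        if update.OnCompleteFunc != nil {               // step5: completion callback
            update.OnCompleteFunc(err)
        }
        p.wrapUp(update.PodUID, err) // step6: schedule the next resync
    }
}

// wrapUp stub: the real wrapUp re-enqueues the pod on the work queue (with a
// resync or back-off interval) and delivers any lastUndeliveredWorkUpdate.
func (p *podWorkers) wrapUp(uid string, err error) {
    fmt.Printf("pod %s synced, err=%v\n", uid, err)
}

func main() {
    p := &podWorkers{
        syncPodFn: func(uid string, _ time.Time) error { return nil },
    }
    ch := make(chan updatePodOptions, 1)
    ch <- updatePodOptions{PodUID: "pod-1", OnCompleteFunc: func(err error) {}}
    close(ch)
    p.managePodLoop(ch) // the loop exits once the channel is closed
}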
Reference (the article below already analyzes kubelet pod management in great detail; this post only organizes the flow):
【原创】基本k8s源码分析-----kubelet(8)pod管理
http://blog.csdn.net/screscent/article/details/51145382?locationNum=3