Analysis of the kubelet PLEG Implementation

kubelet's main job is to keep pod state consistent with the pod spec, where pod state includes the states and the number of containers in the pod. To achieve this, kubelet watches pod spec changes from multiple sources and periodically fetches the latest container states from the container runtime. For example, given a pod with three containers, if one container exits abnormally, kubelet promptly notices the event through PLEG and recreates the container.

PLEG (Pod Lifecycle Event Generator) is a submodule of kubelet. It periodically fetches the latest container states from the container runtime, generates pod lifecycle events from them, and sends the events to a channel; kubelet reads events from that channel and handles them accordingly.


The PLEG workflow is illustrated in the diagram below:

[Figure: PLEG workflow diagram]

PodLifecycleEvent is defined as follows:

// PodLifeCycleEventType define the event type of pod life cycle events.
type PodLifeCycleEventType string

const (
	// ContainerStarted - event type when the new state of container is running.
	ContainerStarted PodLifeCycleEventType = "ContainerStarted"
	// ContainerDied - event type when the new state of container is exited.
	ContainerDied PodLifeCycleEventType = "ContainerDied"
	// ContainerRemoved - event type when the old state of container is exited.
	ContainerRemoved PodLifeCycleEventType = "ContainerRemoved"
	// PodSync is used to trigger syncing of a pod when the observed change of
	// the state of the pod cannot be captured by any single event above.
	PodSync PodLifeCycleEventType = "PodSync"
	// ContainerChanged - event type when the new state of container is unknown.
	ContainerChanged PodLifeCycleEventType = "ContainerChanged"
)

// PodLifecycleEvent is an event that reflects the change of the pod state.
type PodLifecycleEvent struct {
	// The pod ID.
	ID types.UID
	// The type of the event.
	Type PodLifeCycleEventType
	// The accompanied data which varies based on the event type.
	//   - ContainerStarted/ContainerStopped: the container name (string).
	//   - All other event types: unused.
	Data interface{}
}

PLEG exposes the following interface:

// PodLifecycleEventGenerator contains functions for generating pod life cycle events.
type PodLifecycleEventGenerator interface {
	//Start kicks off the periodic relist loop
	Start()
	//Watch returns the channel from which PLEG events can be received
	Watch() chan *PodLifecycleEvent
	//Healthy reports whether the latest relist finished within relistThreshold (three minutes); exceeding it means the container runtime responds too slowly, there are too many pods, or the runtime is genuinely broken
	Healthy() (bool, error)
}
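
As a rough sketch of how this interface is consumed, the loop below starts a generator and drains its channel. This is illustrative only (the consume function is an assumption, not kubelet code); in kubelet the channel is actually read inside syncLoopIteration, shown in section 3.2.

//minimal consumer sketch, assuming some PodLifecycleEventGenerator
//implementation (e.g. GenericPLEG) and the usual fmt import
func consume(g PodLifecycleEventGenerator) {
	g.Start() //spawns the periodic relist goroutine
	for event := range g.Watch() {
		//each event carries the pod UID, the event type, and, for
		//container events, the container ID in Data
		fmt.Printf("pod %v: %v (%v)\n", event.ID, event.Type, event.Data)
	}
}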

PLEG is implemented by the GenericPLEG type:

type GenericPLEG struct {
	//relist period, 10s by default
	relistPeriod time.Duration
	//container runtime, used to fetch the latest container states
	runtime kubecontainer.Runtime
	//channel on which PLEG events are sent out
	eventChannel chan *PodLifecycleEvent
	//internal record of all pods fetched from the container runtime
	podRecords podRecords
	//timestamp of the current relist, used to measure how long a single relist takes
	relistTime atomic.Value
	//kubelet's internal cache, stores the PodStatus fetched from the container runtime
	cache kubecontainer.Cache
	//clock for timestamps; injectable for tests, set by NewGenericPLEG below
	clock clock.Clock
	//pods whose status could not be fetched from the container runtime during this relist; they are re-inspected during the next relist
	podsToReinspect map[types.UID]*kubecontainer.Pod
}
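
For reference, podRecords keeps two snapshots per pod; its definition in the same package is essentially:

type podRecord struct {
	old     *kubecontainer.Pod
	current *kubecontainer.Pod
}

type podRecords map[types.UID]*podRecord

setCurrent overwrites the current snapshot, and update promotes current to old once a pod's events have been handled; relist below relies on exactly this pairing to diff two consecutive listings.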

1. Creating GenericPLEG

// NewGenericPLEG instantiates a new GenericPLEG object and return it.
func NewGenericPLEG(runtime kubecontainer.Runtime, channelCapacity int,
	relistPeriod time.Duration, cache kubecontainer.Cache, clock clock.Clock) PodLifecycleEventGenerator {
	return &GenericPLEG{
		relistPeriod: relistPeriod,
		runtime:      runtime,
		eventChannel: make(chan *PodLifecycleEvent, channelCapacity),
		podRecords:   make(podRecords),
		cache:        cache,
		clock:        clock,
	}
}
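
In NewMainKubelet the generator is created roughly as follows; the constants plegChannelCapacity (1000) and plegRelistPeriod (10s) are defined in pkg/kubelet/kubelet.go, though their exact values may vary across versions:

klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity,
	plegRelistPeriod, klet.podCache, clock.RealClock{})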

2. Starting GenericPLEG

Start runs relist periodically; the period defaults to 10s:

// Start spawns a goroutine to relist periodically.
func (g *GenericPLEG) Start() {
	go wait.Until(g.relist, g.relistPeriod, wait.NeverStop)
}
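
wait.Until (from k8s.io/apimachinery/pkg/util/wait) runs the function once immediately and then once per period until the stop channel closes; since Start passes wait.NeverStop, relisting continues for the life of the process. A toy illustration:

stop := make(chan struct{})
//prints "tick" immediately, then every second, until close(stop)
go wait.Until(func() { fmt.Println("tick") }, time.Second, stop)
time.Sleep(3 * time.Second)
close(stop)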

As the comment on relist says, it queries the container runtime for the list of pods/containers, compares it with the internally maintained pods/containers, and generates events accordingly:

// relist queries the container runtime for list of pods/containers, compare
// with the internal pods/containers, and generates events accordingly.
func (g *GenericPLEG) relist() {
	klog.V(5).InfoS("GenericPLEG: Relisting")

	if lastRelistTime := g.getRelistTime(); !lastRelistTime.IsZero() {
		metrics.PLEGRelistInterval.Observe(metrics.SinceInSeconds(lastRelistTime))
	}

	timestamp := g.clock.Now()
	defer func() {
		metrics.PLEGRelistDuration.Observe(metrics.SinceInSeconds(timestamp))
	}()

	//fetch all pods/containers on this node from the container runtime; the true argument means everything is returned, including exited or dead ones
	// Get all the pods.
	podList, err := g.runtime.GetPods(true)
	if err != nil {
		klog.ErrorS(err, "GenericPLEG: Unable to retrieve pods")
		return
	}

	//record the timestamp of this relist run
	g.updateRelistTime(timestamp)

	pods := kubecontainer.Pods(podList)
	// update running pod and container count
	updateRunningPodAndContainerMetrics(pods)
	//store the pods fetched this round into podRecords[pid].current
	g.podRecords.setCurrent(pods)

	//the core of relist:
	//podRecords[pid].current holds the latest pod state,
	//podRecords[pid].old holds the pod state from the previous relist;
	//comparing the old and new states yields events, collected into eventsByPodID
	// Compare the old and the current pods, and generate events.
	eventsByPodID := map[types.UID][]*PodLifecycleEvent{}
	for pid := range g.podRecords {
		oldPod := g.podRecords.getOld(pid)
		pod := g.podRecords.getCurrent(pid)
		// Get all containers in the old and the new pod.
		allContainers := getContainersFromPods(oldPod, pod)
		for _, container := range allContainers {
			events := computeEvents(oldPod, pod, &container.ID)
			for _, e := range events {
				updateEvents(eventsByPodID, e)
			}
		}
	}

	var needsReinspection map[types.UID]*kubecontainer.Pod
	if g.cacheEnabled() {
		needsReinspection = make(map[types.UID]*kubecontainer.Pod)
	}

	//iterate over eventsByPodID and send the events to the channel
	// If there are events associated with a pod, we should update the
	// podCache.
	for pid, events := range eventsByPodID {
		pod := g.podRecords.getCurrent(pid)
		if g.cacheEnabled() {
			//update the pod's status in the cache; on failure add the pod to needsReinspection so it is retried during the next relist
			// updateCache() will inspect the pod and update the cache. If an
			// error occurs during the inspection, we want PLEG to retry again
			// in the next relist. To achieve this, we do not update the
			// associated podRecord of the pod, so that the change will be
			// detect again in the next relist.
			// TODO: If many pods changed during the same relist period,
			// inspecting the pod and getting the PodStatus to update the cache
			// serially may take a while. We should be aware of this and
			// parallelize if needed.
			if err := g.updateCache(pod, pid); err != nil {
				// Rely on updateCache calling GetPodStatus to log the actual error.
				klog.V(4).ErrorS(err, "PLEG: Ignoring events for pod", "pod", klog.KRef(pod.Namespace, pod.Name))

				// make sure we try to reinspect the pod during the next relisting
				needsReinspection[pid] = pod

				continue
			} else {
				// this pod was in the list to reinspect and we did so because it had events, so remove it
				// from the list (we don't want the reinspection code below to inspect it a second time in
				// this relist execution)
				delete(g.podsToReinspect, pid)
			}
		}
		//promote this round's snapshot from podRecords[pid].current to podRecords[pid].old
		// Update the internal storage and send out the events.
		g.podRecords.update(pid)

		// Map from containerId to exit code; used as a temporary cache for lookup
		containerExitCode := make(map[string]int)

		for i := range events {
			//skip ContainerChanged events: an unknown state means the container is still in flux, so
			//sending the event is of little use; another event will likely follow shortly, and no
			//other component consumes this event type anyway
			// Filter out events that are not reliable and no other components use yet.
			if events[i].Type == ContainerChanged {
				continue
			}
			select {
			//send the event to the channel; kubelet reads and handles it from there
			case g.eventChannel <- events[i]:
			default:
				metrics.PLEGDiscardEvents.Inc()
				klog.ErrorS(nil, "Event channel is full, discard this relist() cycle event")
			}
			...
		}
	}

	if g.cacheEnabled() {
		// reinspect any pods that failed inspection during the previous relist
		if len(g.podsToReinspect) > 0 {
			klog.V(5).InfoS("GenericPLEG: Reinspecting pods that previously failed inspection")
			for pid, pod := range g.podsToReinspect {
				if err := g.updateCache(pod, pid); err != nil {
					// Rely on updateCache calling GetPodStatus to log the actual error.
					klog.V(5).ErrorS(err, "PLEG: pod failed reinspection", "pod", klog.KRef(pod.Namespace, pod.Name))
					needsReinspection[pid] = pod
				}
			}
		}

		// Update the cache timestamp.  This needs to happen *after*
		// all pods have been properly updated in the cache.
		g.cache.UpdateTime(timestamp)
	}

	// make sure we retain the list of pods that need reinspecting the next time relist is called
	g.podsToReinspect = needsReinspection
}
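
relist builds its per-container diff with getContainersFromPods, which unions the containers (and sandboxes, which PLEG tracks as containers) of the old and current pod records, deduplicated by container ID. A lightly simplified sketch of that helper:

func getContainersFromPods(pods ...*kubecontainer.Pod) []*kubecontainer.Container {
	cidSet := sets.NewString()
	var containers []*kubecontainer.Container
	fillCidSet := func(cs []*kubecontainer.Container) {
		for _, c := range cs {
			cid := c.ID.ID
			if cidSet.Has(cid) {
				continue
			}
			cidSet.Insert(cid)
			containers = append(containers, c)
		}
	}
	for _, p := range pods {
		if p == nil {
			continue
		}
		fillCidSet(p.Containers)
		//sandboxes are treated as regular containers for event purposes
		fillCidSet(p.Sandboxes)
	}
	return containers
}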

computeEvents compares a container's old and new states and generates the corresponding events:

func computeEvents(oldPod, newPod *kubecontainer.Pod, cid *kubecontainer.ContainerID) []*PodLifecycleEvent {
	var pid types.UID
	if oldPod != nil {
		pid = oldPod.ID
	} else if newPod != nil {
		pid = newPod.ID
	}
	//get the container's old state
	oldState := getContainerState(oldPod, cid)
	//get the container's new state
	newState := getContainerState(newPod, cid)
	//generate events from the old/new state pair
	return generateEvents(pid, cid.ID, oldState, newState)
}
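
getContainerState defaults to the non-existent state and otherwise converts the state of the matching container (or sandbox) into a plegContainerState; from the same file, approximately:

func getContainerState(pod *kubecontainer.Pod, cid *kubecontainer.ContainerID) plegContainerState {
	// Default to the non-existent state.
	state := plegContainerNonExistent
	if pod == nil {
		return state
	}
	c := pod.FindContainerByID(*cid)
	if c != nil {
		return convertState(c.State)
	}
	// Search through sandboxes too.
	c = pod.FindSandboxByID(*cid)
	if c != nil {
		return convertState(c.State)
	}
	return state
}

generateEvents then maps the old/new state pair to events: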

func generateEvents(podID types.UID, cid string, oldState, newState plegContainerState) []*PodLifecycleEvent {
	//if the new state equals the old one, the container has not changed, so no event is generated
	if newState == oldState {
		return nil
	}

	klog.V(4).InfoS("GenericPLEG", "podUID", podID, "containerID", cid, "oldState", oldState, "newState", newState)
	switch newState {
	case plegContainerRunning:
		return []*PodLifecycleEvent{{ID: podID, Type: ContainerStarted, Data: cid}}
	case plegContainerExited:
		return []*PodLifecycleEvent{{ID: podID, Type: ContainerDied, Data: cid}}
	case plegContainerUnknown:
		return []*PodLifecycleEvent{{ID: podID, Type: ContainerChanged, Data: cid}}
	case plegContainerNonExistent:
		switch oldState {
		case plegContainerExited:
			// We already reported that the container died before.
			return []*PodLifecycleEvent{{ID: podID, Type: ContainerRemoved, Data: cid}}
		default:
			return []*PodLifecycleEvent{{ID: podID, Type: ContainerDied, Data: cid}, {ID: podID, Type: ContainerRemoved, Data: cid}}
		}
	default:
		panic(fmt.Sprintf("unrecognized container state: %v", newState))
	}
}
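
A few concrete transitions: running → exited yields ContainerDied; exited → non-existent yields only ContainerRemoved, because the death was already reported in an earlier relist; running → non-existent yields both ContainerDied and ContainerRemoved, since the container vanished without its exit ever being observed.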

3. kubelet watches the PLEG channel for pod events

syncLoop is kubelet's main loop:

func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler) {
	//get the PLEG event channel
	plegCh := kl.pleg.Watch()
	const (
		base   = 100 * time.Millisecond
		max    = 5 * time.Second
		factor = 2
	)
	duration := base
	...
	for {
		//3.1 if the runtime health check reports an error, sleep for a while and skip pod synchronization;
		//the sleep starts at 100ms, doubles on each failure, and is capped at 5s
		if err := kl.runtimeState.runtimeErrors(); err != nil {
			klog.ErrorS(err, "Skipping pod synchronization")
			// exponential backoff
			time.Sleep(duration)
			duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
			continue
		}
		// reset backoff if we have a success
		duration = base

		kl.syncLoopMonitor.Store(kl.clock.Now())
		//3.2 read events from the various channels
		if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
			break
		}
		kl.syncLoopMonitor.Store(kl.clock.Now())
	}
}

3.1 runtimeErrors runs the registered health checks to determine whether the runtime is healthy

NewMainKubelet(...)
	klet.runtimeState = newRuntimeState(maxWaitForContainerRuntime)
	//register PLEG's health check function Healthy
	klet.runtimeState.addHealthCheck("PLEG", klet.pleg.Healthy)

// Healthy check if PLEG work properly.
// relistThreshold is the maximum interval between two relist.
func (g *GenericPLEG) Healthy() (bool, error) {
	//get the timestamp of the last relist
	relistTime := g.getRelistTime()
	//a zero value means relist has not run yet, so return false
	if relistTime.IsZero() {
		return false, fmt.Errorf("pleg has yet to be successful")
	}
	// Expose as metric so you can alert on `time()-pleg_last_seen_seconds > nn`
	metrics.PLEGLastSeen.Set(float64(relistTime.Unix()))
	//compute how long ago the last relist ran
	elapsed := g.clock.Since(relistTime)
	//if it exceeds three minutes, return false
	if elapsed > relistThreshold {
		return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold)
	}
	return true, nil
}
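
The three-minute threshold is a package-level constant in the same file:

// The threshold needs to be greater than the relisting period + the
// relisting time, which can vary significantly. Set a conservative
// threshold to avoid flipping between healthy and unhealthy.
relistThreshold = 3 * time.Minute

runtimeErrors aggregates every failure condition into a single error: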

func (s *runtimeState) runtimeErrors() error {
	s.RLock()
	defer s.RUnlock()
	errs := []error{}
	if s.lastBaseRuntimeSync.IsZero() {
		errs = append(errs, errors.New("container runtime status check may not have completed yet"))
	} else if !s.lastBaseRuntimeSync.Add(s.baseRuntimeSyncThreshold).After(time.Now()) {
		errs = append(errs, errors.New("container runtime is down"))
	}
	//iterate over healthChecks; currently only PLEG's Healthy is registered
	for _, hc := range s.healthChecks {
		//run the check; if it returns false, the runtime is not ready yet or is unhealthy
		if ok, err := hc.fn(); !ok {
			errs = append(errs, fmt.Errorf("%s is not healthy: %v", hc.name, err))
		}
	}
	if s.runtimeError != nil {
		errs = append(errs, s.runtimeError)
	}

	return utilerrors.NewAggregate(errs)
}
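
For completeness, addHealthCheck just appends a named check function; the supporting types in kubelet's runtime state look roughly like this:

type healthCheckFnType func() (bool, error)

type healthCheck struct {
	name string
	fn   healthCheckFnType
}

func (s *runtimeState) addHealthCheck(name string, f healthCheckFnType) {
	s.Lock()
	defer s.Unlock()
	s.healthChecks = append(s.healthChecks, &healthCheck{name: name, fn: f})
}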

3.2 syncLoopIteration reads events from the channels and dispatches them.
It listens on several channels; here we only look at the PLEG channel:

func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handler SyncHandler,
	syncCh <-chan time.Time, housekeepingCh <-chan time.Time, plegCh <-chan *pleg.PodLifecycleEvent) bool {
	select {
	case u, open := <-configCh:
	...
	case e := <-plegCh:
		if e.Type == pleg.ContainerStarted {
			// record the most recent time we observed a container start for this pod.
			// this lets us selectively invalidate the runtimeCache when processing a delete for this pod
			// to make sure we don't miss handling graceful termination for containers we reported as having started.
			kl.lastContainerStartedTime.Add(e.ID, time.Now())
		}
		//for any event type other than ContainerRemoved, call HandlePodSyncs to sync the pod
		if isSyncPodWorthy(e) {
			// PLEG event for a pod; sync it.
			if pod, ok := kl.podManager.GetPodByUID(e.ID); ok {
				klog.V(2).InfoS("SyncLoop (PLEG): event for pod", "pod", klog.KObj(pod), "event", e)
				handler.HandlePodSyncs([]*v1.Pod{pod})
			} else {
				// If the pod no longer exists, ignore the event.
				klog.V(4).InfoS("SyncLoop (PLEG): pod does not exist, ignore irrelevant event", "event", e)
			}
		}
		//for ContainerDied events, call into the runtime to clean up the dead container's records
		if e.Type == pleg.ContainerDied {
			if containerID, ok := e.Data.(string); ok {
				kl.cleanUpContainersInPod(e.ID, containerID)
			}
		}
	case <-syncCh:
	...
	}
	return true
}
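
The isSyncPodWorthy filter referenced above is a one-liner in kubelet.go:

// isSyncPodWorthy filters out events that are not worthy of pod syncing
func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
	// ContainerRemoved doesn't affect pod state
	return event.Type != pleg.ContainerRemoved
}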

The rest of the flow follows the call chain below (see also the workflow diagram above); the code is not reproduced here:
HandlePodSyncs -> dispatchWork -> podWorkers.UpdatePod -> managePodLoop -> syncPod

Reference: https://github.com/kubernetes/design-proposals-archive/blob/main/node/pod-lifecycle-event-generator.md
