kubelet源码分析 kuberuntime的syncpod、killpod函数(一)

kubelet源码分析 kuberuntime的syncpod、killpod函数(一)

上篇介绍的kubelet.go中的syncpod函数,走到后面就是到kuberuntime包中。
kuberuntime包中的syncpod函数任务比较大,分三章介绍。总体流程逻辑步骤是

  1. 计算沙箱和容器的变化。
  2. 如果有必要,杀死pod沙盒。
  3. 关闭任何不应该运行的容器。
  4. 必要时创建沙盒。
  5. 创建临时容器(ephemeral containers)。
  6. 创建init容器(每次只启动下一个要运行的init容器)。
  7. 调整正在运行的容器的大小(如果InPlacePodVerticalScaling==true)。
  8. 创建普通容器。

这篇主要介绍第一步,计算沙箱和容器的变化。(computePodActions函数)

一、syncpod函数

整体流程如下,这章主要介绍computePodActions函数计算沙箱和容器的变化。

func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
	// Step 1: 计算沙箱和容器的变化。
	podContainerChanges := m.computePodActions(ctx, pod, podStatus)
	klog.V(3).InfoS("computePodActions got for pod", "podActions", podContainerChanges, "pod", klog.KObj(pod))
	if podContainerChanges.CreateSandbox {
		ref, err := ref.GetReference(legacyscheme.Scheme, pod)
		if err != nil {
			klog.ErrorS(err, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
		}
		if podContainerChanges.SandboxID != "" {
			m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
		} else {
			klog.V(4).InfoS("SyncPod received new pod, will create a sandbox for it", "pod", klog.KObj(pod))
		}
	}

	// Step 2: 如果有必要,杀死pod沙盒。
	if podContainerChanges.KillPod {
		if podContainerChanges.CreateSandbox {
			klog.V(4).InfoS("Stopping PodSandbox for pod, will start new one", "pod", klog.KObj(pod))
		} else {
			klog.V(4).InfoS("Stopping PodSandbox for pod, because all other containers are dead", "pod", klog.KObj(pod))
		}

		killResult := m.killPodWithSyncResult(ctx, pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
		result.AddPodSyncResult(killResult)
		if killResult.Error() != nil {
			klog.ErrorS(killResult.Error(), "killPodWithSyncResult failed")
			return
		}

		if podContainerChanges.CreateSandbox {
			m.purgeInitContainers(ctx, pod, podStatus)
		}
	} else {
		// Step 3: 关闭任何不应该运行的容器。
		for containerID, containerInfo := range podContainerChanges.ContainersToKill {
			klog.V(3).InfoS("Killing unwanted container for pod", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
			killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
			result.AddSyncResult(killContainerResult)
			if err := m.killContainer(ctx, pod, containerID, containerInfo.name, containerInfo.message, containerInfo.reason, nil); err != nil {
				killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
				klog.ErrorS(err, "killContainer for pod failed", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
				return
			}
		}
	}

	m.pruneInitContainersBeforeStart(ctx, pod, podStatus)
	var podIPs []string
	if podStatus != nil {
		podIPs = podStatus.IPs
	}

	// Step 4: 必要时创建沙盒。
	podSandboxID := podContainerChanges.SandboxID
	if podContainerChanges.CreateSandbox {
		var msg string
		var err error

		klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
		metrics.StartedPodsTotal.Inc()
		createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
		result.AddSyncResult(createSandboxResult)
sysctl.ConvertPodSysctlsVariableToDotsSeparator(pod.Spec.SecurityContext)
utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
			if m.runtimeHelper.PrepareDynamicResources(pod) != nil {
				return
			}
		}

		podSandboxID, msg, err = m.createPodSandbox(ctx, pod, podContainerChanges.Attempt)
		if err != nil {
			if m.podStateProvider.IsPodTerminationRequested(pod.UID) {
				klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
				return
			}
			metrics.StartedPodsErrorsTotal.Inc()
			createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
			klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
			if referr != nil {
				klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
			}
			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed to create pod sandbox: %v", err)
			return
		}
		klog.V(4).InfoS("Created PodSandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))

		resp, err := m.runtimeService.PodSandboxStatus(ctx, podSandboxID, false)
		if err != nil {
			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
			if referr != nil {
				klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
			}
			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
			klog.ErrorS(err, "Failed to get pod sandbox status; Skipping pod", "pod", klog.KObj(pod))
			result.Fail(err)
			return
		}
		if resp.GetStatus() == nil {
			result.Fail(errors.New("pod sandbox status is nil"))
			return
		}


		if !kubecontainer.IsHostNetworkPod(pod) {
			podIPs = m.determinePodSandboxIPs(pod.Namespace, pod.Name, resp.GetStatus())
			klog.V(4).InfoS("Determined the ip for pod after sandbox changed", "IPs", podIPs, "pod", klog.KObj(pod))
		}
	}

	podIP := ""
	if len(podIPs) != 0 {
		podIP = podIPs[0]
	}

	
	configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
	result.AddSyncResult(configPodSandboxResult)
	podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
	if err != nil {
		message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
		klog.ErrorS(err, "GeneratePodSandboxConfig for pod failed", "pod", klog.KObj(pod))
		configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
		return
	}
	//创建匿名函数,创建容器
	start := func(ctx context.Context, typeName, metricLabel string, spec *startSpec) error {
		startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
		result.AddSyncResult(startContainerResult)

		isInBackOff, msg, err := m.doBackOff(pod, spec.container, podStatus, backOff)
		if isInBackOff {
			startContainerResult.Fail(err, msg)
			klog.V(4).InfoS("Backing Off restarting container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
			return err
		}

		metrics.StartedContainersTotal.WithLabelValues(metricLabel).Inc()
		if sc.HasWindowsHostProcessRequest(pod, spec.container) {
			metrics.StartedHostProcessContainersTotal.WithLabelValues(metricLabel).Inc()
		}
		klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
		if msg, err := m.startContainer(ctx, podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
			metrics.StartedContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
			if sc.HasWindowsHostProcessRequest(pod, spec.container) {
				metrics.StartedHostProcessContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
			}
			startContainerResult.Fail(err, msg)
			switch {
			case err == images.ErrImagePullBackOff:
				klog.V(3).InfoS("Container start failed in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod), "containerMessage", msg, "err", err)
			default:
				utilruntime.HandleError(fmt.Errorf("%v %+v start failed in pod %v: %v: %s", typeName, spec.container, format.Pod(pod), err, msg))
			}
			return err
		}

		return nil
	}

	// Step 5: 创建短暂的容器。
	for _, idx := range podContainerChanges.EphemeralContainersToStart {
		start(ctx, "ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
	}

	// Step 6: 创建初始容器。
	if container := podContainerChanges.NextInitContainerToStart; container != nil {
		if err := start(ctx, "init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
			return
		}
		klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
	}
	// Step 7: 调整正在运行的容器的大小(如果InPlacePodVerticalScaling==true)。
	if isInPlacePodVerticalScalingAllowed(pod) {
		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
			m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
		}
	}

	// Step 8: 创建普通容器。
	for _, idx := range podContainerChanges.ContainersToStart {
		start(ctx, "container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
	}

	return
}

二、computePodActions函数

  • 获取到sandbox和容器的变化,2.1函数介绍
  • 如果返回true,代表需要创建sandbox(那init容器和其他容器都需要重启)
  • 如果pod的重启策略是永不重启(Never),并且sandbox的尝试次数(attempt)不为0、且已经存在容器状态,则将CreateSandbox置为false并直接返回(不再重建sandbox)
  • 如果重启容器个数为0,则计算下一个init是否完成,如果也完成,就没有要重启的容器,则不需要重启sandbox了
  • 如果init容器个数不等于0,因为sandbox要重启,所以init也要重启,直接获取第一个init重启即可
  • 如果init容器没有,则重启普通的容器
  • 到这里代表sandbox不重启,则要计算其他容器的重启顺序了
  • 首先启动的是init。找到下一个要运行的init容器。函数2.2findNextInitContainerToRun
  • 如果有下一个init要启动。如果initLastStatus不为空则代表当前init有错误,并且isInitContainerFailed函数返回true代表init有错误(oom,退出码!=0,unknown)
  • 如果当前init确实失败,并且重启策略是永不重启(Never,即shouldRestartOnFailure返回false),则KillPod=true,代表init容器失败后要杀掉整个pod的所有容器(sandbox容器也会被删除)
  • 如果错误不等于空,并且错误是未知,记录原因
  • 如果init没全部执行完成,但是没有下一个init要执行。则代表当前init容器正在running,没有运行完成,返回
  • 如果设置了就地升级,则初始化一下
  • 如果init容器也不需要重启,则到了关键的普通容器的处理。
  • 如果运行状态不为空,并且不是running,代表失败的,准备删掉。先把生命周期性相关的停止掉。
  • 如果容器运行状态==nil或者状态不是running,验证是否重启,需要重启的添加到结构体中
  • 如果是running的容器,则要校验。容器配置(spec)与运行时(status)是否发生了改变
  • 如果有变化,但是没有开启了就地升级或者去掉Resources的配置还有其他变化的话,则需要重启。
  • 如果资源没变化,或者开启了就地升级并且值更改了Resources,则不需要重启,保持这个pod
  • 如果可以重启,则把这些容器也加入到重启列表
  • 如果需要保持的和需要启动的容器数都是0,则KillPod=true,杀掉整个pod(包括sandbox)
// computePodActions compares the desired pod spec with the observed podStatus
// and returns a podActions value describing what SyncPod has to do: whether
// to kill the pod, whether to (re)create the sandbox, which init container to
// start next, which ephemeral/regular containers to start, which containers
// to kill, and (when in-place vertical scaling is allowed) which containers
// to resize.
func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
	klog.V(5).InfoS("Syncing Pod", "pod", klog.KObj(pod))
	// Determine whether the sandbox changed; also yields the attempt number
	// and the ID of the sandbox that was inspected.
	createPodSandbox, attempt, sandboxID := runtimeutil.PodSandboxChanged(pod, podStatus)
	changes := podActions{
		KillPod:           createPodSandbox,
		CreateSandbox:     createPodSandbox,
		SandboxID:         sandboxID,
		Attempt:           attempt,
		ContainersToStart: []int{},
		ContainersToKill:  make(map[kubecontainer.ContainerID]containerToKillInfo),
	}
	// The sandbox needs to be (re)created, which implies that init containers
	// and regular containers have to be started again as well.
	if createPodSandbox {
		// If shouldRestartOnFailure reports false, this is not the first
		// sandbox attempt, and container statuses already exist, do not
		// create a new sandbox. KillPod stays true.
		// NOTE(review): shouldRestartOnFailure is defined elsewhere; presumably
		// it is false only for RestartPolicyNever — confirm.
		if !shouldRestartOnFailure(pod) && attempt != 0 && len(podStatus.ContainerStatuses) != 0 {
			changes.CreateSandbox = false
			return changes
		}
		// Collect the indexes of the regular containers that need to be started.
		var containersToStart []int
		for idx, c := range pod.Spec.Containers {
			// With RestartPolicyOnFailure, skip containers for which
			// containerSucceeded reports true.
			if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure && containerSucceeded(&c, podStatus) {
				continue
			}
			containersToStart = append(containersToStart, idx)
		}
		// No regular container needs to be started.
		if len(containersToStart) == 0 {
			// Check whether init containers are done as well
			// (see findNextInitContainerToRun).
			_, _, done := findNextInitContainerToRun(pod, podStatus)
			// Init containers are done too, so nothing is left to run and a
			// new sandbox is pointless.
			if done {
				changes.CreateSandbox = false
				return changes
			}
		}
		// The sandbox is being re-created, so init containers start over from
		// the first one.
		if len(pod.Spec.InitContainers) != 0 {
			changes.NextInitContainerToStart = &pod.Spec.InitContainers[0]
			return changes
		}
		// No init containers: start the regular containers directly.
		changes.ContainersToStart = containersToStart
		return changes
	}
	// From here on the sandbox is kept; work out what to do with the containers.
	// Ephemeral containers that have no status yet are queued to be started.
	for i := range pod.Spec.EphemeralContainers {
		c := (*v1.Container)(&pod.Spec.EphemeralContainers[i].EphemeralContainerCommon)

		if podStatus.FindContainerStatusByName(c.Name) == nil {
			changes.EphemeralContainersToStart = append(changes.EphemeralContainersToStart, i)
		}
	}
	// Find the next init container to run (see findNextInitContainerToRun).
	initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
	// Init containers have not all completed: handle them first and return.
	if !done {
		// There is a next init container to start.
		if next != nil {
			// A non-nil initLastStatus that isInitContainerFailed accepts
			// means the init container has failed.
			initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
			// The init container failed and shouldRestartOnFailure reports
			// false: kill the whole pod instead of retrying.
			// NOTE(review): presumably this is the RestartPolicyNever case — confirm.
			if initFailed && !shouldRestartOnFailure(pod) {
				changes.KillPod = true
			} else {
				// If the last init container status is in the Unknown state,
				// schedule it to be killed before it is restarted.
				if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
					changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
						name:      next.Name,
						container: next,
						message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
							initLastStatus.State),
						reason: reasonUnknown,
					}
				}
				// Start (or restart) this init container next.
				changes.NextInitContainerToStart = next
			}
		}
		// Init is not done. If next was nil, nothing is started in this pass
		// (findNextInitContainerToRun returns a nil next with done=false when
		// an init container is still running).
		return changes
	}
	// In-place vertical scaling is allowed: initialise the update map and
	// refresh podStatus. A refresh error is ignored and the passed-in
	// podStatus is used instead.
	if isInPlacePodVerticalScalingAllowed(pod) {
		changes.ContainersToUpdate = make(map[v1.ResourceName][]containerToUpdateInfo)
		latestPodStatus, err := m.GetPodStatus(ctx, podStatus.ID, pod.Name, pod.Namespace)
		if err == nil {
			podStatus = latestPodStatus
		}
	}

	// Number of running containers that are kept as they are.
	keepCount := 0

	for idx, container := range pod.Spec.Containers {
		containerStatus := podStatus.FindContainerStatusByName(container.Name)
		// The container has a status but is not running: invoke the internal
		// post-stop lifecycle hook for it. A hook failure is only logged.
		if containerStatus != nil && containerStatus.State != kubecontainer.ContainerStateRunning {
			if err := m.internalLifecycle.PostStopContainer(containerStatus.ID.ID); err != nil {
				klog.ErrorS(err, "Internal container post-stop lifecycle hook failed for container in pod with error",
					"containerName", container.Name, "pod", klog.KObj(pod))
			}
		}
		// The container has no status or is not running.
		if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
			// Decide whether it should be (re)started (see ShouldContainerBeRestarted).
			if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
				klog.V(3).InfoS("Container of pod is not in the desired state and shall be started", "containerName", container.Name, "pod", klog.KObj(pod))
				// Queue it for start.
				changes.ContainersToStart = append(changes.ContainersToStart, idx)
				// A container in the Unknown state is additionally scheduled
				// to be killed before the restart.
				if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
					changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
						name:      containerStatus.Name,
						container: &pod.Spec.Containers[idx],
						message: fmt.Sprintf("Container is in %q state, try killing it before restart",
							containerStatus.State),
						reason: reasonUnknown,
					}
				}
			}
			continue
		}
		// From here on the container is running.
		var message string
		var reason containerKillReason
		// Default restart decision for a container that is about to be killed.
		// NOTE(review): shouldRestartOnFailure is defined elsewhere; presumably
		// true unless the restart policy is Never — confirm.
		restart := shouldRestartOnFailure(pod)
		// The container definition changed (per containerChanged) and either
		// in-place scaling is not allowed or something other than Resources
		// changed (HashContainerWithoutResources differs): the container must
		// be killed and restarted regardless of the restart policy.
		if _, _, changed := containerChanged(&container, containerStatus); changed &&
			(!isInPlacePodVerticalScalingAllowed(pod) ||
				kubecontainer.HashContainerWithoutResources(&container) != containerStatus.HashWithoutResources) {
			message = fmt.Sprintf("Container %s definition changed", container.Name)
			restart = true
		} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
			// The liveness probe failed: kill the container and record why.
			message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
			reason = reasonLivenessProbe
		} else if startup, found := m.startupManager.Get(containerStatus.ID); found && startup == proberesults.Failure {
			// The startup probe failed: kill the container and record why.
			message = fmt.Sprintf("Container %s failed startup probe", container.Name)
			reason = reasonStartupProbe
		} else if isInPlacePodVerticalScalingAllowed(pod) && !m.computePodResizeAction(pod, idx, containerStatus, &changes) {
			// computePodResizeAction returned false, i.e. the resize requires
			// a restart; it has already added the container to the kill and
			// start lists itself, so it is not counted as kept.
			continue
		} else {
			// Nothing requires a kill (no relevant change, or the resize can
			// be applied in place): keep the container running.
			keepCount++
			continue
		}
		// The container is going to be killed; if a restart is allowed, also
		// queue it for start.
		if restart {
			message = fmt.Sprintf("%s, will be restarted", message)
			changes.ContainersToStart = append(changes.ContainersToStart, idx)
		}

		changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
			name:      containerStatus.Name,
			container: &pod.Spec.Containers[idx],
			message:   message,
			reason:    reason,
		}
		klog.V(2).InfoS("Message for Container of pod", "containerName", container.Name, "containerStatusID", containerStatus.ID, "pod", klog.KObj(pod), "containerMessage", message)
	}
	// Nothing is kept and nothing is to be started: the sandbox is not needed
	// either, so kill the whole pod.
	if keepCount == 0 && len(changes.ContainersToStart) == 0 {
		changes.KillPod = true
	}

	return changes
}

2.1PodSandboxChanged函数

  • 如果sandbox为空,则需要重启
  • 计数ready的sandbox数量
  • 如果超过一个sandbox,则代表错误,需要重启
  • 如果第一个sandbox状态不是ready,也需要重启
  • 网络命名空间改变了,重启
  • 如果network未设置,重启
// PodSandboxChanged reports whether a new sandbox has to be created for pod.
// It returns (changed, attempt, sandboxID):
//   - changed is true when no sandbox exists, more than one sandbox is ready,
//     the first sandbox is not ready, its network namespace mode differs from
//     the one the pod asks for, or a non-host-network pod has a sandbox
//     network with an empty IP.
//   - attempt is the attempt number to use for the next sandbox (0 when none
//     exists, current+1 when changed, otherwise the current attempt).
//   - sandboxID is the ID of the first sandbox status, or "" when there is no
//     sandbox or when the network namespace changed.
func PodSandboxChanged(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (bool, uint32, string) {
	sandboxes := podStatus.SandboxStatuses
	if len(sandboxes) == 0 {
		klog.V(2).InfoS("No sandbox for pod can be found. Need to start a new one", "pod", klog.KObj(pod))
		return true, 0, ""
	}

	// Count how many of the known sandboxes are in the ready state.
	var readyCount int
	for i := range sandboxes {
		if sandboxes[i].State == runtimeapi.PodSandboxState_SANDBOX_READY {
			readyCount++
		}
	}

	// All remaining decisions are made against the first sandbox status.
	first := sandboxes[0]
	switch {
	case readyCount > 1:
		// More than one ready sandbox is an inconsistent state.
		klog.V(2).InfoS("Multiple sandboxes are ready for Pod. Need to reconcile them", "pod", klog.KObj(pod))
		return true, first.Metadata.Attempt + 1, first.Id

	case first.State != runtimeapi.PodSandboxState_SANDBOX_READY:
		klog.V(2).InfoS("No ready sandbox for pod can be found. Need to start a new one", "pod", klog.KObj(pod))
		return true, first.Metadata.Attempt + 1, first.Id

	case first.GetLinux().GetNamespaces().GetOptions().GetNetwork() != NetworkNamespaceForPod(pod):
		// Network namespace mode changed; note that no sandbox ID is returned here.
		klog.V(2).InfoS("Sandbox for pod has changed. Need to start a new one", "pod", klog.KObj(pod))
		return true, first.Metadata.Attempt + 1, ""

	case !kubecontainer.IsHostNetworkPod(pod) && first.Network != nil && first.Network.Ip == "":
		// The sandbox of a non-host-network pod has no IP assigned.
		klog.V(2).InfoS("Sandbox for pod has no IP address. Need to start a new one", "pod", klog.KObj(pod))
		return true, first.Metadata.Attempt + 1, first.Id
	}

	return false, first.Metadata.Attempt, first.Id
}

2.2 findNextInitContainerToRun函数

这里返回的三个参数分别代表 status(当前的容器,只有运行失败的才返回)next(下一个要运行的init容器)done(true代表全部执行完成)

  • 如果没有init容器,返回结果,当前状态为nil,下一个为nil,全部完成=true
  • 如果有任意一个普通容器处于running,则说明init已经全部完成,不用再校验init了
  • 从最后一个init往前校验
  • 如果是失败的,则返回当前状态。返回当前的容器为下一个,是否运行完成=false
  • 如果在运行中,返回是否完成=false
  • 如果正常退出,并且是最后一个,是否运行完成返回true
  • 否则把下一个返回
// findNextInitContainerToRun inspects the init containers of pod against
// podStatus and returns:
//   - status: the status of a failed init container (nil unless one failed),
//   - next:   the init container that should be started next (nil if none),
//   - done:   true when init is considered complete.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.Status, next *v1.Container, done bool) {
	initContainers := pod.Spec.InitContainers
	// No init containers at all: nothing to run, init is done.
	if len(initContainers) == 0 {
		return nil, nil, true
	}

	// As soon as any regular container is running, init is treated as done.
	for i := range pod.Spec.Containers {
		cs := podStatus.FindContainerStatusByName(pod.Spec.Containers[i].Name)
		if cs != nil && cs.State == kubecontainer.ContainerStateRunning {
			return nil, nil, true
		}
	}

	lastIdx := len(initContainers) - 1

	// Walk backwards looking for a failed init container; if one is found it
	// is both the reported status and the next container to run.
	for i := lastIdx; i >= 0; i-- {
		candidate := &initContainers[i]
		if cs := podStatus.FindContainerStatusByName(candidate.Name); cs != nil && isInitContainerFailed(cs) {
			return cs, candidate, false
		}
	}

	// Walk backwards again to find the most recent init container that has a
	// status, and decide from its state.
	for i := lastIdx; i >= 0; i-- {
		cs := podStatus.FindContainerStatusByName(initContainers[i].Name)
		if cs == nil {
			continue
		}

		switch cs.State {
		case kubecontainer.ContainerStateRunning:
			// Still running: nothing to start, init is not done yet.
			return nil, nil, false
		case kubecontainer.ContainerStateExited:
			// The last init container exited: init is done.
			if i == lastIdx {
				return nil, nil, true
			}
			// Otherwise continue with the following init container.
			return nil, &initContainers[i+1], false
		}
	}

	// No init container gave a decisive status: start with the first one.
	return nil, &initContainers[0], false
}

2.3 ShouldContainerBeRestarted函数

// ShouldContainerBeRestarted reports whether container of pod should be
// (re)started given the observed podStatus.
func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
	// A pod that is being deleted never gets its containers restarted.
	if pod.DeletionTimestamp != nil {
		return false
	}

	// No status recorded for this container yet: it has to be started.
	cs := podStatus.FindContainerStatusByName(container.Name)
	if cs == nil {
		return true
	}

	switch cs.State {
	case ContainerStateRunning:
		// Already running, nothing to do.
		return false
	case ContainerStateUnknown, ContainerStateCreated:
		// Unknown or merely created containers always need a (re)start.
		return true
	}

	// Any other state: the pod's restart policy decides.
	switch pod.Spec.RestartPolicy {
	case v1.RestartPolicyNever:
		klog.V(4).InfoS("Already ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name)
		return false
	case v1.RestartPolicyOnFailure:
		// Exit code 0 counts as success, so no restart is needed.
		if cs.ExitCode == 0 {
			klog.V(4).InfoS("Already successfully ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name)
			return false
		}
	}
	return true
}

2.4 computePodResizeAction

  • 如果没有设置limit或者还没运行,直接返回true
  • 做一下基本校验
  • 获取期望的cpu和内存指标和实际现在使用的cpu和内存指标
  • 如果容器运行时设置了状态,则替代v1的(更有时效性)
  • 如果预期的和实际的都相等,直接返回
  • 确定一下最终数据
  • 如果需要重启,当前容器加入到删除列表或加入到重启列表
  • 否则比较进行热更新
// computePodResizeAction decides how the container at containerIdx has to be
// resized and records the decision in changes.
//
// It returns true when the container can keep running (nothing to do, or the
// resize was queued in changes.ContainersToUpdate), and false when the resize
// requires a restart, in which case the container has been added to
// changes.ContainersToKill and changes.ContainersToStart and
// changes.UpdatePodResources has been set.
func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containerIdx int, kubeContainerStatus *kubecontainer.Status, changes *podActions) bool {
	ctr := pod.Spec.Containers[containerIdx]
	// Without limits in the spec or without any API container status there is
	// nothing to compare against.
	if ctr.Resources.Limits == nil || len(pod.Status.ContainerStatuses) == 0 {
		return true
	}

	// Sanity checks (evaluated in order, first match wins): the API status
	// must exist, be running and carry resources; the runtime status must be
	// running and refer to the same container; and the spec requests must
	// equal the allocated resources.
	apiStatus, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, ctr.Name)
	switch {
	case !found,
		apiStatus.State.Running == nil,
		apiStatus.Resources == nil,
		kubeContainerStatus.State != kubecontainer.ContainerStateRunning,
		kubeContainerStatus.ID.String() != apiStatus.ContainerID,
		len(diff.ObjectDiff(ctr.Resources.Requests, apiStatus.AllocatedResources)) != 0:
		return true
	}

	// Desired values come from the spec (memory request from the allocation).
	desired := containerResources{
		memoryLimit:   ctr.Resources.Limits.Memory().Value(),
		memoryRequest: apiStatus.AllocatedResources.Memory().Value(),
		cpuLimit:      ctr.Resources.Limits.Cpu().MilliValue(),
		cpuRequest:    ctr.Resources.Requests.Cpu().MilliValue(),
	}
	// Current values come from the API status first ...
	current := containerResources{
		memoryLimit:   apiStatus.Resources.Limits.Memory().Value(),
		memoryRequest: apiStatus.Resources.Requests.Memory().Value(),
		cpuLimit:      apiStatus.Resources.Limits.Cpu().MilliValue(),
		cpuRequest:    apiStatus.Resources.Requests.Cpu().MilliValue(),
	}
	// ... and are overridden by whatever the runtime status reports.
	if rt := kubeContainerStatus.Resources; rt != nil {
		if rt.MemoryLimit != nil {
			current.memoryLimit = rt.MemoryLimit.Value()
		}
		if rt.CPULimit != nil {
			current.cpuLimit = rt.CPULimit.MilliValue()
		}
		if rt.CPURequest != nil {
			current.cpuRequest = rt.CPURequest.MilliValue()
		}
	}

	// Memory limit, CPU limit and CPU request already match: nothing to do.
	if desired.memoryLimit == current.memoryLimit &&
		desired.cpuLimit == current.cpuLimit &&
		desired.cpuRequest == current.cpuRequest {
		return true
	}

	// Index the container's resize policies by resource name.
	policyByResource := make(map[v1.ResourceName]v1.ResourceResizeRestartPolicy)
	for _, p := range ctr.ResizePolicy {
		policyByResource[p.ResourceName] = p.RestartPolicy
	}

	// resizeDecision reports whether the resource differs (resize) and, if
	// so, whether its policy demands a container restart.
	resizeDecision := func(rName v1.ResourceName, specValue, statusValue int64) (resize, restart bool) {
		if specValue == statusValue {
			return false, false
		}
		return true, policyByResource[rName] == v1.RestartContainer
	}

	// queueUpdate records an in-place update for rName. Increases are
	// appended to the list, decreases are inserted at the front.
	queueUpdate := func(rName v1.ResourceName, specValue, statusValue int64) {
		info := containerToUpdateInfo{
			apiContainerIdx:           containerIdx,
			kubeContainerID:           kubeContainerStatus.ID,
			desiredContainerResources: desired,
			currentContainerResources: &current,
		}
		switch {
		case specValue > statusValue: // append
			changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], info)
		case specValue < statusValue: // prepend
			list := append(changes.ContainersToUpdate[rName], containerToUpdateInfo{})
			copy(list[1:], list)
			list[0] = info
			changes.ContainersToUpdate[rName] = list
		}
	}

	memLimResize, memLimRestart := resizeDecision(v1.ResourceMemory, desired.memoryLimit, current.memoryLimit)
	cpuLimResize, cpuLimRestart := resizeDecision(v1.ResourceCPU, desired.cpuLimit, current.cpuLimit)
	cpuReqResize, cpuReqRestart := resizeDecision(v1.ResourceCPU, desired.cpuRequest, current.cpuRequest)

	// Any resource whose policy requires a restart: kill and restart the
	// container instead of updating it in place.
	if cpuLimRestart || cpuReqRestart || memLimRestart {
		changes.ContainersToKill[kubeContainerStatus.ID] = containerToKillInfo{
			name:      kubeContainerStatus.Name,
			container: &pod.Spec.Containers[containerIdx],
			message:   fmt.Sprintf("Container %s resize requires restart", ctr.Name),
		}
		changes.ContainersToStart = append(changes.ContainersToStart, containerIdx)
		changes.UpdatePodResources = true
		return false
	}

	// In-place update. For CPU only one entry is queued: the limit change
	// takes precedence over the request change.
	if memLimResize {
		queueUpdate(v1.ResourceMemory, desired.memoryLimit, current.memoryLimit)
	}
	switch {
	case cpuLimResize:
		queueUpdate(v1.ResourceCPU, desired.cpuLimit, current.cpuLimit)
	case cpuReqResize:
		queueUpdate(v1.ResourceCPU, desired.cpuRequest, current.cpuRequest)
	}
	return true
}

下一篇: kubelet源码分析 kuberuntime的syncpod、killpod函数(二)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值