kubelet源码分析 kuberuntime的syncpod、killpod函数(一)
上篇介绍的kubelet.go中的syncpod函数,走到后面就是到kuberuntime包中。
kuberuntime包中的syncpod函数任务比较大,分三章介绍。总体流程逻辑步骤是
- 计算沙箱和容器的变化。
- 如果有必要,杀死pod沙盒。
- 关闭任何不应该运行的容器。
- 必要时创建沙盒。
- 创建临时容器(ephemeral containers)。
- 创建 init 容器。
- 调整正在运行的容器的大小(如果InPlacePodVerticalScaling==true)。
- 创建普通容器。
这篇主要介绍第一步,计算沙箱和容器的变化。(computePodActions函数)
一、syncpod函数
整体流程如下,这章主要介绍computePodActions函数计算沙箱和容器的变化。
func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
// Step 1: 计算沙箱和容器的变化。
podContainerChanges := m.computePodActions(ctx, pod, podStatus)
klog.V(3).InfoS("computePodActions got for pod", "podActions", podContainerChanges, "pod", klog.KObj(pod))
if podContainerChanges.CreateSandbox {
ref, err := ref.GetReference(legacyscheme.Scheme, pod)
if err != nil {
klog.ErrorS(err, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
}
if podContainerChanges.SandboxID != "" {
m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
} else {
klog.V(4).InfoS("SyncPod received new pod, will create a sandbox for it", "pod", klog.KObj(pod))
}
}
// Step 2: 如果有必要,杀死pod沙盒。
if podContainerChanges.KillPod {
if podContainerChanges.CreateSandbox {
klog.V(4).InfoS("Stopping PodSandbox for pod, will start new one", "pod", klog.KObj(pod))
} else {
klog.V(4).InfoS("Stopping PodSandbox for pod, because all other containers are dead", "pod", klog.KObj(pod))
}
killResult := m.killPodWithSyncResult(ctx, pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
result.AddPodSyncResult(killResult)
if killResult.Error() != nil {
klog.ErrorS(killResult.Error(), "killPodWithSyncResult failed")
return
}
if podContainerChanges.CreateSandbox {
m.purgeInitContainers(ctx, pod, podStatus)
}
} else {
// Step 3: 关闭任何不应该运行的容器。
for containerID, containerInfo := range podContainerChanges.ContainersToKill {
klog.V(3).InfoS("Killing unwanted container for pod", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
result.AddSyncResult(killContainerResult)
if err := m.killContainer(ctx, pod, containerID, containerInfo.name, containerInfo.message, containerInfo.reason, nil); err != nil {
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
klog.ErrorS(err, "killContainer for pod failed", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
return
}
}
}
m.pruneInitContainersBeforeStart(ctx, pod, podStatus)
var podIPs []string
if podStatus != nil {
podIPs = podStatus.IPs
}
// Step 4: 必要时创建沙盒。
podSandboxID := podContainerChanges.SandboxID
if podContainerChanges.CreateSandbox {
var msg string
var err error
klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
metrics.StartedPodsTotal.Inc()
createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
result.AddSyncResult(createSandboxResult)
sysctl.ConvertPodSysctlsVariableToDotsSeparator(pod.Spec.SecurityContext)
utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
if m.runtimeHelper.PrepareDynamicResources(pod) != nil {
return
}
}
podSandboxID, msg, err = m.createPodSandbox(ctx, pod, podContainerChanges.Attempt)
if err != nil {
if m.podStateProvider.IsPodTerminationRequested(pod.UID) {
klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
return
}
metrics.StartedPodsErrorsTotal.Inc()
createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
if referr != nil {
klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
}
m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed to create pod sandbox: %v", err)
return
}
klog.V(4).InfoS("Created PodSandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))
resp, err := m.runtimeService.PodSandboxStatus(ctx, podSandboxID, false)
if err != nil {
ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
if referr != nil {
klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
}
m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
klog.ErrorS(err, "Failed to get pod sandbox status; Skipping pod", "pod", klog.KObj(pod))
result.Fail(err)
return
}
if resp.GetStatus() == nil {
result.Fail(errors.New("pod sandbox status is nil"))
return
}
if !kubecontainer.IsHostNetworkPod(pod) {
podIPs = m.determinePodSandboxIPs(pod.Namespace, pod.Name, resp.GetStatus())
klog.V(4).InfoS("Determined the ip for pod after sandbox changed", "IPs", podIPs, "pod", klog.KObj(pod))
}
}
podIP := ""
if len(podIPs) != 0 {
podIP = podIPs[0]
}
configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
result.AddSyncResult(configPodSandboxResult)
podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
if err != nil {
message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
klog.ErrorS(err, "GeneratePodSandboxConfig for pod failed", "pod", klog.KObj(pod))
configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
return
}
//创建匿名函数,创建容器
start := func(ctx context.Context, typeName, metricLabel string, spec *startSpec) error {
startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
result.AddSyncResult(startContainerResult)
isInBackOff, msg, err := m.doBackOff(pod, spec.container, podStatus, backOff)
if isInBackOff {
startContainerResult.Fail(err, msg)
klog.V(4).InfoS("Backing Off restarting container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
return err
}
metrics.StartedContainersTotal.WithLabelValues(metricLabel).Inc()
if sc.HasWindowsHostProcessRequest(pod, spec.container) {
metrics.StartedHostProcessContainersTotal.WithLabelValues(metricLabel).Inc()
}
klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
if msg, err := m.startContainer(ctx, podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
metrics.StartedContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
if sc.HasWindowsHostProcessRequest(pod, spec.container) {
metrics.StartedHostProcessContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
}
startContainerResult.Fail(err, msg)
switch {
case err == images.ErrImagePullBackOff:
klog.V(3).InfoS("Container start failed in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod), "containerMessage", msg, "err", err)
default:
utilruntime.HandleError(fmt.Errorf("%v %+v start failed in pod %v: %v: %s", typeName, spec.container, format.Pod(pod), err, msg))
}
return err
}
return nil
}
// Step 5: 创建短暂的容器。
for _, idx := range podContainerChanges.EphemeralContainersToStart {
start(ctx, "ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
}
// Step 6: 创建初始容器。
if container := podContainerChanges.NextInitContainerToStart; container != nil {
if err := start(ctx, "init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
return
}
klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
}
// Step 7: 调整正在运行的容器的大小(如果InPlacePodVerticalScaling==true)。
if isInPlacePodVerticalScalingAllowed(pod) {
if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
}
}
// Step 8: 创建普通容器。
for _, idx := range podContainerChanges.ContainersToStart {
start(ctx, "container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
}
return
}
二、computePodActions函数
- 获取到sandbox和容器的变化,2.1函数介绍
- 如果返回true,代表需要创建sandbox(那init容器和其他容器都需要重启)
- 如果pod的重启策略是永不重启(Never),并且attempt不等于0、且已经存在容器状态记录,则将CreateSandbox设为false并直接返回
- 如果重启容器个数为0,则计算下一个init是否完成,如果也完成,就没有要重启的容器,则不需要重启sandbox了
- 如果init容器个数不等于0,因为sandbox要重启,所以init也要重启,直接获取第一个init重启即可
- 如果init容器没有,则重启普通的容器
- 到这里代表sandbox不重启,则要计算其他容器的重启顺序了
- 首先启动的是init。找到下一个要运行的init容器。函数2.2findNextInitContainerToRun
- 如果有下一个init要启动。如果initLastStatus不为空则代表当前init有错误,并且isInitContainerFailed函数返回true代表init有错误(oom,退出码!=0,unknown)
- 如果确实当前init错误并且重启策略是永不重启(Never,即shouldRestartOnFailure返回false),则KillPod=true,代表需要杀掉整个pod,也就是init容器失败了就删除整个pod的容器(sandbox容器也会被删除)
- 否则,如果上一次的状态不为空并且状态是unknown,则先把这个容器加入待杀死列表(ContainersToKill);然后把next作为下一个要启动的init容器
- 如果init没全部执行完成,但是没有下一个init要执行。则代表当前init容器正在running,没有运行完成,返回
- 如果设置了就地升级,则初始化一下
- 如果init容器也不需要重启,则到了关键的普通容器的处理。
- 如果运行状态不为空,并且不是running,代表失败的,准备删掉。先把生命周期性相关的停止掉。
- 如果容器运行状态==nil或者状态不是running,验证是否重启,需要重启的添加到结构体中
- 如果是running的容器,则要校验。容器配置(spec)与运行时(status)是否发生了改变
- 如果有变化,并且没有开启就地升级,或者除了Resources之外还有其他配置发生了变化,则需要重启。
- 如果资源没变化,或者开启了就地升级并且值更改了Resources,则不需要重启,保持这个pod
- 如果可以重启,则把这些容器也加入到重启列表
- 如果需要保持的和需要重启的容器数都是0,则KillPod=true,杀掉整个pod(包括sandbox)
// computePodActions compares the pod spec with podStatus and returns the
// podActions that describe what has to be done to the pod: whether the
// sandbox must be killed and/or (re)created, which init container to start
// next, which containers to start, kill or update in place.
func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
klog.V(5).InfoS("Syncing Pod", "pod", klog.KObj(pod))
// Determine whether the sandbox has to be (re)created, together with the
// attempt number and the ID of the sandbox that was inspected.
createPodSandbox, attempt, sandboxID := runtimeutil.PodSandboxChanged(pod, podStatus)
// KillPod and CreateSandbox both start out equal to createPodSandbox and
// are refined below.
changes := podActions{
KillPod: createPodSandbox,
CreateSandbox: createPodSandbox,
SandboxID: sandboxID,
Attempt: attempt,
ContainersToStart: []int{},
ContainersToKill: make(map[kubecontainer.ContainerID]containerToKillInfo),
}
// The sandbox needs to be (re)created, so init containers and regular
// containers all have to be started again.
if createPodSandbox {
// Do not create a new sandbox when shouldRestartOnFailure reports false,
// this is not the first attempt and container statuses already exist.
// NOTE(review): shouldRestartOnFailure presumably returns false only for
// restart policy Never - confirm against its definition.
if !shouldRestartOnFailure(pod) && attempt != 0 && len(podStatus.ContainerStatuses) != 0 {
changes.CreateSandbox = false
return changes
}
// Collect the indexes of the regular containers that should be started.
var containersToStart []int
for idx, c := range pod.Spec.Containers {
// With restart policy OnFailure, skip containers for which
// containerSucceeded reports true.
if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure && containerSucceeded(&c, podStatus) {
continue
}
containersToStart = append(containersToStart, idx)
}
// No regular container has to be started.
if len(containersToStart) == 0 {
// Check whether the init containers are done as well
// (see findNextInitContainerToRun).
_, _, done := findNextInitContainerToRun(pod, podStatus)
// Init containers are done too: nothing needs to run, so there is no
// point in creating a new sandbox.
if done {
changes.CreateSandbox = false
return changes
}
}
// A new sandbox means init containers start over: schedule the first
// init container and let later syncs handle the rest.
if len(pod.Spec.InitContainers) != 0 {
changes.NextInitContainerToStart = &pod.Spec.InitContainers[0]
return changes
}
// No init containers: start the regular containers collected above.
changes.ContainersToStart = containersToStart
return changes
}
// From here on the sandbox is kept; work out what to do with the containers.
// Ephemeral containers without any recorded status need to be started.
for i := range pod.Spec.EphemeralContainers {
c := (*v1.Container)(&pod.Spec.EphemeralContainers[i].EphemeralContainerCommon)
if podStatus.FindContainerStatusByName(c.Name) == nil {
changes.EphemeralContainersToStart = append(changes.EphemeralContainersToStart, i)
}
}
// Find the next init container to run (see findNextInitContainerToRun).
initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
// Initialization has not finished: handle init containers first and return
// without looking at the regular containers.
if !done {
// There is a next init container to start.
if next != nil {
// initLastStatus is non-nil only when findNextInitContainerToRun found
// a failed init container; isInitContainerFailed re-checks that status.
initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
// The init container failed and shouldRestartOnFailure reports false:
// kill the whole pod instead of restarting the init container.
if initFailed && !shouldRestartOnFailure(pod) {
changes.KillPod = true
} else {
// If the last init container is in Unknown state, schedule it to be
// killed before it is started again.
if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
name: next.Name,
container: next,
message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
initLastStatus.State),
reason: reasonUnknown,
}
}
// Start (or restart) this init container next.
changes.NextInitContainerToStart = next
}
}
// Initialization is not done but there is nothing to start: an init
// container is still running, so just return.
return changes
}
// When in-place vertical scaling is allowed for the pod, prepare the update
// map and, if it can be fetched, switch to the latest pod status.
if isInPlacePodVerticalScalingAllowed(pod) {
changes.ContainersToUpdate = make(map[v1.ResourceName][]containerToUpdateInfo)
latestPodStatus, err := m.GetPodStatus(ctx, podStatus.ID, pod.Name, pod.Namespace)
if err == nil {
podStatus = latestPodStatus
}
}
// Number of containers that are kept running unchanged.
keepCount := 0
for idx, container := range pod.Spec.Containers {
containerStatus := podStatus.FindContainerStatusByName(container.Name)
// A status exists but the container is not running: invoke the internal
// post-stop lifecycle hook. A hook failure is only logged.
if containerStatus != nil && containerStatus.State != kubecontainer.ContainerStateRunning {
if err := m.internalLifecycle.PostStopContainer(containerStatus.ID.ID); err != nil {
klog.ErrorS(err, "Internal container post-stop lifecycle hook failed for container in pod with error",
"containerName", container.Name, "pod", klog.KObj(pod))
}
}
// The container has no status or is not running.
if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
// Decide whether it should be (re)started (see ShouldContainerBeRestarted).
if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
klog.V(3).InfoS("Container of pod is not in the desired state and shall be started", "containerName", container.Name, "pod", klog.KObj(pod))
// Add it to the list of containers to start.
changes.ContainersToStart = append(changes.ContainersToStart, idx)
// A container in Unknown state is additionally scheduled to be killed
// before it is started again.
if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
name: containerStatus.Name,
container: &pod.Spec.Containers[idx],
message: fmt.Sprintf("Container is in %q state, try killing it before restart",
containerStatus.State),
reason: reasonUnknown,
}
}
}
continue
}
// Only running containers reach this point.
var message string
var reason containerKillReason
// Default restart decision for a container that gets killed below.
// NOTE(review): presumably true unless the restart policy is Never - confirm.
restart := shouldRestartOnFailure(pod)
// The branches below are evaluated in order; the first one that matches
// decides the fate of the running container.
// 1. The container definition changed (as reported by containerChanged) and
// either in-place scaling is not allowed or something other than the
// resources changed: kill and always restart.
if _, _, changed := containerChanged(&container, containerStatus); changed &&
(!isInPlacePodVerticalScalingAllowed(pod) ||
kubecontainer.HashContainerWithoutResources(&container) != containerStatus.HashWithoutResources) {
message = fmt.Sprintf("Container %s definition changed", container.Name)
restart = true
} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
// 2. The liveness probe failed: kill; restart depends on the default above.
message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
reason = reasonLivenessProbe
} else if startup, found := m.startupManager.Get(containerStatus.ID); found && startup == proberesults.Failure {
// 3. The startup probe failed: kill; restart depends on the default above.
message = fmt.Sprintf("Container %s failed startup probe", container.Name)
reason = reasonStartupProbe
} else if isInPlacePodVerticalScalingAllowed(pod) && !m.computePodResizeAction(pod, idx, containerStatus, &changes) {
// 4. computePodResizeAction returned false: it has already recorded the
// required restart in changes, so the container is not counted as kept.
continue
} else {
// 5. Nothing requires a restart: keep the container running.
keepCount++
continue
}
// Branches 1-3 fall through to here: the container is killed, and also
// restarted when restart is true.
if restart {
message = fmt.Sprintf("%s, will be restarted", message)
changes.ContainersToStart = append(changes.ContainersToStart, idx)
}
changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
name: containerStatus.Name,
container: &pod.Spec.Containers[idx],
message: message,
reason: reason,
}
klog.V(2).InfoS("Message for Container of pod", "containerName", container.Name, "containerStatusID", containerStatus.ID, "pod", klog.KObj(pod), "containerMessage", message)
}
// Nothing is kept and nothing is to be started: the sandbox is not needed
// either, so kill the whole pod.
if keepCount == 0 && len(changes.ContainersToStart) == 0 {
changes.KillPod = true
}
return changes
}
2.1PodSandboxChanged函数
- 如果sandbox为空,则需要重启
- 计数ready的sandbox数量
- 如果处于ready状态的sandbox超过一个,则代表错误,需要重启
- 如果第一个sandbox状态不是ready,也需要重启
- 网络命名空间改变了,重启
- 如果不是hostNetwork的pod,并且sandbox的网络信息里没有IP,重启
// PodSandboxChanged reports whether a new sandbox has to be created for pod.
// It returns:
//   - whether the sandbox must be (re)created,
//   - the attempt number to use (the attempt of the first sandbox status,
//     incremented by one when a new sandbox is needed; 0 when the pod has no
//     sandbox at all),
//   - the ID of the first sandbox status, or "" when there is no sandbox or
//     when the network namespace mode changed.
func PodSandboxChanged(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (bool, uint32, string) {
	sandboxes := podStatus.SandboxStatuses
	if len(sandboxes) == 0 {
		klog.V(2).InfoS("No sandbox for pod can be found. Need to start a new one", "pod", klog.KObj(pod))
		return true, 0, ""
	}

	// Count how many sandboxes report the READY state.
	ready := 0
	for i := range sandboxes {
		if sandboxes[i].State == runtimeapi.PodSandboxState_SANDBOX_READY {
			ready++
		}
	}

	// All remaining checks look at the first sandbox status only.
	first := sandboxes[0]
	switch {
	case ready > 1:
		// More than one ready sandbox is inconsistent; reconcile by re-creating.
		klog.V(2).InfoS("Multiple sandboxes are ready for Pod. Need to reconcile them", "pod", klog.KObj(pod))
		return true, first.Metadata.Attempt + 1, first.Id

	case first.State != runtimeapi.PodSandboxState_SANDBOX_READY:
		klog.V(2).InfoS("No ready sandbox for pod can be found. Need to start a new one", "pod", klog.KObj(pod))
		return true, first.Metadata.Attempt + 1, first.Id

	case first.GetLinux().GetNamespaces().GetOptions().GetNetwork() != NetworkNamespaceForPod(pod):
		// The network namespace mode no longer matches the pod spec. Note
		// that the sandbox ID is deliberately returned empty in this case.
		klog.V(2).InfoS("Sandbox for pod has changed. Need to start a new one", "pod", klog.KObj(pod))
		return true, first.Metadata.Attempt + 1, ""

	case !kubecontainer.IsHostNetworkPod(pod) && first.Network != nil && first.Network.Ip == "":
		// A pod that is not on the host network must have an IP.
		klog.V(2).InfoS("Sandbox for pod has no IP address. Need to start a new one", "pod", klog.KObj(pod))
		return true, first.Metadata.Attempt + 1, first.Id
	}

	return false, first.Metadata.Attempt, first.Id
}
2.2 findNextInitContainerToRun函数
这里返回的三个参数分别代表 status(当前的容器,只有运行失败的才返回)next(下一个要运行的init容器)done(true代表全部执行完成)
- 如果没有init容器,返回结果,当前状态为nil,下一个为nil,全部完成=true
- 如果有任意一个普通容器处于running状态,说明init容器之前已经执行过,不用再校验init了
- 从最后一个init往前校验
- 如果是失败的,则返回当前状态。返回当前的容器为下一个,是否运行完成=false
- 如果在运行中,返回是否完成=false
- 如果已退出(失败的情况前面已经处理),并且是最后一个,是否运行完成返回true
- 否则把下一个返回
// findNextInitContainerToRun inspects the init containers of pod against
// podStatus and returns:
//   - status: the status of the last failed init container, or nil if none failed,
//   - next:   the init container to run next, or nil if there is none to start,
//   - done:   true when initialization is considered complete.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.Status, next *v1.Container, done bool) {
	inits := pod.Spec.InitContainers
	if len(inits) == 0 {
		return nil, nil, true
	}

	// If any regular container is running, initialization is treated as
	// complete and the init containers are not examined.
	for i := range pod.Spec.Containers {
		st := podStatus.FindContainerStatusByName(pod.Spec.Containers[i].Name)
		if st != nil && st.State == kubecontainer.ContainerStateRunning {
			return nil, nil, true
		}
	}

	last := len(inits) - 1

	// Walking backwards, report the last failed init container (if any) as
	// the one to run again.
	for i := last; i >= 0; i-- {
		st := podStatus.FindContainerStatusByName(inits[i].Name)
		if st != nil && isInitContainerFailed(st) {
			return st, &inits[i], false
		}
	}

	// No failures: walking backwards, the first init container with a
	// running or exited status determines the outcome.
	for i := last; i >= 0; i-- {
		st := podStatus.FindContainerStatusByName(inits[i].Name)
		if st == nil {
			continue
		}

		switch st.State {
		case kubecontainer.ContainerStateRunning:
			// Still running: nothing to start yet, not done either.
			return nil, nil, false
		case kubecontainer.ContainerStateExited:
			if i == last {
				// The final init container has exited: all done.
				return nil, nil, true
			}
			// Otherwise continue with the init container that follows it.
			return nil, &inits[i+1], false
		}
	}

	// No init container is running or has exited: begin with the first one.
	return nil, &inits[0], false
}
2.3 ShouldContainerBeRestarted函数
// ShouldContainerBeRestarted reports whether the given container of pod
// should be (re)started, based on the pod's deletion timestamp, the
// container's last known status in podStatus and the pod's restart policy.
func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
	// A pod that is being deleted never gets its containers restarted.
	if pod.DeletionTimestamp != nil {
		return false
	}

	// No status at all: the container needs to be started.
	status := podStatus.FindContainerStatusByName(container.Name)
	if status == nil {
		return true
	}

	switch status.State {
	case ContainerStateRunning:
		// Already running.
		return false
	case ContainerStateUnknown, ContainerStateCreated:
		// Unknown, or created but not started: start it (again).
		return true
	}

	// For any other state the restart policy decides.
	switch pod.Spec.RestartPolicy {
	case v1.RestartPolicyNever:
		klog.V(4).InfoS("Already ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name)
		return false
	case v1.RestartPolicyOnFailure:
		// Restart only when the container exited with a non-zero code.
		if status.ExitCode == 0 {
			klog.V(4).InfoS("Already successfully ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name)
			return false
		}
	}

	return true
}
2.4 computePodResizeAction
- 如果没有设置limit或者还没运行,直接返回true
- 做一下基本校验
- 获取期望的cpu和内存指标和实际现在使用的cpu和内存指标
- 如果容器运行时设置了状态,则替代v1的(更有时效性)
- 如果预期的和实际的都相等,直接返回
- 确定一下最终数据
- 如果需要重启,当前容器加入到删除列表或加入到重启列表
- 否则比较进行热更新
func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containerIdx int, kubeContainerStatus *kubecontainer.Status, changes *podActions) bool {
//如果没有设置limit或者还没运行,直接返回true
container := pod.Spec.Containers[containerIdx]
if container.Resources.Limits == nil || len(pod.Status.ContainerStatuses) == 0 {
return true
}
apiContainerStatus, exists := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name)
//做一下基本校验
if !exists || apiContainerStatus.State.Running == nil || apiContainerStatus.Resources == nil ||
kubeContainerStatus.State != kubecontainer.ContainerStateRunning ||
kubeContainerStatus.ID.String() != apiContainerStatus.ContainerID ||
len(diff.ObjectDiff(container.Resources.Requests, apiContainerStatus.AllocatedResources)) != 0 {
return true
}
//期望的cpu和内存指标
desiredMemoryLimit := container.Resources.Limits.Memory().Value()
desiredCPULimit := container.Resources.Limits.Cpu().MilliValue()
desiredCPURequest := container.Resources.Requests.Cpu().MilliValue()
//实际现在使用的cpu和内存指标
currentMemoryLimit := apiContainerStatus.Resources.Limits.Memory().Value()
currentCPULimit := apiContainerStatus.Resources.Limits.Cpu().MilliValue()
currentCPURequest := apiContainerStatus.Resources.Requests.Cpu().MilliValue()
//如果容器运行时设置了状态,则替代v1的(更有时效性)
if kubeContainerStatus.Resources != nil {
if kubeContainerStatus.Resources.MemoryLimit != nil {
currentMemoryLimit = kubeContainerStatus.Resources.MemoryLimit.Value()
}
if kubeContainerStatus.Resources.CPULimit != nil {
currentCPULimit = kubeContainerStatus.Resources.CPULimit.MilliValue()
}
if kubeContainerStatus.Resources.CPURequest != nil {
currentCPURequest = kubeContainerStatus.Resources.CPURequest.MilliValue()
}
}
//如果预期的和实际的都相等,直接返回
if desiredMemoryLimit == currentMemoryLimit && desiredCPULimit == currentCPULimit && desiredCPURequest == currentCPURequest {
return true
}
//确定一下最终数据
desiredResources := containerResources{
memoryLimit: desiredMemoryLimit,
memoryRequest: apiContainerStatus.AllocatedResources.Memory().Value(),
cpuLimit: desiredCPULimit,
cpuRequest: desiredCPURequest,
}
currentResources := containerResources{
memoryLimit: currentMemoryLimit,
memoryRequest: apiContainerStatus.Resources.Requests.Memory().Value(),
cpuLimit: currentCPULimit,
cpuRequest: currentCPURequest,
}
resizePolicy := make(map[v1.ResourceName]v1.ResourceResizeRestartPolicy)
for _, pol := range container.ResizePolicy {
resizePolicy[pol.ResourceName] = pol.RestartPolicy
}
//下面会调用这个函数,如果期望与实际相等,则不变化
determineContainerResize := func(rName v1.ResourceName, specValue, statusValue int64) (resize, restart bool) {
if specValue == statusValue {
return false, false
}
//如果设置的资源变化为重启,则resize为true,restart为true
if resizePolicy[rName] == v1.RestartContainer {
return true, true
}
//如果资源变化不重启,则则resize为true,restart为false
return true, false
}
//做比较的函数,
markContainerForUpdate := func(rName v1.ResourceName, specValue, statusValue int64) {
cUpdateInfo := containerToUpdateInfo{
apiContainerIdx: containerIdx,
kubeContainerID: kubeContainerStatus.ID,
desiredContainerResources: desiredResources,
currentContainerResources: ¤tResources,
}
switch {
//如果期望的大于实际的(增加资源量),追加到更新的最后面
case specValue > statusValue: // append
changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], cUpdateInfo)
//如果期望的小于实际的(减少资源量)则放在第一个进行更新
case specValue < statusValue: // prepend
changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], containerToUpdateInfo{})
copy(changes.ContainersToUpdate[rName][1:], changes.ContainersToUpdate[rName])
changes.ContainersToUpdate[rName][0] = cUpdateInfo
}
}
//调用函数
resizeMemLim, restartMemLim := determineContainerResize(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit)
resizeCPULim, restartCPULim := determineContainerResize(v1.ResourceCPU, desiredCPULimit, currentCPULimit)
resizeCPUReq, restartCPUReq := determineContainerResize(v1.ResourceCPU, desiredCPURequest, currentCPURequest)
//如果需要重启
if restartCPULim || restartCPUReq || restartMemLim {
//当前容器加入到删除列表
changes.ContainersToKill[kubeContainerStatus.ID] = containerToKillInfo{
name: kubeContainerStatus.Name,
container: &pod.Spec.Containers[containerIdx],
message: fmt.Sprintf("Container %s resize requires restart", container.Name),
}
//当前容器加入到重启列表
changes.ContainersToStart = append(changes.ContainersToStart, containerIdx)
changes.UpdatePodResources = true
return false
} else {
//进行热更新
if resizeMemLim {
markContainerForUpdate(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit)
}
if resizeCPULim {
markContainerForUpdate(v1.ResourceCPU, desiredCPULimit, currentCPULimit)
} else if resizeCPUReq {
markContainerForUpdate(v1.ResourceCPU, desiredCPURequest, currentCPURequest)
}
}
return true
}