[kubernetes/k8s source code analysis] kubelet source code: starting containers

This post mainly looks at how the kubelet calls into the container runtime; the default here is docker.

0. Data flow

NewMainKubelet (cmd/kubelet/app/server.go) ->
NewKubeGenericRuntimeManager (pkg/kubelet/kuberuntime/kuberuntime_manager.go) ->
syncPod (pkg/kubelet/kubelet.go) ->
SyncPod (pkg/kubelet/kuberuntime/kuberuntime_manager.go)

 

1. Data structures

  1.1 The ContainerManager interface

    This interface manages the containers running on the host machine; its definition is fairly self-explanatory:

// Manages the containers running on a machine.
type ContainerManager interface {
	// Runs the container manager's housekeeping.
	// - Ensures that the Docker daemon is in a container.
	// - Creates the system container where all non-containerized processes run.
	Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService) error

	// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
	// These cgroups include the system and Kubernetes services.
	SystemCgroupsLimit() v1.ResourceList

	// GetNodeConfig returns a NodeConfig that is being used by the container manager.
	GetNodeConfig() NodeConfig

	// Status returns internal Status.
	Status() Status

	// NewPodContainerManager is a factory method which returns a podContainerManager object
	// Returns a noop implementation if qos cgroup hierarchy is not enabled
	NewPodContainerManager() PodContainerManager

	// GetMountedSubsystems returns the mounted cgroup subsystems on the node
	GetMountedSubsystems() *CgroupSubsystems

	// GetQOSContainersInfo returns the names of top level QoS containers
	GetQOSContainersInfo() QOSContainersInfo

	// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
	GetNodeAllocatableReservation() v1.ResourceList

	// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
	GetCapacity() v1.ResourceList

	// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
	// node allocatable (amount of total healthy resources reported by device plugin),
	// and inactive device plugin resources previously registered on the node.
	GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string)

	// UpdateQOSCgroups performs housekeeping updates to ensure that the top
	// level QoS containers have their desired state in a thread-safe way
	UpdateQOSCgroups() error

	// GetResources returns RunContainerOptions with devices, mounts, and env fields populated for
	// extended resources required by container.
	GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)

	// UpdatePluginResources calls Allocate of device plugin handler for potential
	// requests for device plugin resources, and returns an error if fails.
	// Otherwise, it updates allocatableResource in nodeInfo if necessary,
	// to make sure it is at least equal to the pod's requested capacity for
	// any registered device plugin resource
	UpdatePluginResources(*schedulercache.NodeInfo, *lifecycle.PodAdmitAttributes) error

	InternalContainerLifecycle() InternalContainerLifecycle

	// GetPodCgroupRoot returns the cgroup which contains all pods.
	GetPodCgroupRoot() string

	// GetPluginRegistrationHandler returns a plugin registration handler
	// The pluginwatcher's Handlers allow to have a single module for handling
	// registration.
	GetPluginRegistrationHandler() pluginwatcher.PluginHandler
}

2. The NewMainKubelet function

  The runtime is initialized by calling the NewKubeGenericRuntimeManager function:

    runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
        kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
        klet.livenessManager,
        seccompProfileRoot,
        containerRefManager,
        machineInfo,
        klet,
        kubeDeps.OSInterface,
        klet,
        httpClient,
        imageBackOff,
        kubeCfg.SerializeImagePulls,
        float32(kubeCfg.RegistryPullQPS),
        int(kubeCfg.RegistryBurst),
        kubeCfg.CPUCFSQuota,
        kubeCfg.CPUCFSQuotaPeriod,
        runtimeService,
        imageService,
        kubeDeps.ContainerManager.InternalContainerLifecycle(),
        legacyLogProvider,
        klet.runtimeClassManager,
    )

  2.1 containerRuntime is assigned this runtime, which implements the kubecontainer.Runtime interface (an abridged excerpt of the interface follows the assignments below)

klet.containerRuntime = runtime
klet.streamingRuntime = runtime
klet.runner = runtime
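  For reference, runtime here satisfies the kubecontainer.Runtime interface defined in pkg/kubelet/container/runtime.go. An abridged excerpt from roughly this version of the code, limited to the methods that show up later in this article (everything else is omitted):

// Runtime interface defines the interfaces that should be implemented
// by a container runtime. (abridged)
type Runtime interface {
	// Type returns the type of the container runtime.
	Type() string

	// SyncPod syncs the running pod into the desired pod.
	SyncPod(pod *v1.Pod, apiPodStatus v1.PodStatus, podStatus *PodStatus,
		pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) PodSyncResult

	// KillPod kills all the containers of a pod.
	KillPod(pod *v1.Pod, runningPod Pod, gracePeriodOverride *int64) error

	// GetPodStatus retrieves the status of the pod, including the
	// information of all containers in the pod.
	GetPodStatus(uid types.UID, name, namespace string) (*PodStatus, error)

	// ... remaining methods omitted ...
}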

3. The syncPod function

  Path: pkg/kubelet/kubelet.go

  After a long stretch of initialization it finally calls SyncPod. Because klet.containerRuntime = runtime, the call dispatches to kubeGenericRuntimeManager's implementation:

	// Call the container runtime's SyncPod callback
	result := kl.containerRuntime.SyncPod(pod, apiPodStatus, podStatus, pullSecrets, kl.backOff)
	kl.reasonCache.Update(pod.UID, result)
	if err := result.Error(); err != nil {
		// Do not return error if the only failures were pods in backoff
		for _, r := range result.SyncResults {
			if r.Error != kubecontainer.ErrCrashLoopBackOff && r.Error != images.ErrImagePullBackOff {
				// Do not record an event here, as we keep all event logging for sync pod failures
				// local to container runtime so we get better errors
				return err
			}
		}

		return nil
	}

4. The SyncPod function

  Path: pkg/kubelet/kuberuntime/kuberuntime_manager.go

// SyncPod syncs the running pod into the desired pod by executing following steps:
//
//  1. Compute sandbox and container changes.
//  2. Kill pod sandbox if necessary.
//  3. Kill any containers that should not be running.
//  4. Create sandbox if necessary.
//  5. Create init containers.
//  6. Create normal containers.

  4.1 Step 1: Compute sandbox and container changes

    Determine which containers need to be created and which need to be killed: containers to kill are collected in podContainerChanges.ContainersToKill, containers to create in podContainerChanges.ContainersToStart. The computePodActions function is analyzed in detail in section 6.

	podContainerChanges := m.computePodActions(pod, podStatus)
	glog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod))
	if podContainerChanges.CreateSandbox {
		ref, err := ref.GetReference(legacyscheme.Scheme, pod)
		if err != nil {
			glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
		}
		if podContainerChanges.SandboxID != "" {
			m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
		} else {
			glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod))
		}
	}

  4.2 Step 2: Kill the pod if the sandbox has changed

    If the sandbox has changed, the pod has to be killed and recreated.

	if podContainerChanges.KillPod {
		if !podContainerChanges.CreateSandbox {
			glog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
		} else {
			glog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
		}

		killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
		result.AddPodSyncResult(killResult)
		if killResult.Error() != nil {
			glog.Errorf("killPodWithSyncResult failed: %v", killResult.Error())
			return
		}

		if podContainerChanges.CreateSandbox {
			m.purgeInitContainers(pod, podStatus)
		}
	}

  4.3 Step 3: kill any running containers in this pod which are not to keep

    Kill any running containers in this pod that should not be kept (in the source this loop sits in the else branch of the KillPod check from Step 2):

	// Step 3: kill any running containers in this pod which are not to keep.
	for containerID, containerInfo := range podContainerChanges.ContainersToKill {
		glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerInfo.name, containerID, format.Pod(pod))
		killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
		result.AddSyncResult(killContainerResult)
		if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
			killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
			glog.Errorf("killContainer %q(id=%q) for pod %q failed: %v", containerInfo.name, containerID, format.Pod(pod), err)
			return
		}
	}

  4.4 Step 4: Create a sandbox for the pod if necessary

      创建sandbox(容器标准),調用createPodSandbox,創建pod配置,創建pod log目錄,調用m.runtimeService.RunPodSandbox

	// Step 4: Create a sandbox for the pod if necessary.
	podSandboxID := podContainerChanges.SandboxID
	if podContainerChanges.CreateSandbox {
		var msg string
		var err error

		glog.V(4).Infof("Creating sandbox for pod %q", format.Pod(pod))
		createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
		result.AddSyncResult(createSandboxResult)
		podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
		if err != nil {
			createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
			glog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
			if referr != nil {
				glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), referr)
			}
			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox: %v", err)
			return
		}
		glog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod))

		podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
		if err != nil {
			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
			if referr != nil {
				glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), referr)
			}
			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
			glog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
			result.Fail(err)
			return
		}

		// If we ever allow updating a pod from non-host-network to
		// host-network, we may use a stale IP.
		if !kubecontainer.IsHostNetworkPod(pod) {
			// Overwrite the podIP passed in the pod status, since we just started the pod sandbox.
			podIP = m.determinePodSandboxIP(pod.Namespace, pod.Name, podSandboxStatus)
			glog.V(4).Infof("Determined the ip %q for pod %q after sandbox changed", podIP, format.Pod(pod))
		}
	}
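    As promised above, a condensed view of createPodSandbox (pkg/kubelet/kuberuntime/kuberuntime_sandbox.go); error handling is abbreviated here and the RuntimeClass handler lookup is dropped:

// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error). (condensed)
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
	// Generate the CRI PodSandboxConfig from the pod spec.
	podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
	if err != nil {
		return "", fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err), err
	}

	// Create the pod log directory.
	if err := m.osInterface.MkdirAll(podSandboxConfig.LogDirectory, 0755); err != nil {
		return "", fmt.Sprintf("Create pod log directory for pod %q failed: %v", format.Pod(pod), err), err
	}

	// Ask the CRI runtime to run the sandbox ("" means the default runtime handler).
	podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig, "")
	if err != nil {
		return "", fmt.Sprintf("CreatePodSandbox for pod %q failed: %v", format.Pod(pod), err), err
	}

	return podSandBoxID, "", nil
}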

  4.5 Step 5: start the init container

      Init containers do initialization work for the pod; only the next init container that has not yet completed is started in each sync.

	// Step 5: start the init container.
	if container := podContainerChanges.NextInitContainerToStart; container != nil {
		// Start the next init container.
		startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
		result.AddSyncResult(startContainerResult)
		isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
		if isInBackOff {
			startContainerResult.Fail(err, msg)
			glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod))
			return
		}

		glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod))
		if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP, kubecontainer.ContainerTypeInit); err != nil {
			startContainerResult.Fail(err, msg)
			utilruntime.HandleError(fmt.Errorf("init container start failed: %v: %s", err, msg))
			return
		}

		// Successfully started the container; clear the entry in the failure
		glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod))
	}

  4.6 Step 6: start containers in podContainerChanges.ContainersToStart

      This is where the regular container processes are actually started, by calling startContainer, covered in section 5.

	// Step 6: start containers in podContainerChanges.ContainersToStart.
	for _, idx := range podContainerChanges.ContainersToStart {
		container := &pod.Spec.Containers[idx]
		startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
		result.AddSyncResult(startContainerResult)

		isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
		if isInBackOff {
			startContainerResult.Fail(err, msg)
			glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod))
			continue
		}

		glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod))
		if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP, kubecontainer.ContainerTypeRegular); err != nil {
			startContainerResult.Fail(err, msg)
			// known errors that are logged in other places are logged at higher levels here to avoid
			// repetitive log spam
			switch {
			case err == images.ErrImagePullBackOff:
				glog.V(3).Infof("container start failed: %v: %s", err, msg)
			default:
				utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
			}
			continue
		}
	}

5. The startContainer function

  Path: pkg/kubelet/kuberuntime/kuberuntime_container.go

  5.1 Step 1: pull the image

  Self-explanatory: pull the image.

	// Step 1: pull the image.
	imageRef, msg, err := m.imagePuller.EnsureImageExists(pod, container, pullSecrets)
	if err != nil {
		m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
		return msg, err
	}
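  EnsureImageExists decides whether to pull based on the container's imagePullPolicy and whether the image is already present on the node. A minimal sketch of that decision (the helper name here is made up for illustration; the real logic lives in pkg/kubelet/images):

// Sketch only: the pull-policy decision EnsureImageExists makes.
// imagePresent would come from querying the runtime's image service.
func shouldPullImageSketch(container *v1.Container, imagePresent bool) bool {
	switch container.ImagePullPolicy {
	case v1.PullNever:
		return false // never pull; report an error later if the image is missing
	case v1.PullAlways:
		return true
	default: // v1.PullIfNotPresent
		return !imagePresent
	}
}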

  5.2 Step 2: create the container

    Create the container: this ultimately goes through the CRI API over a gRPC connection, roughly the equivalent of a docker create.

	// Step 2: create the container.
	ref, err := kubecontainer.GenerateContainerRef(pod, container)
	if err != nil {
		glog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
	}
	glog.V(4).Infof("Generating ref for container %s: %#v", container.Name, ref)

	// For a new container, the RestartCount should be 0
	restartCount := 0
	containerStatus := podStatus.FindContainerStatusByName(container.Name)
	if containerStatus != nil {
		restartCount = containerStatus.RestartCount + 1
	}

	containerConfig, cleanupAction, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef, containerType)
	if cleanupAction != nil {
		defer cleanupAction()
	}
	if err != nil {
		m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
		return grpc.ErrorDesc(err), ErrCreateContainerConfig
	}

	containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
	if err != nil {
		m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
		return grpc.ErrorDesc(err), ErrCreateContainer
	}

  5.3 Step 3: start the container

    Roughly the equivalent of a docker start.

	// Step 3: start the container.
	err = m.runtimeService.StartContainer(containerID)
	if err != nil {
		m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Error: %v", grpc.ErrorDesc(err))
		return grpc.ErrorDesc(err), kubecontainer.ErrRunContainer
	}
	m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, "Started container")

	// Symlink container logs to the legacy container log location for cluster logging
	// support.
	// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
	containerMeta := containerConfig.GetMetadata()
	sandboxMeta := podSandboxConfig.GetMetadata()
	legacySymlink := legacyLogSymlink(containerID, containerMeta.Name, sandboxMeta.Name,
		sandboxMeta.Namespace)
	containerLog := filepath.Join(podSandboxConfig.LogDirectory, containerConfig.LogPath)
	// only create legacy symlink if containerLog path exists (or the error is not IsNotExist).
	// Because if containerLog path does not exist, only dangling legacySymlink is created.
	// This dangling legacySymlink is later removed by container gc, so it does not make sense
	// to create it in the first place. it happens when journald logging driver is used with docker.
	if _, err := m.osInterface.Stat(containerLog); !os.IsNotExist(err) {
		if err := m.osInterface.Symlink(containerLog, legacySymlink); err != nil {
			glog.Errorf("Failed to create legacy symbolic link %q to container %q log %q: %v",
				legacySymlink, containerID, containerLog, err)
		}
	}

 

++++ 2020.03.06: revisited mainly because of a requirement for an in-place Pod upgrade design, so this function deserves a closer look.

6. The computePodActions function

      computePodActions checks whether the pod spec has changed and computes the actions required. The core helper is podSandboxChanged, analyzed in section 6.1; let's see exactly how the check works.

// computePodActions checks whether the pod spec has changed and returns the changes if true.
func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
	klog.V(5).Infof("Syncing Pod %q: %+v", format.Pod(pod), pod)

	createPodSandbox, attempt, sandboxID := m.podSandboxChanged(pod, podStatus)
	changes := podActions{
		KillPod:           createPodSandbox,
		CreateSandbox:     createPodSandbox,
		SandboxID:         sandboxID,
		Attempt:           attempt,
		ContainersToStart: []int{},
		ContainersToKill:  make(map[kubecontainer.ContainerID]containerToKillInfo),
	}

    6.1 The podSandboxChanged function

      If there are no sandbox statuses at all, there is no sandbox yet and a new one has to be started.

// podSandboxChanged checks whether the spec of the pod is changed and returns
// (changed, new attempt, original sandboxID if exist).
func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (bool, uint32, string) {
	if len(podStatus.SandboxStatuses) == 0 {
		klog.V(2).Infof("No sandbox for pod %q can be found. Need to start a new one", format.Pod(pod))
		return true, 0, ""
	}

    Take a look at the PodStatus struct definition: it carries both the container statuses and the sandbox statuses. For a container status the important fields are ID, State, ExitCode and Hash;

    for a sandbox status they are Id, State and Network.

// PodStatus represents the status of the pod and its containers.
// v1.PodStatus can be derived from examining PodStatus and v1.Pod.
type PodStatus struct {
	// ID of the pod.
	ID types.UID
	// Name of the pod.
	Name string
	// Namespace of the pod.
	Namespace string
	// IP of the pod.
	IP string
	// Status of containers in the pod.
	ContainerStatuses []*ContainerStatus
	// Status of the pod sandbox.
	// Only for kuberuntime now, other runtime may keep it nil.
	SandboxStatuses []*runtimeapi.PodSandboxStatus
}
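    The container statuses referenced above use ContainerStatus from the same package; an abridged view with the fields that matter for the checks below:

// ContainerStatus represents the status of a container. (abridged)
type ContainerStatus struct {
	// ID of the container.
	ID ContainerID
	// Name of the container.
	Name string
	// State of the container.
	State ContainerState
	// Exit code of the container.
	ExitCode int
	// Hash of the container spec, used to detect spec changes.
	Hash uint64
	// Number of times the container has been restarted.
	RestartCount int
	// timestamps, image, reason, message and other fields omitted
}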

 

    6.1.1 Count the ready sandboxes from the sandbox statuses

      If more than one sandbox is ready they need to be reconciled and the function returns true; the sandbox also has to be recreated if the first (latest) sandbox is not ready.

readySandboxCount := 0
for _, s := range podStatus.SandboxStatuses {
	if s.State == runtimeapi.PodSandboxState_SANDBOX_READY {
		readySandboxCount++
	}
}
// Needs to create a new sandbox when readySandboxCount > 1 or the ready sandbox is not the latest one.
sandboxStatus := podStatus.SandboxStatuses[0]
if readySandboxCount > 1 {
	klog.V(2).Infof("More than 1 sandboxes for pod %q are ready. Need to reconcile them", format.Pod(pod))
	return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
}
if sandboxStatus.State != runtimeapi.PodSandboxState_SANDBOX_READY {
	klog.V(2).Infof("No ready sandbox for pod %q can be found. Need to start a new one", format.Pod(pod))
	return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
}

    6.1.2 The sandbox also has to be recreated if its network namespace no longer matches the pod, or if it never obtained an IP address

// Needs to create a new sandbox when network namespace changed.
if sandboxStatus.GetLinux().GetNamespaces().GetOptions().GetNetwork() != networkNamespaceForPod(pod) {
	klog.V(2).Infof("Sandbox for pod %q has changed. Need to start a new one", format.Pod(pod))
	return true, sandboxStatus.Metadata.Attempt + 1, ""
}

// Needs to create a new sandbox when the sandbox does not have an IP address.
if !kubecontainer.IsHostNetworkPod(pod) && sandboxStatus.Network.Ip == "" {
	klog.V(2).Infof("Sandbox for pod %q has no IP address.  Need to start a new one", format.Pod(pod))
	return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
}
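    The networkNamespaceForPod helper used in the namespace check above is tiny; roughly (from pkg/kubelet/kuberuntime of this era):

// networkNamespaceForPod returns the expected network namespace mode for the pod:
// host-network pods share the node's namespace, everything else gets a pod-level one.
func networkNamespaceForPod(pod *v1.Pod) runtimeapi.NamespaceMode {
	if pod != nil && pod.Spec.HostNetwork {
		return runtimeapi.NamespaceMode_NODE
	}
	return runtimeapi.NamespaceMode_POD
}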

    To summarize, a new sandbox has to be created in the following cases; in every other case the existing sandbox is kept, which is what makes in-place container upgrades possible:

  • No sandbox status is found: there is no sandbox, so create one
  • More than one sandbox is ready: recreate
  • The first (latest) sandbox is not ready: recreate
  • The network namespace recorded in the sandbox status differs from the pod spec: recreate
  • The pod is not host-network and the sandbox has no IP address: recreate

    6.2 Instantiating the podActions object

    Everything that follows is driven by this podActions result: whether the sandbox must be recreated, which containers must be restarted, and so on.

    If KillPod and CreateSandbox are both true, the pod is killed first and then recreated.

    ContainersToStart lists the containers that need to be (re)started.

    ContainersToKill lists the containers that need to be killed.

changes := podActions{
	KillPod:           createPodSandbox,
	CreateSandbox:     createPodSandbox,
	SandboxID:         sandboxID,
	Attempt:           attempt,
	ContainersToStart: []int{},
	ContainersToKill:  make(map[kubecontainer.ContainerID]containerToKillInfo),
}
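    For reference, the podActions struct itself (pkg/kubelet/kuberuntime/kuberuntime_manager.go, comments abridged):

// podActions keeps information on what to do for a pod. (abridged)
type podActions struct {
	// Stop all running (regular) containers and the sandbox for the pod.
	KillPod bool
	// Whether a new sandbox needs to be created.
	CreateSandbox bool
	// The id of the existing sandbox, used when starting containers.
	SandboxID string
	// The attempt number for creating sandboxes for the pod.
	Attempt uint32
	// The next init container to start.
	NextInitContainerToStart *v1.Container
	// Indexes into pod.Spec.Containers of the containers to start.
	ContainersToStart []int
	// Containers to kill, keyed by container ID.
	ContainersToKill map[kubecontainer.ContainerID]containerToKillInfo
}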

    6.3 When the sandbox needs to be recreated

// If we need to (re-)create the pod sandbox, everything will need to be
// killed and recreated, and init containers should be purged.
if createPodSandbox {
	if !shouldRestartOnFailure(pod) && attempt != 0 && len(podStatus.ContainerStatuses) != 0 {
		// Should not restart the pod, just return.
		// we should not create a sandbox for a pod if it is already done.
		// if all containers are done and should not be started, there is no need to create a new sandbox.
		// this stops confusing logs on pods whose containers all have exit codes, but we recreate a sandbox before terminating it.
		//
		// If ContainerStatuses is empty, we assume that we've never
		// successfully created any containers. In this case, we should
		// retry creating the sandbox.
		changes.CreateSandbox = false
		return changes
	}
	if len(pod.Spec.InitContainers) != 0 {
		// Pod has init containers, return the first one.
		changes.NextInitContainerToStart = &pod.Spec.InitContainers[0]
		return changes
	}
	// Start all containers by default but exclude the ones that succeeded if
	// RestartPolicy is OnFailure.
	for idx, c := range pod.Spec.Containers {
		if containerSucceeded(&c, podStatus) && pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure {
			continue
		}
		changes.ContainersToStart = append(changes.ContainersToStart, idx)
	}
	return changes
}

    6.4 The per-container decision loop

     Iterate over every container in the pod. For any container whose status is not running, call the PostStopContainer internal lifecycle hook, mainly so that any CPUs allocated to it are released immediately.

// Number of running containers to keep.
keepCount := 0
// check the status of containers.
for idx, container := range pod.Spec.Containers {
	containerStatus := podStatus.FindContainerStatusByName(container.Name)

	// Call internal container post-stop lifecycle hook for any non-running container so that any
	// allocated cpus are released immediately. If the container is restarted, cpus will be re-allocated
	// to it.
	if containerStatus != nil && containerStatus.State != kubecontainer.ContainerStateRunning {
		if err := m.internalLifecycle.PostStopContainer(containerStatus.ID.ID); err != nil {
			klog.Errorf("internal container post-stop lifecycle hook failed for container %v in pod %v with error %v",
				container.Name, pod.Name, err)
		}
	}

    6.4.1 The container does not exist or is not running

// If container does not exist, or is not running, check whether we
// need to restart it.
if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
	if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
		message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
		klog.V(3).Infof(message)
		changes.ContainersToStart = append(changes.ContainersToStart, idx)
		if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
			// If container is in unknown state, we don't know whether it
			// is actually running or not, always try killing it before
			// restart to avoid having 2 running instances of the same container.
			changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
				name:      containerStatus.Name,
				container: &pod.Spec.Containers[idx],
				message: fmt.Sprintf("Container is in %q state, try killing it before restart",
					containerStatus.State),
			}
		}
	}
	continue
}

     6.4.1.1 The ShouldContainerBeRestarted function

    The container should NOT be restarted when (a condensed view of the function follows the list):

  • the container's state is running
  • the pod's restart policy is Never
  • the pod's restart policy is OnFailure and the exit code is 0

    Apart from those three cases the container is restarted; in particular it is always restarted when:

  • the container has no recorded status
  • the container's state is unknown or created
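    A condensed view of ShouldContainerBeRestarted (pkg/kubelet/container/helpers.go; logging trimmed), which matches the cases above:

// ShouldContainerBeRestarted checks whether a dead or missing container needs to be restarted. (condensed)
func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
	// Get the latest recorded status of the container.
	status := podStatus.FindContainerStatusByName(container.Name)
	// If the container was never started before, start it.
	if status == nil {
		return true
	}
	// A running container does not need to be restarted.
	if status.State == ContainerStateRunning {
		return false
	}
	// Always restart a container in the unknown or created state.
	if status.State == ContainerStateUnknown || status.State == ContainerStateCreated {
		return true
	}
	// For a dead container, honor the pod's restart policy.
	if pod.Spec.RestartPolicy == v1.RestartPolicyNever {
		return false
	}
	if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure && status.ExitCode == 0 {
		// Exited successfully; OnFailure does not restart it.
		return false
	}
	return true
}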

 

     6.4.1.2 The container is running: check whether its spec has changed

     The containerChanged function computes a hash over the container spec and checks whether it differs from the hash recorded when the running container was started.

// The container is running, but kill the container if any of the following condition is met.
var message string
restart := shouldRestartOnFailure(pod)
if _, _, changed := containerChanged(&container, containerStatus); changed {
	message = fmt.Sprintf("Container %s definition changed", container.Name)
	// Restart regardless of the restart policy because the container
	// spec changed.
	restart = true
} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
	// If the container failed the liveness probe, we should kill it.
	message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
} else {
	// Keep the container.
	keepCount++
	continue
}
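    containerChanged itself is essentially a one-liner around the container hash (same file):

// containerChanged compares the hash of the desired container spec with the hash
// recorded when the running container was started.
func containerChanged(container *v1.Container, containerStatus *kubecontainer.ContainerStatus) (uint64, uint64, bool) {
	expectedHash := kubecontainer.HashContainer(container)
	return expectedHash, containerStatus.Hash, containerStatus.Hash != expectedHash
}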

 
