【containerd 源码分析】containerd cri RunPodSandbox 源码分析之二


    criService 实现了接口 runtime.RuntimeServiceServer


1. RunPodSandbox 函数

    路径 pkg/server/sandbox_run.go,创建并启动 sandbox,成功后 sandbox 的状态应为 ready

// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) {
	config := r.GetConfig()
	log.G(ctx).Debugf("Sandbox config %+v", config)

    1.1 生成 ID,生成 name,注册 name <--> key 映射关系,防止并行创建

// Generate unique id and name for the sandbox and reserve the name.
id := util.GenerateID()
metadata := config.GetMetadata()
if metadata == nil {
	return nil, errors.New("sandbox config must include metadata")
name := makeSandboxName(metadata)
log.G(ctx).Debugf("Generated id %q for sandbox %q", id, name)
// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
// same sandbox.
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
	return nil, errors.Wrapf(err, "failed to reserve sandbox name %q", name)

    1.2 实例化 Sandbox,初始状态为 unknown

// Create initial internal sandbox object.
sandbox := sandboxstore.NewSandbox(
		ID:             id,
		Name:           name,
		Config:         config,
		RuntimeHandler: r.GetRuntimeHandler(),
		State: sandboxstore.StateUnknown,

    1.3 确保有镜像,如果没有镜像则 pull 镜像

// Ensure sandbox container image snapshot.
image, err := c.ensureImageExists(ctx, c.config.SandboxImage, config)
if err != nil {
	return nil, errors.Wrapf(err, "failed to get sandbox image %q", c.config.SandboxImage)
containerdImage, err := c.toContainerdImage(ctx, *image)
if err != nil {
	return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)

    1.4 获取 sandbox runtime

     如果 pod 带有注解 io.kubernetes.cri.untrusted-workload = true,则返回 untrusted runtime;否则使用请求中指定的 runtime handler,未指定时返回默认 runtime(default_runtime_name),例如 io.containerd.runc.v1

      snapshotter = "overlayfs"
      default_runtime_name = "runc"
      no_pivot = false
        runtime_type = ""
        runtime_engine = ""
        runtime_root = ""
        privileged_without_host_devices = false
        runtime_type = ""
        runtime_engine = ""
        runtime_root = ""
        privileged_without_host_devices = false
          runtime_type = "io.containerd.runc.v1"
          runtime_engine = ""
          runtime_root = ""
          privileged_without_host_devices = false

// getSandboxRuntime returns the runtime configuration for sandbox.
// If the sandbox contains untrusted workload, runtime for untrusted workload will be returned,
// or else default runtime will be returned.
func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtimeHandler string) (criconfig.Runtime, error) {
	if untrustedWorkload(config) {
		// If the untrusted annotation is provided, runtimeHandler MUST be empty.
		if runtimeHandler != "" && runtimeHandler != criconfig.RuntimeUntrusted {
			return criconfig.Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed")

		//  If the untrusted workload is requesting access to the host/node, this request will fail.
		//  Note: If the workload is marked untrusted but requests privileged, this can be granted, as the
		// runtime may support this.  For example, in a virtual-machine isolated runtime, privileged
		// is a supported option, granting the workload to access the entire guest VM instead of host.
		// TODO(windows): Deprecate this so that we don't need to handle it for windows.
		if hostAccessingSandbox(config) {
			return criconfig.Runtime{}, errors.New("untrusted workload with host access is not allowed")

		runtimeHandler = criconfig.RuntimeUntrusted

	if runtimeHandler == "" {
		runtimeHandler = c.config.ContainerdConfig.DefaultRuntimeName

	handler, ok := c.config.ContainerdConfig.Runtimes[runtimeHandler]
	if !ok {
		return criconfig.Runtime{}, errors.Errorf("no runtime for %q is configured", runtimeHandler)
	return handler, nil

    1.5 需要为 pod 设置网络

       如果不是 host 网络模式,需要创建 namespace    

       NewNetNS 创建网络 namespace,在目录 /var/run/netns/cni-%x-%x-%x-%x-%x

if podNetwork {
	// If it is not in host network namespace then create a namespace and set the sandbox
	// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
	// namespaces. If the pod is in host network namespace then both are empty and should not
	// be used.
	sandbox.NetNS, err = netns.NewNetNS()
	if err != nil {
		return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id)
	sandbox.NetNSPath = sandbox.NetNS.GetPath()


2. setupPodNetwork 为 sandbox 创建网络

      整理传给 CNI 插件的配置,包括 sandbox ID,网络 namespace,以及基本配置,例如 bandwidth,dns

// setupPodNetwork setups up the network for a pod
func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.Sandbox) error {
	var (
		id     = sandbox.ID
		config = sandbox.Config
		path   = sandbox.NetNSPath
	if c.netPlugin == nil {
		return errors.New("cni config not initialized")

	opts, err := cniNamespaceOpts(id, config)
	if err != nil {
		return errors.Wrap(err, "get cni namespace options")

    2.1 netPlugin.Setup 最终调用 AddNetworkList CNI 插件接口为 sandbox 配置网络

     最终调用 plugin 二进制为 sandbox 配置网络


result, err := c.netPlugin.Setup(ctx, id, path, opts...)
if err != nil {
	return err
logDebugCNIResult(ctx, id, result)
// Check if the default interface has IP config
if configs, ok := result.Interfaces[defaultIfName]; ok && len(configs.IPConfigs) > 0 {
	sandbox.IP, sandbox.AdditionalIPs = selectPodIPs(configs.IPConfigs)
	sandbox.CNIResult = result
	return nil


3. 生成 runtime spec 配置

    可以使用 crictl pods,crictl inspectp $id 查看配置

func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
	imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) {
	// Creates a spec Generator with the default spec.
	// TODO(random-liu): [P1] Compare the default settings with docker and containerd default.
	specOpts := []oci.SpecOpts{
	if imageConfig.WorkingDir != "" {
		specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))

    3.1 label 的类型为 sandbox

// Generate spec options that will be applied to the spec later.
specOpts, err := c.sandboxContainerSpecOpts(config, &image.ImageSpec.Config)
if err != nil {
	return nil, errors.Wrap(err, "failed to generate sanbdox container spec options")

sandboxLabels := buildLabels(config.Labels, containerKindSandbox)


4. 存储 sandbox 信息,创建 root 工作目录

	container, err := c.client.NewContainer(ctx, id, opts...)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create containerd container")

	// Create sandbox container root directories.
	sandboxRootDir := c.getSandboxRootDir(id)
	if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil {
		return nil, errors.Wrapf(err, "failed to create sandbox root directory %q",

    4.1 setupSandboxFiles 主要创建 hostname resolv.conf hosts 等文件

// Setup files required for the sandbox.
if err = c.setupSandboxFiles(id, config); err != nil {
	return nil, errors.Wrapf(err, "failed to setup sandbox files")


5. 创建 sandbox 任务

     这个其实最终是发送 task 请求,分别为 CreateTaskRequest,StartRequest,创建以及启动任务

taskOpts := c.taskOpts(ociRuntime.Type)
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
	return nil, errors.Wrap(err, "failed to create containerd task")

// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
	return nil, errors.Wrap(err, "failed to wait for sandbox container task")

if err := task.Start(ctx); err != nil {
	return nil, errors.Wrapf(err, "failed to start sandbox container task %q", id)

    5.1 比如使用默认 tasks-service  io.containerd.service.v1

func (l *local) Create(ctx context.Context, r *api.CreateTaskRequest, _ ...grpc.CallOption) (*api.CreateTaskResponse, error) {
	container, err := l.getContainer(ctx, r.ContainerID)
	if err != nil {
		return nil, errdefs.ToGRPC(err)
	checkpointPath, err := getRestorePath(container.Runtime.Name, r.Options)
	if err != nil {
		return nil, err

    5.2 比如 io.containerd.runc.v1

     实现路径为 containerd/runtime/v1/runtime.go

// Create a new task
func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts) (_ runtime.Task, err error) {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err

	if err := identifiers.Validate(id); err != nil {
		return nil, errors.Wrapf(err, "invalid task id")

	ropts, err := r.getRuncOptions(ctx, id)
	if err != nil {
		return nil, err

     启动 shim 进程

     /usr/bin/containerd-shim-runc-v1 -namespace k8s.io -id d84185af26fcc146b4787ed08543c49d327bb97171ed6b669618f9793a8545fc -address /run/containerd/containerd.sock

shimopt := ShimLocal(r.config, r.events)
if !r.config.NoShim {
	var cgroup string
	if opts.TaskOptions != nil {
		v, err := typeurl.UnmarshalAny(opts.TaskOptions)
		if err != nil {
			return nil, err
		cgroup = v.(*runctypes.CreateOptions).ShimCgroup
	exitHandler := func() {
		log.G(ctx).WithField("id", id).Info("shim reaped")

		if _, err := r.tasks.Get(ctx, id); err != nil {
			// Task was never started or was already successfully deleted

		if err = r.cleanupAfterDeadShim(context.Background(), bundle, namespace, id); err != nil {
				"id":        id,
				"namespace": namespace,
			}).Warn("failed to clean up after killed shim")
	shimopt = ShimRemote(r.config, r.address, cgroup, exitHandler)

    与 shim 建立GRPC 连接,发送 CreateTaskRequest  

sopts := &shim.CreateTaskRequest{
	ID:         id,
	Bundle:     bundle.path,
	Runtime:    rt,
	Stdin:      opts.IO.Stdin,
	Stdout:     opts.IO.Stdout,
	Stderr:     opts.IO.Stderr,
	Terminal:   opts.IO.Terminal,
	Checkpoint: opts.Checkpoint,
	Options:    opts.TaskOptions,
for _, m := range opts.Rootfs {
	sopts.Rootfs = append(sopts.Rootfs, &types.Mount{
		Type:    m.Type,
		Source:  m.Source,
		Options: m.Options,
cr, err := s.Create(ctx, sopts)
if err != nil {
	return nil, errdefs.FromGRPC(err)

    StartRequest 的处理流程与 CreateTaskRequest 相同


6. 更新 sandbox 状态为 ready

if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
	// Set the pod sandbox as ready after successfully start sandbox container.
	status.Pid = task.Pid()
	status.State = sandboxstore.StateReady
	status.CreatedAt = info.CreatedAt
	return status, nil
}); err != nil {
	return nil, errors.Wrap(err, "failed to update sandbox status")



    RunPodSandbox 获取配置,生成 ID 和 name,注册 name <--> key 映射关系,防止重复并发创建

    确保 image 在本地节点存在,不存在则 pull image

    获取 runtime,根据 pod 注解,以及配置文件,如果 untrusted 则返回该 runtime,否则返回默认 runtime

    为 sandbox 创建网络,与 docker-shim 不一样的是这个先创建网络

    生成 spec 配置

    发送 GRPC 创建以及启动请求,成功将 sandbox 状态改为 ready

  • 3
  • 2
    觉得还不错? 一键收藏
  • 2
containerd是一个用于管理Linux容器的开源守护程序,它在Kubernetes等容器编排系统中扮演着非常重要的角色。而CRI(Container Runtime Interface)是用于与Kubernetes API交互的标准化接口,与容器运行时进行通信。 要安装containerd CRI,我们需要按照以下步骤进行操作: 1. 安装依赖项:首先,我们需要安装一些依赖项,包括操作系统所需的软件包和工具。这包括golang的安装,以及编译containerd所需的build-essential和git等软件包。 2. 下载containerd源代码:接下来,我们需要从containerd的GitHub仓库中下载源代码。可以使用git命令克隆仓库或者下载源代码的压缩包。 3. 构建和安装containerd:进入containerd源码的根目录,执行make命令进行构建。构建完成后,可以使用make install命令将containerd安装到系统中。安装完成后,可以使用containerd命令进行验证。 4. 配置containerd:在安装containerd之后,我们需要进行一些配置。可以通过编辑containerd的配置文件,通常位于/etc/containerd/config.toml,来进行配置。该文件包含了containerd的各种配置选项,如默认的Runtime类型和镜像存储位置等。 5. 配置CRI使用containerd:接下来,我们需要配置Kubernetes使用containerd作为其CRI。可以通过编辑kubelet的配置文件,通常位于/etc/kubernetes/kubelet.conf,来进行配置。在该配置文件中,可以指定containerd的地址和其他相关选项。 6. 重启服务并验证:完成上述配置后,需要重启kubelet和containerd服务。重启后,可以使用kubectl命令验证Kubernetes是否成功使用containerd作为其CRI。 通过以上步骤,我们可以完成containerd CRI的安装和配置。这将使得Kubernetes能够正常工作并管理容器的生命周期。同时,containerd作为高效、稳定和可扩展的容器运行时,也能够提供更好的容器管理和资源利用效率。


  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
评论 2




当前余额3.43前往充值 >
领取后你会自动成为博主和红包主的粉丝 规则
钱包余额 0


