kubevirt-controller Code Analysis (1)

Walking through the kubevirt-controller Execute() entry point

func Execute() {
	var err error
	// Create the app
	var app = VirtControllerApp{}
	// Load the default leader-election configuration
	app.LeaderElection = leaderelectionconfig.DefaultLeaderElectionConfiguration()

	// Parse command-line flags
	service.Setup(&app)

	// Initialize readyChan
	app.readyChan = make(chan bool, 1)

	// Initialize logging
	log.InitializeLogging("virt-controller")
	
	// Rate-limit access to the API server: this is where client-go's flowcontrol (token bucket) concept comes in
	app.reloadableRateLimiter = ratelimiter.NewReloadableRateLimiter(flowcontrol.NewTokenBucketRateLimiter(virtconfig.DefaultVirtControllerQPS, virtconfig.DefaultVirtControllerBurst))
	
	// Register REST config hooks for the KubeVirt client so monitoring is wired in before its config is built; concretely, it instruments resource access and HTTP requests so performance metrics can be collected and reported (explanation from Copilot)
	clientmetrics.RegisterRestConfigHooks()
	
	// First obtain the client config for talking to the Kubernetes API
	clientConfig, err := kubecli.GetKubevirtClientConfig()
	if err != nil {
		panic(err)
	}
	
	// Then point the Kubernetes clientConfig's RateLimiter at our own reloadable rate limiter
	clientConfig.RateLimiter = app.reloadableRateLimiter

	// Build a clientSet from the config; think of it as a bundle of clients
	app.clientSet, err = kubecli.GetKubevirtClientFromRESTConfig(clientConfig)
	if err != nil {
		golog.Fatal(err)
	}
	
	// Only here is the actual REST client obtained
	app.restClient = app.clientSet.RestClient()

	// Bootstrapping. From here on the initialization order is important
	app.kubevirtNamespace, err = clientutil.GetNamespace()
	if err != nil {
		golog.Fatalf("Error searching for namespace: %v", err)
	}
	
	// Record the controller's host
	host, err := os.Hostname()
	if err != nil {
		golog.Fatalf("unable to get hostname: %v", err)
	}
	app.host = host

	// Create the context. OpenStack has contexts too; in general a context carries state shared across the call chain (here it is mainly used for cancellation).
	ctx, cancel := context.WithCancel(context.Background())
	stopChan := ctx.Done()
	app.ctx = ctx

	// Create a KubeInformerFactory, used to watch and cache state changes of Kubernetes resources
	app.informerFactory = controller.NewKubeInformerFactory(app.restClient, app.clientSet, nil, app.kubevirtNamespace)

	// Get the CRD informer; the informerFactory is built on Kubernetes' client-go cache machinery
	app.crdInformer = app.informerFactory.CRD()
	// Get the KubeVirt CR informer
	app.kubeVirtInformer = app.informerFactory.KubeVirt()

	// Install a watch error handler: if the KubeVirt watch breaks, clear the cached API health version before falling back to the default handler
	if err := app.kubeVirtInformer.SetWatchErrorHandler(func(r *cache.Reflector, err error) {
		apiHealthVersion.Clear()
		cache.DefaultWatchErrorHandler(r, err)
	}); err != nil {
		golog.Fatalf("failed to set the watch error handler: %v", err)
	}
	app.informerFactory.Start(stopChan)

	// Wait here until the CRD and KubeVirt informer caches have synced
	cache.WaitForCacheSync(stopChan, app.crdInformer.HasSynced, app.kubeVirtInformer.HasSynced)
	
	// Build the clusterConfig (cluster-wide configuration)
	app.clusterConfig, err = virtconfig.NewClusterConfig(app.crdInformer, app.kubeVirtInformer, app.kubevirtNamespace)
	if err != nil {
		panic(err)
	}

	app.reInitChan = make(chan string, 10)
	// Check whether the cluster exposes the DataVolume (CDI) API
	app.hasCDI = app.clusterConfig.HasDataVolumeAPI()
	// Register a few callbacks that fire when the cluster config changes
	app.clusterConfig.SetConfigModifiedCallback(app.configModificationCallback)
	app.clusterConfig.SetConfigModifiedCallback(app.shouldChangeLogVerbosity)
	app.clusterConfig.SetConfigModifiedCallback(app.shouldChangeRateLimiter)

	// Set up some basic RESTful endpoints
	webService := new(restful.WebService)
	webService.Path("/").Consumes(restful.MIME_JSON).Produces(restful.MIME_JSON)
	webService.Route(webService.GET("/healthz").To(healthz.KubeConnectionHealthzFuncFactory(app.clusterConfig, apiHealthVersion)).Doc("Health endpoint"))
	webService.Route(webService.GET("/leader").To(app.leaderProbe).Doc("Leader endpoint"))

	componentProfiler := profiler.NewProfileManager(app.clusterConfig)
	webService.Route(webService.GET("/start-profiler").To(componentProfiler.HandleStartProfiler).Doc("start profiler endpoint"))
	webService.Route(webService.GET("/stop-profiler").To(componentProfiler.HandleStopProfiler).Doc("stop profiler endpoint"))
	webService.Route(webService.GET("/dump-profiler").To(componentProfiler.HandleDumpProfiler).Doc("dump profiler results endpoint"))

	restful.Add(webService)
	// Grab a whole batch of informers and caches
	app.vmiInformer = app.informerFactory.VMI()
	app.kvPodInformer = app.informerFactory.KubeVirtPod()
	app.nodeInformer = app.informerFactory.KubeVirtNode()
	app.namespaceStore = app.informerFactory.Namespace().GetStore()
	app.namespaceInformer = app.informerFactory.Namespace()
	app.vmiCache = app.vmiInformer.GetStore()
	app.vmiRecorder = app.newRecorder(k8sv1.NamespaceAll, "virtualmachine-controller")

	app.rsInformer = app.informerFactory.VMIReplicaSet()
	app.poolInformer = app.informerFactory.VMPool()

	app.persistentVolumeClaimInformer = app.informerFactory.PersistentVolumeClaim()
	app.persistentVolumeClaimCache = app.persistentVolumeClaimInformer.GetStore()

	app.pdbInformer = app.informerFactory.K8SInformerFactory().Policy().V1().PodDisruptionBudgets().Informer()

	app.vmInformer = app.informerFactory.VirtualMachine()

	app.migrationInformer = app.informerFactory.VirtualMachineInstanceMigration()

	app.controllerRevisionInformer = app.informerFactory.ControllerRevision()

	app.vmExportInformer = app.informerFactory.VirtualMachineExport()
	app.vmSnapshotInformer = app.informerFactory.VirtualMachineSnapshot()
	app.vmSnapshotContentInformer = app.informerFactory.VirtualMachineSnapshotContent()
	app.vmRestoreInformer = app.informerFactory.VirtualMachineRestore()
	app.storageClassInformer = app.informerFactory.StorageClass()
	app.caExportConfigMapInformer = app.informerFactory.KubeVirtExportCAConfigMap()
	app.exportRouteConfigMapInformer = app.informerFactory.ExportRouteConfigMap()
	app.unmanagedSecretInformer = app.informerFactory.UnmanagedSecrets()
	app.allPodInformer = app.informerFactory.Pod()
	app.exportServiceInformer = app.informerFactory.ExportService()
	app.resourceQuotaInformer = app.informerFactory.ResourceQuota()

	// If the cluster provides the DataVolume (CDI) service, wire up the real CDI informers
	if app.hasCDI {
		app.dataVolumeInformer = app.informerFactory.DataVolume()
		app.cdiInformer = app.informerFactory.CDI()
		app.cdiConfigInformer = app.informerFactory.CDIConfig()
		app.dataSourceInformer = app.informerFactory.DataSource()
		app.storageProfileInformer = app.informerFactory.StorageProfile()
		log.Log.Infof("CDI detected, DataVolume integration enabled")
	} else {
		// Otherwise fall back to dummy (fake) CDI informers.
		// Add a dummy DataVolume informer in the event datavolume support
		// is disabled. This lets the controller continue to work without
		// requiring a separate branching code path.
		app.dataVolumeInformer = app.informerFactory.DummyDataVolume()
		app.cdiInformer = app.informerFactory.DummyCDI()
		app.cdiConfigInformer = app.informerFactory.DummyCDIConfig()
		app.dataSourceInformer = app.informerFactory.DummyDataSource()
		app.storageProfileInformer = app.informerFactory.DummyStorageProfile()
		log.Log.Infof("CDI not detected, DataVolume integration disabled")
	}

	// Check whether we are running on Red Hat OpenShift
	onOpenShift, err := clusterutil.IsOnOpenShift(app.clientSet)
	if err != nil {
		golog.Fatalf("Error determining cluster type: %v", err)
	}
	// OpenShift ships its own Route resource (the operator Route), so the route cache differs there
	if onOpenShift {
		log.Log.Info("we are on openshift")
		app.routeCache = app.informerFactory.OperatorRoute().GetStore()
	} else {
		log.Log.Info("we are on kubernetes")
		app.routeCache = app.informerFactory.DummyOperatorRoute().GetStore()
	}
	// Remaining informers and caches
	app.ingressCache = app.informerFactory.Ingress().GetStore()
	app.migrationPolicyInformer = app.informerFactory.MigrationPolicy()

	app.vmCloneInformer = app.informerFactory.VirtualMachineClone()

	app.instancetypeInformer = app.informerFactory.VirtualMachineInstancetype()
	app.clusterInstancetypeInformer = app.informerFactory.VirtualMachineClusterInstancetype()
	app.preferenceInformer = app.informerFactory.VirtualMachinePreference()
	app.clusterPreferenceInformer = app.informerFactory.VirtualMachineClusterPreference()

	app.onOpenshift = onOpenShift

	if err := metrics.SetupMetrics(
		app.vmInformer,
		app.vmiInformer,
		app.clusterInstancetypeInformer,
		app.instancetypeInformer,
		app.clusterPreferenceInformer,
		app.preferenceInformer,
		app.migrationInformer,
		app.clusterConfig,
	); err != nil {
		golog.Fatal(err)
	}

	// Only now does the real initialization start: common setup, replica sets, pools, ...
	app.initCommon()
	app.initReplicaSet()
	app.initPool()
	// This one matters: it is the entry point for the VirtualMachine controllers
	app.initVirtualMachines()
	app.initDisruptionBudgetController()
	app.initEvacuationController()
	app.initSnapshotController()
	app.initRestoreController()
	app.initExportController()
	app.initWorkloadUpdaterController()
	app.initCloneController()
	go app.Run()

	<-app.reInitChan
	cancel()
}

Breaking down the controller

The context mechanism

Both Kubernetes and OpenStack implement a context mechanism: within the lifetime of a request, the context is passed back and forth between functions. Having one is one thing, though; actually using it is another. In OpenStack the context typically carries authentication data and a request ID wherever it goes, whereas the Kubernetes (Go) context does not appear to implement anything like that; here it is mostly used for cancellation.
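
A minimal sketch of the pattern Execute() relies on: one context.WithCancel at the top, its Done() channel handed to everything that needs to stop, and a single cancel() to shut it all down (the worker names here are made up for illustration):

package main

import (
	"context"
	"fmt"
	"time"
)

// worker blocks until the shared context is cancelled.
func worker(ctx context.Context, name string) {
	<-ctx.Done()
	fmt.Printf("%s: stopping (%v)\n", name, ctx.Err())
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	stopChan := ctx.Done() // the same channel Execute() passes to the informer factory
	_ = stopChan

	go worker(ctx, "controller-a")
	go worker(ctx, "controller-b")

	time.Sleep(500 * time.Millisecond)
	cancel() // one call stops everything holding this ctx
	time.Sleep(100 * time.Millisecond)
}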

The cache mechanism

To get at other resources we usually have two options, API calls or RPC, and in systems with little interaction or no concern for speed both are perfectly workable. But what if requests need to be fast? Throw Redis, memcached and the like at it; their performance is more than enough.

But all of these still go over the network, and the network brings latency, interruptions and other unpredictable behavior. So Kubernetes uses a cache mechanism: the cache keeps watching resources for changes, and whenever something changes it is cached on the local machine.

In other words, resources are still watched and fetched through the API; they are simply stored locally, and the local service only needs to keep reading from that cache.

OpenStack rarely has this kind of cache mechanism, although the dhcp_agent does use one.
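
A minimal client-go sketch of that watch-and-cache idea, outside of KubeVirt (it assumes a kubeconfig at the default location and is not the KubeVirt informer factory itself):

package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	clientset := kubernetes.NewForConfigOrDie(config)

	// The factory watches the API server and keeps a local cache in sync.
	factory := informers.NewSharedInformerFactory(clientset, 10*time.Minute)
	podInformer := factory.Core().V1().Pods().Informer()

	// React to changes as they are delivered by the watch.
	podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			fmt.Println("pod added:", obj.(*corev1.Pod).Name)
		},
	})

	stopCh := make(chan struct{})
	defer close(stopCh)
	factory.Start(stopCh)

	// After the sync, reads hit the local cache instead of the API server.
	cache.WaitForCacheSync(stopCh, podInformer.HasSynced)
	fmt.Println("pods in local cache:", len(podInformer.GetStore().List()))
}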

The individual controllers

During init, the various init* functions create a number of controllers. Each controller builds the informers it needs so it can watch for events, and then registers handlers for those events via AddEventHandler:

	c.hasSynced = func() bool {
		return vmiInformer.HasSynced() && vmInformer.HasSynced() &&
			dataVolumeInformer.HasSynced() && dataSourceInformer.HasSynced() &&
			pvcInformer.HasSynced() && crInformer.HasSynced() && podInformer.HasSynced()
	}

	_, err := vmInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    c.addVirtualMachine,
		DeleteFunc: c.deleteVirtualMachine,
		UpdateFunc: c.updateVirtualMachine,
	})
	if err != nil {
		return nil, err
	}
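
The handlers themselves usually do very little: they turn the object into a "namespace/name" key and push it onto a rate-limited work queue, which the workers drain later (see Execute() below). A generic sketch of that pattern, not the exact KubeVirt code:

package controllersketch

import (
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"
)

// The queue is created once when the controller is built; the workers drain it.
var queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())

// enqueue is what handlers like addVirtualMachine essentially boil down to:
// derive the "namespace/name" key and hand it to the queue.
func enqueue(obj interface{}) {
	key, err := cache.MetaNamespaceKeyFunc(obj)
	if err != nil {
		return // object without usable metadata, nothing to enqueue
	}
	queue.Add(key)
}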

The Run method

The final go app.Run() is what ultimately gets each controller's Run method going. Inside Run, the last call made is vca.leaderElector.Run(vca.ctx).

No matter how many replicas of the controller are running, only one of them is the active leader; the others sit in hot standby, much like OVN.

This Run is implemented by Kubernetes (client-go), so we basically don't need to touch it, only use it. What we actually implement are the OnStartedLeading and OnStoppedLeading callbacks.

func (le *LeaderElector) Run(ctx context.Context) {
	defer runtime.HandleCrash()
	defer le.config.Callbacks.OnStoppedLeading()

	if !le.acquire(ctx) {
		return // ctx signalled done
	}
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go le.config.Callbacks.OnStartedLeading(ctx)
	le.renew(ctx)
}
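
For reference, wiring this up with client-go looks roughly like the sketch below; KubeVirt's real setup lives in application.go, so treat names like startControllers as placeholders:

package main

import (
	"context"
	"os"
	"time"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/leaderelection"
	"k8s.io/client-go/tools/leaderelection/resourcelock"
)

// startControllers stands in for the go vca.xxxController.Run(...) calls.
func startControllers(ctx context.Context) { <-ctx.Done() }

func main() {
	config, err := rest.InClusterConfig()
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)
	id, _ := os.Hostname()

	// All replicas compete for the same Lease object; the winner becomes leader.
	lock, err := resourcelock.New(
		resourcelock.LeasesResourceLock,
		"kubevirt", "virt-controller-sketch",
		client.CoreV1(), client.CoordinationV1(),
		resourcelock.ResourceLockConfig{Identity: id},
	)
	if err != nil {
		panic(err)
	}

	leaderelection.RunOrDie(context.Background(), leaderelection.LeaderElectionConfig{
		Lock:          lock,
		LeaseDuration: 15 * time.Second,
		RenewDeadline: 10 * time.Second,
		RetryPeriod:   2 * time.Second,
		Callbacks: leaderelection.LeaderCallbacks{
			// Only the elected leader runs the controllers.
			OnStartedLeading: startControllers,
			// Leadership lost: stop doing work (virt-controller exits here).
			OnStoppedLeading: func() { os.Exit(1) },
		},
	})
}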

In the OnStartedLeading callback in application.go we finally see the Run method of each controller being invoked:

	go vca.evacuationController.Run(vca.evacuationControllerThreads, stop)
	go vca.disruptionBudgetController.Run(vca.disruptionBudgetControllerThreads, stop)
	go vca.nodeController.Run(vca.nodeControllerThreads, stop)
	go vca.vmiController.Run(vca.vmiControllerThreads, stop)
	go vca.rsController.Run(vca.rsControllerThreads, stop)
	go vca.poolController.Run(vca.poolControllerThreads, stop)
	go vca.vmController.Run(vca.vmControllerThreads, stop)
	go vca.migrationController.Run(vca.migrationControllerThreads, stop)

A controller's Run method

func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) {
	defer controller.HandlePanic()
	defer c.Queue.ShutDown()
	log.Log.Info("Starting vmi controller.")

	// Wait for cache sync before we start the pod controller
	// i.e. wait for the caches to sync before starting any workers
	cache.WaitForCacheSync(stopCh, c.hasSynced)

	// Sync the CIDs from exist VMIs
	// First sync the CIDs of the VMIs that already exist in the cache
	var vmis []*virtv1.VirtualMachineInstance
	for _, obj := range c.vmiIndexer.List() {
		vmi := obj.(*virtv1.VirtualMachineInstance)
		vmis = append(vmis, vmi)
	}
	c.cidsMap.Sync(vmis)

	// Start the actual work
	// Spawn worker goroutines according to the configured threadiness
	for i := 0; i < threadiness; i++ {
		// wait.Until keeps the worker alive, re-running it one second after it returns.
		// In other words, when we create a VMI, in the worst case KubeVirt starts processing it about a second later.
		go wait.Until(c.runWorker, time.Second, stopCh)
	}

	<-stopCh
	log.Log.Info("Stopping vmi controller.")
}
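
runWorker itself is nothing more than a loop over Execute; a sketch of the usual pattern (Execute returns false only once the queue has been shut down):

func (c *Controller) runWorker() {
	// Keep draining the work queue until Execute reports that it was shut down.
	for c.Execute() {
	}
}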

Run eventually ends up in Execute()

func (c *Controller) Execute() bool {
	// Take a key from the work queue
	key, quit := c.Queue.Get()
	// quit is true only when the queue has been shut down; returning false stops the worker loop
	if quit {
		return false
	}

	// Start a trace for this key
	virtControllerVMIWorkQueueTracer.StartTrace(key, "virt-controller VMI workqueue", trace.Field{Key: "Workqueue Key", Value: key})
	defer virtControllerVMIWorkQueueTracer.StopTrace(key)

	defer c.Queue.Done(key)
	
	// And at long last, the call with no more indirection
	err := c.execute(key)

	if err != nil {
		log.Log.Reason(err).Infof("reenqueuing VirtualMachineInstance %v", key)
		c.Queue.AddRateLimited(key)
	} else {
		log.Log.V(4).Infof("processed VirtualMachineInstance %v", key)
		c.Queue.Forget(key)
	}
	return true
}

The final execute function

func (c *Controller) execute(key string) error {

	// Fetch the latest Vm state from cache
	// Look the VMI up by key in the local indexer (the cache), not the API server directly
	obj, exists, err := c.vmiIndexer.GetByKey(key)

	if err != nil {
		return err
	}

	// Once all finalizers are removed the vmi gets deleted and we can clean all expectations
	if !exists {
		c.podExpectations.DeleteExpectations(key)
		c.vmiExpectations.DeleteExpectations(key)
		c.cidsMap.Remove(key)
		return nil
	}
	// Type assertion: if obj is not a VirtualMachineInstance, this panics
	vmi := obj.(*virtv1.VirtualMachineInstance)

	// Logger scoped to this VMI
	logger := log.Log.Object(vmi)

	// this must be first step in execution. Writing the object
	// when api version changes ensures our api stored version is updated.

	if !controller.ObservedLatestApiVersionAnnotation(vmi) {
		vmi := vmi.DeepCopy()
		controller.SetLatestApiVersionAnnotation(vmi)
		key := controller.VirtualMachineInstanceKey(vmi)
		c.vmiExpectations.SetExpectations(key, 1, 0)
		_, err = c.clientset.VirtualMachineInstance(vmi.ObjectMeta.Namespace).Update(context.Background(), vmi, v1.UpdateOptions{})
		if err != nil {
			c.vmiExpectations.LowerExpectations(key, 1, 0)
			return err
		}
		return nil
	}

	// If needsSync is true (expectations fulfilled) we can make save assumptions if virt-handler or virt-controller owns the pod
	needsSync := c.podExpectations.SatisfiedExpectations(key) && c.vmiExpectations.SatisfiedExpectations(key) && c.pvcExpectations.SatisfiedExpectations(key)

	if !needsSync {
		return nil
	}

	// Only consider pods which belong to this vmi
	// excluding unfinalized migration targets from this list.
	// Fetch the pod belonging to this VMI; for a freshly created VMI there is no pod yet, it gets built and created later during sync
	pod, err := controller.CurrentVMIPod(vmi, c.podIndexer)
	if err != nil {
		logger.Reason(err).Error("Failed to fetch pods for namespace from cache.")
		return err
	}

	// Get all dataVolumes associated with this vmi
	// DataVolumes referenced by the VMI: these have to be defined up front
	dataVolumes, err := storagetypes.ListDataVolumesFromVolumes(vmi.Namespace, vmi.Spec.Volumes, c.dataVolumeIndexer, c.pvcIndexer)
	if err != nil {
		logger.Reason(err).Error("Failed to fetch dataVolumes for namespace from cache.")
		return err
	}
	
	// Sync the pod: this is where the virtual machine really gets created
	syncErr, pod := c.sync(vmi, pod, dataVolumes)

	err = c.updateStatus(vmi, pod, dataVolumes, syncErr)
	if err != nil {
		return err
	}

	if syncErr != nil && syncErr.RequiresRequeue() {
		return syncErr
	}

	return nil
}
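
A note on the podExpectations / vmiExpectations objects used above: they implement the usual Kubernetes controller "expectations" pattern, which stops the controller from acting on a local cache that has not yet caught up with its own writes. Schematically (an annotated restatement of the Update branch above, not new KubeVirt code):

// Before writing to the API server, record that we expect to observe one more
// event for this key (1 add/update, 0 deletes):
c.vmiExpectations.SetExpectations(key, 1, 0)

_, err = c.clientset.VirtualMachineInstance(vmi.ObjectMeta.Namespace).Update(context.Background(), vmi, v1.UpdateOptions{})
if err != nil {
	// The write failed, so that event will never arrive; take the expectation back.
	c.vmiExpectations.LowerExpectations(key, 1, 0)
	return err
}

// The informer event handlers mark expectations as observed when the watch
// delivers the matching event. Until then SatisfiedExpectations(key) is false,
// needsSync is false, and execute() returns early without syncing.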

Finally, finally, we enter the sync function, which really creates the pod and starts the virtual machine.

// Inside sync, the function we need to pay attention to is this one:
templatePod, err = c.templateService.RenderLaunchManifest(vmi)
// This is where the virt-launcher pod is finally rendered; the XML the VM boots from ultimately traces back here.
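
sync then hands the rendered manifest to the Kubernetes API to create the real virt-launcher pod. Stripped of expectations bookkeeping and error wrapping, the core of that path is roughly the following (a sketch, not the exact KubeVirt code):

// Inside the VMI controller's sync path, once the manifest has been rendered:
templatePod, err := c.templateService.RenderLaunchManifest(vmi)
if err != nil {
	return err
}
// The pod is created through the ordinary Kubernetes core client; the kubelet
// then schedules and starts virt-launcher, which in turn boots the VM.
_, err = c.clientset.CoreV1().Pods(vmi.Namespace).Create(context.Background(), templatePod, metav1.CreateOptions{})
if err != nil {
	return err
}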

Generating the pod

func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, imageIDs map[string]string, backendStoragePVCName string, tempPod bool) (*k8sv1.Pod, error) {
	// Three preconditions: not nil, has a name, has a namespace
	precond.MustNotBeNil(vmi)
	domain := precond.MustNotBeEmpty(vmi.GetObjectMeta().GetName())
	namespace := precond.MustNotBeEmpty(vmi.GetObjectMeta().GetNamespace())

	var userId int64 = util.RootUser
	// Should this VMI run as non-root? If so, switch to the non-root UID
	nonRoot := util.IsNonRootVMI(vmi)
	if nonRoot {
		userId = util.NonRootUID
	}
	
	// Kubernetes grace-period concept: a pod being deleted is not killed immediately; it gets a grace period in which it can run cleanup
	gracePeriodSeconds := gracePeriodInSeconds(vmi)

	// Image pull secrets for the VMI's volumes; usually not needed
	imagePullSecrets := imgPullSecrets(vmi.Spec.Volumes...)
	if t.imagePullSecret != "" {
		imagePullSecrets = appendUniqueImagePullSecret(imagePullSecrets, k8sv1.LocalObjectReference{
			Name: t.imagePullSecret,
		})
	}

	// Pad the virt-launcher grace period.
	// Ideally we want virt-handler to handle tearing down
	// the vmi without virt-launcher's termination forcing
	// the vmi down.
	// Similar to OpenStack: a soft shutdown can hit problems inside the guest and never finish,
	// so once the timeout expires a hard shutdown is forced.
	gracePeriodSeconds = gracePeriodSeconds + int64(15)
	gracePeriodKillAfter := gracePeriodSeconds + int64(15)

	// Fetch and render the network configuration; we skip the details for now
	networkToResourceMap, err := network.GetNetworkToResourceMap(t.virtClient, vmi)
	if err != nil {
		return nil, err
	}
	resourceRenderer, err := t.newResourceRenderer(vmi, networkToResourceMap)
	if err != nil {
		return nil, err
	}
	resources := resourceRenderer.ResourceRequirements()

	// Path to the OVMF firmware for UEFI boot.
	ovmfPath := t.clusterConfig.GetOVMFPath(vmi.Spec.Architecture)

	// A pile of hook sidecars
	var requestedHookSidecarList hooks.HookSidecarList
	for _, sidecarCreator := range t.sidecarCreators {
		sidecars, err := sidecarCreator(vmi, t.clusterConfig.GetConfig())
		if err != nil {
			return nil, err
		}
		requestedHookSidecarList = append(requestedHookSidecarList, sidecars...)
	}

	// The pod's startup command
	var command []string
	if tempPod {
		logger := log.DefaultLogger()
		logger.Infof("RUNNING doppleganger pod for %s", vmi.Name)
		command = []string{"/bin/bash",
			"-c",
			"echo", "bound PVCs"}
	} else {
		command = []string{"/usr/bin/virt-launcher-monitor",
			"--qemu-timeout", generateQemuTimeoutWithJitter(t.launcherQemuTimeout),
			"--name", domain,
			"--uid", string(vmi.UID),
			"--namespace", namespace,
			"--kubevirt-share-dir", t.virtShareDir,
			"--ephemeral-disk-dir", t.ephemeralDiskDir,
			"--container-disk-dir", t.containerDiskDir,
			"--grace-period-seconds", strconv.Itoa(int(gracePeriodSeconds)),
			"--hook-sidecars", strconv.Itoa(len(requestedHookSidecarList)),
			"--ovmf-path", ovmfPath,
		}
		if nonRoot {
			command = append(command, "--run-as-nonroot")
		}
		if customDebugFilters, exists := vmi.Annotations[v1.CustomLibvirtLogFiltersAnnotation]; exists {
			log.Log.Object(vmi).Infof("Applying custom debug filters for vmi %s: %s", vmi.Name, customDebugFilters)
			command = append(command, "--libvirt-log-filters", customDebugFilters)
		}
	}

	// Allow software emulation (running without KVM) if the cluster permits it
	if t.clusterConfig.AllowEmulation() {
		command = append(command, "--allow-emulation")
	}

	if checkForKeepLauncherAfterFailure(vmi) {
		command = append(command, "--keep-after-failure")
	}

	_, ok := vmi.Annotations[v1.FuncTestLauncherFailFastAnnotation]
	if ok {
		command = append(command, "--simulate-crash")
	}

	// Configure the volumes (PVCs and friends)
	volumeRenderer, err := t.newVolumeRenderer(vmi, namespace, requestedHookSidecarList, backendStoragePVCName)
	if err != nil {
		return nil, err
	}

	// Build the compute container renderer; remember, a pod can hold multiple containers even though it is usually 1:1
	compute := t.newContainerSpecRenderer(vmi, volumeRenderer, resources, userId).Render(command)

	for networkName, resourceName := range networkToResourceMap {
		varName := fmt.Sprintf("KUBEVIRT_RESOURCE_NAME_%s", networkName)
		compute.Env = append(compute.Env, k8sv1.EnvVar{Name: varName, Value: resourceName})
	}

	virtLauncherLogVerbosity := t.clusterConfig.GetVirtLauncherVerbosity()

	if verbosity, isSet := vmi.Labels[logVerbosity]; isSet || virtLauncherLogVerbosity != virtconfig.DefaultVirtLauncherLogVerbosity {
		// Override the cluster wide verbosity level if a specific value has been provided for this VMI
		verbosityStr := fmt.Sprint(virtLauncherLogVerbosity)
		if isSet {
			verbosityStr = verbosity

			verbosityInt, err := strconv.Atoi(verbosity)
			if err != nil {
				return nil, fmt.Errorf("verbosity %s cannot cast to int: %v", verbosity, err)
			}

			virtLauncherLogVerbosity = uint(verbosityInt)
		}
		compute.Env = append(compute.Env, k8sv1.EnvVar{Name: ENV_VAR_VIRT_LAUNCHER_LOG_VERBOSITY, Value: verbosityStr})
	}

	if labelValue, ok := vmi.Labels[debugLogs]; (ok && strings.EqualFold(labelValue, "true")) || virtLauncherLogVerbosity > EXT_LOG_VERBOSITY_THRESHOLD {
		compute.Env = append(compute.Env, k8sv1.EnvVar{Name: ENV_VAR_LIBVIRT_DEBUG_LOGS, Value: "1"})
	}
	if labelValue, ok := vmi.Labels[virtiofsDebugLogs]; (ok && strings.EqualFold(labelValue, "true")) || virtLauncherLogVerbosity > EXT_LOG_VERBOSITY_THRESHOLD {
		compute.Env = append(compute.Env, k8sv1.EnvVar{Name: ENV_VAR_VIRTIOFSD_DEBUG_LOGS, Value: "1"})
	}

	compute.Env = append(compute.Env, k8sv1.EnvVar{
		Name: ENV_VAR_POD_NAME,
		ValueFrom: &k8sv1.EnvVarSource{
			FieldRef: &k8sv1.ObjectFieldSelector{
				FieldPath: "metadata.name",
			},
		},
	})

	// Make sure the compute container is always the first since the mutating webhook shipped with the sriov operator
	// for adding the requested resources to the pod will add them to the first container of the list
	// Start assembling the container list; again, one pod can carry multiple containers
	containers := []k8sv1.Container{compute}
	// KubeVirt's containerDisk handling: a container image is used as a volume; usually not needed
	containersDisks := containerdisk.GenerateContainers(vmi, t.clusterConfig, imageIDs, containerDisks, virtBinDir)
	containers = append(containers, containersDisks...)
	
	// Analogous to kernel/vmlinuz: a container carrying the boot kernel
	kernelBootContainer := containerdisk.GenerateKernelBootContainer(vmi, t.clusterConfig, imageIDs, containerDisks, virtBinDir)
	if kernelBootContainer != nil {
		log.Log.Object(vmi).Infof("kernel boot container generated")
		containers = append(containers, *kernelBootContainer)
	}

	// virtiofs containers for filesystem volume mounts
	virtiofsContainers := generateVirtioFSContainers(vmi, t.launcherImage, t.clusterConfig)
	if virtiofsContainers != nil {
		containers = append(containers, virtiofsContainers...)
	}

	// Start a container that captures the serial console log
	sconsolelogContainer := generateSerialConsoleLogContainer(vmi, t.launcherImage, t.clusterConfig, virtLauncherLogVerbosity)
	if sconsolelogContainer != nil {
		containers = append(containers, *sconsolelogContainer)
	}

	// Not digging in here: this is more sidecar plumbing
	var sidecarVolumes []k8sv1.Volume
	for i, requestedHookSidecar := range requestedHookSidecarList {
		sidecarContainer := newSidecarContainerRenderer(
			sidecarContainerName(i), vmi, sidecarResources(vmi, t.clusterConfig), requestedHookSidecar, userId).Render(requestedHookSidecar.Command)

		if requestedHookSidecar.ConfigMap != nil {
			cm, err := t.virtClient.CoreV1().ConfigMaps(vmi.Namespace).Get(context.TODO(), requestedHookSidecar.ConfigMap.Name, metav1.GetOptions{})
			if err != nil {
				return nil, err
			}
			volumeSource := k8sv1.VolumeSource{
				ConfigMap: &k8sv1.ConfigMapVolumeSource{
					LocalObjectReference: k8sv1.LocalObjectReference{Name: cm.Name},
					DefaultMode:          pointer.P(int32(0755)),
				},
			}
			vol := k8sv1.Volume{
				Name:         cm.Name,
				VolumeSource: volumeSource,
			}
			sidecarVolumes = append(sidecarVolumes, vol)
		}
		if requestedHookSidecar.PVC != nil {
			volumeSource := k8sv1.VolumeSource{
				PersistentVolumeClaim: &k8sv1.PersistentVolumeClaimVolumeSource{
					ClaimName: requestedHookSidecar.PVC.Name,
				},
			}
			vol := k8sv1.Volume{
				Name:         requestedHookSidecar.PVC.Name,
				VolumeSource: volumeSource,
			}
			sidecarVolumes = append(sidecarVolumes, vol)
			if requestedHookSidecar.PVC.SharedComputePath != "" {
				containers[0].VolumeMounts = append(containers[0].VolumeMounts,
					k8sv1.VolumeMount{
						Name:      requestedHookSidecar.PVC.Name,
						MountPath: requestedHookSidecar.PVC.SharedComputePath,
					})
			}
		}
		containers = append(containers, sidecarContainer)
	}

	podAnnotations, err := t.generatePodAnnotations(vmi)
	if err != nil {
		return nil, err
	}
	if tempPod {
		// mark pod as temp - only used for provisioning
		podAnnotations[v1.EphemeralProvisioningObject] = "true"
	}

	var initContainers []k8sv1.Container

	if HaveContainerDiskVolume(vmi.Spec.Volumes) || util.HasKernelBootContainerImage(vmi) {
		initContainerCommand := []string{"/usr/bin/cp",
			"/usr/bin/container-disk",
			"/init/usr/bin/container-disk",
		}

		initContainers = append(
			initContainers,
			t.newInitContainerRenderer(vmi,
				initContainerVolumeMount(),
				initContainerResourceRequirementsForVMI(vmi, v1.ContainerDisk, t.clusterConfig),
				userId).Render(initContainerCommand))

		// this causes containerDisks to be pre-pulled before virt-launcher starts.
		initContainers = append(initContainers, containerdisk.GenerateInitContainers(vmi, t.clusterConfig, imageIDs, containerDisks, virtBinDir)...)

		kernelBootInitContainer := containerdisk.GenerateKernelBootInitContainer(vmi, t.clusterConfig, imageIDs, containerDisks, virtBinDir)
		if kernelBootInitContainer != nil {
			initContainers = append(initContainers, *kernelBootInitContainer)
		}
	}

	hostName := dns.SanitizeHostname(vmi)
	enableServiceLinks := false

	var podSeccompProfile *k8sv1.SeccompProfile = nil
	if seccompConf := t.clusterConfig.GetConfig().SeccompConfiguration; seccompConf != nil && seccompConf.VirtualMachineInstanceProfile != nil {
		vmProfile := seccompConf.VirtualMachineInstanceProfile
		if customProfile := vmProfile.CustomProfile; customProfile != nil {
			if customProfile.LocalhostProfile != nil {
				podSeccompProfile = &k8sv1.SeccompProfile{
					Type:             k8sv1.SeccompProfileTypeLocalhost,
					LocalhostProfile: customProfile.LocalhostProfile,
				}
			} else if customProfile.RuntimeDefaultProfile {
				podSeccompProfile = &k8sv1.SeccompProfile{
					Type: k8sv1.SeccompProfileTypeRuntimeDefault,
				}
			}
		}

	}
	// OK, finally build the Pod object
	pod := k8sv1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "virt-launcher-" + domain + "-",
			Labels:       podLabels(vmi, hostName),
			Annotations:  podAnnotations,
			OwnerReferences: []metav1.OwnerReference{
				*metav1.NewControllerRef(vmi, v1.VirtualMachineInstanceGroupVersionKind),
			},
		},
		Spec: k8sv1.PodSpec{
			Hostname:                      hostName,
			Subdomain:                     vmi.Spec.Subdomain,
			SecurityContext:               computePodSecurityContext(vmi, podSeccompProfile),
			TerminationGracePeriodSeconds: &gracePeriodKillAfter,
			RestartPolicy:                 k8sv1.RestartPolicyNever,
			Containers:                    containers,
			InitContainers:                initContainers,
			NodeSelector:                  t.newNodeSelectorRenderer(vmi).Render(),
			Volumes:                       volumeRenderer.Volumes(),
			ImagePullSecrets:              imagePullSecrets,
			DNSConfig:                     vmi.Spec.DNSConfig,
			DNSPolicy:                     vmi.Spec.DNSPolicy,
			ReadinessGates:                readinessGates(),
			EnableServiceLinks:            &enableServiceLinks,
			SchedulerName:                 vmi.Spec.SchedulerName,
			Tolerations:                   vmi.Spec.Tolerations,
			TopologySpreadConstraints:     vmi.Spec.TopologySpreadConstraints,
		},
	}

	alignPodMultiCategorySecurity(&pod, t.clusterConfig.GetSELinuxLauncherType(), t.clusterConfig.DockerSELinuxMCSWorkaroundEnabled())

	// If we have a runtime class specified, use it, otherwise don't set a runtimeClassName
	runtimeClassName := t.clusterConfig.GetDefaultRuntimeClass()
	if runtimeClassName != "" {
		pod.Spec.RuntimeClassName = &runtimeClassName
	}

	if vmi.Spec.PriorityClassName != "" {
		pod.Spec.PriorityClassName = vmi.Spec.PriorityClassName
	}

	if vmi.Spec.Affinity != nil {
		pod.Spec.Affinity = vmi.Spec.Affinity.DeepCopy()
	}

	setNodeAffinityForPod(vmi, &pod)

	serviceAccountName := serviceAccount(vmi.Spec.Volumes...)
	if len(serviceAccountName) > 0 {
		pod.Spec.ServiceAccountName = serviceAccountName
		automount := true
		pod.Spec.AutomountServiceAccountToken = &automount
	} else if istio.ProxyInjectionEnabled(vmi) {
		automount := true
		pod.Spec.AutomountServiceAccountToken = &automount
	} else {
		automount := false
		pod.Spec.AutomountServiceAccountToken = &automount
	}

	pod.Spec.Volumes = append(pod.Spec.Volumes, sidecarVolumes...)

	return &pod, nil
}

Hold on

So who exactly generates the XML we need?
