kubevirt-controller: walking through the Execute entry point
func Execute() {
var err error
// Create the app struct
var app = VirtControllerApp{}
// Load the default leader-election configuration
app.LeaderElection = leaderelectionconfig.DefaultLeaderElectionConfiguration()
// Parse command-line flags and service options
service.Setup(&app)
// Initialize readyChan
app.readyChan = make(chan bool, 1)
// Initialize logging
log.InitializeLogging("virt-controller")
// Throttle API access: this is where the flowcontrol (token bucket) rate limiter comes in (see the sketch after this function)
app.reloadableRateLimiter = ratelimiter.NewReloadableRateLimiter(flowcontrol.NewTokenBucketRateLimiter(virtconfig.DefaultVirtControllerQPS, virtconfig.DefaultVirtControllerBurst))
// Register REST config hooks for the KubeVirt client so that monitoring is added before its config is built; concretely, it instruments HTTP requests so the relevant performance metrics can be collected and reported. (from Copilot)
clientmetrics.RegisterRestConfigHooks()
// First obtain a client config for talking to the Kubernetes API
clientConfig, err := kubecli.GetKubevirtClientConfig()
if err != nil {
panic(err)
}
// Then point the clientConfig's RateLimiter at our own reloadable limiter
clientConfig.RateLimiter = app.reloadableRateLimiter
// Build a clientSet (a set of typed clients) from the config
app.clientSet, err = kubecli.GetKubevirtClientFromRESTConfig(clientConfig)
if err != nil {
golog.Fatal(err)
}
// Only here is the actual REST client created
app.restClient = app.clientSet.RestClient()
// Bootstrapping. From here on the initialization order is important
app.kubevirtNamespace, err = clientutil.GetNamespace()
if err != nil {
golog.Fatalf("Error searching for namespace: %v", err)
}
// Record the controller's host name
host, err := os.Hostname()
if err != nil {
golog.Fatalf("unable to get hostname: %v", err)
}
app.host = host
// Create a context; OpenStack has a similar concept. In general a context carries things every caller needs, and here it is mostly about cancellation
ctx, cancel := context.WithCancel(context.Background())
stopChan := ctx.Done()
app.ctx = ctx
// Create a KubeInformerFactory to watch and cache state changes of Kubernetes resources
app.informerFactory = controller.NewKubeInformerFactory(app.restClient, app.clientSet, nil, app.kubevirtNamespace)
// Informer for the CRD resources; the informerFactory is built on the Kubernetes cache machinery
app.crdInformer = app.informerFactory.CRD()
// Informer for the KubeVirt CR
app.kubeVirtInformer = app.informerFactory.KubeVirt()
// On watch errors, clear the cached API health version and fall back to the default watch error handler
if err := app.kubeVirtInformer.SetWatchErrorHandler(func(r *cache.Reflector, err error) {
apiHealthVersion.Clear()
cache.DefaultWatchErrorHandler(r, err)
}); err != nil {
golog.Fatalf("failed to set the watch error handler: %v", err)
}
app.informerFactory.Start(stopChan)
// Wait for the CRD and KubeVirt informer caches to sync
cache.WaitForCacheSync(stopChan, app.crdInformer.HasSynced, app.kubeVirtInformer.HasSynced)
// Build the clusterConfig (cluster-wide configuration)
app.clusterConfig, err = virtconfig.NewClusterConfig(app.crdInformer, app.kubeVirtInformer, app.kubevirtNamespace)
if err != nil {
panic(err)
}
app.reInitChan = make(chan string, 10)
// Check whether the cluster exposes the DataVolume API (CDI)
app.hasCDI = app.clusterConfig.HasDataVolumeAPI()
// Register a few callbacks that run whenever the cluster config changes
app.clusterConfig.SetConfigModifiedCallback(app.configModificationCallback)
app.clusterConfig.SetConfigModifiedCallback(app.shouldChangeLogVerbosity)
app.clusterConfig.SetConfigModifiedCallback(app.shouldChangeRateLimiter)
// Set up a few basic RESTful endpoints
webService := new(restful.WebService)
webService.Path("/").Consumes(restful.MIME_JSON).Produces(restful.MIME_JSON)
webService.Route(webService.GET("/healthz").To(healthz.KubeConnectionHealthzFuncFactory(app.clusterConfig, apiHealthVersion)).Doc("Health endpoint"))
webService.Route(webService.GET("/leader").To(app.leaderProbe).Doc("Leader endpoint"))
componentProfiler := profiler.NewProfileManager(app.clusterConfig)
webService.Route(webService.GET("/start-profiler").To(componentProfiler.HandleStartProfiler).Doc("start profiler endpoint"))
webService.Route(webService.GET("/stop-profiler").To(componentProfiler.HandleStopProfiler).Doc("stop profiler endpoint"))
webService.Route(webService.GET("/dump-profiler").To(componentProfiler.HandleDumpProfiler).Doc("dump profiler results endpoint"))
restful.Add(webService)
// Grab the long list of informers and caches the controllers will need
app.vmiInformer = app.informerFactory.VMI()
app.kvPodInformer = app.informerFactory.KubeVirtPod()
app.nodeInformer = app.informerFactory.KubeVirtNode()
app.namespaceStore = app.informerFactory.Namespace().GetStore()
app.namespaceInformer = app.informerFactory.Namespace()
app.vmiCache = app.vmiInformer.GetStore()
app.vmiRecorder = app.newRecorder(k8sv1.NamespaceAll, "virtualmachine-controller")
app.rsInformer = app.informerFactory.VMIReplicaSet()
app.poolInformer = app.informerFactory.VMPool()
app.persistentVolumeClaimInformer = app.informerFactory.PersistentVolumeClaim()
app.persistentVolumeClaimCache = app.persistentVolumeClaimInformer.GetStore()
app.pdbInformer = app.informerFactory.K8SInformerFactory().Policy().V1().PodDisruptionBudgets().Informer()
app.vmInformer = app.informerFactory.VirtualMachine()
app.migrationInformer = app.informerFactory.VirtualMachineInstanceMigration()
app.controllerRevisionInformer = app.informerFactory.ControllerRevision()
app.vmExportInformer = app.informerFactory.VirtualMachineExport()
app.vmSnapshotInformer = app.informerFactory.VirtualMachineSnapshot()
app.vmSnapshotContentInformer = app.informerFactory.VirtualMachineSnapshotContent()
app.vmRestoreInformer = app.informerFactory.VirtualMachineRestore()
app.storageClassInformer = app.informerFactory.StorageClass()
app.caExportConfigMapInformer = app.informerFactory.KubeVirtExportCAConfigMap()
app.exportRouteConfigMapInformer = app.informerFactory.ExportRouteConfigMap()
app.unmanagedSecretInformer = app.informerFactory.UnmanagedSecrets()
app.allPodInformer = app.informerFactory.Pod()
app.exportServiceInformer = app.informerFactory.ExportService()
app.resourceQuotaInformer = app.informerFactory.ResourceQuota()
// If the cluster provides the DataVolume (CDI) API, wire up the CDI informers
if app.hasCDI {
app.dataVolumeInformer = app.informerFactory.DataVolume()
app.cdiInformer = app.informerFactory.CDI()
app.cdiConfigInformer = app.informerFactory.CDIConfig()
app.dataSourceInformer = app.informerFactory.DataSource()
app.storageProfileInformer = app.informerFactory.StorageProfile()
log.Log.Infof("CDI detected, DataVolume integration enabled")
} else {
// Otherwise use dummy (fake) CDI informers.
// Add a dummy DataVolume informer in the event datavolume support
// is disabled. This lets the controller continue to work without
// requiring a separate branching code path.
app.dataVolumeInformer = app.informerFactory.DummyDataVolume()
app.cdiInformer = app.informerFactory.DummyCDI()
app.cdiConfigInformer = app.informerFactory.DummyCDIConfig()
app.dataSourceInformer = app.informerFactory.DummyDataSource()
app.storageProfileInformer = app.informerFactory.DummyStorageProfile()
log.Log.Infof("CDI not detected, DataVolume integration disabled")
}
// Check whether we are on a Red Hat OpenShift cluster
onOpenShift, err := clusterutil.IsOnOpenShift(app.clientSet)
if err != nil {
golog.Fatalf("Error determining cluster type: %v", err)
}
// Presumably OpenShift ships its own Route resource, hence the OperatorRoute informer on that platform
if onOpenShift {
log.Log.Info("we are on openshift")
app.routeCache = app.informerFactory.OperatorRoute().GetStore()
} else {
log.Log.Info("we are on kubernetes")
app.routeCache = app.informerFactory.DummyOperatorRoute().GetStore()
}
// Remaining informers and caches
app.ingressCache = app.informerFactory.Ingress().GetStore()
app.migrationPolicyInformer = app.informerFactory.MigrationPolicy()
app.vmCloneInformer = app.informerFactory.VirtualMachineClone()
app.instancetypeInformer = app.informerFactory.VirtualMachineInstancetype()
app.clusterInstancetypeInformer = app.informerFactory.VirtualMachineClusterInstancetype()
app.preferenceInformer = app.informerFactory.VirtualMachinePreference()
app.clusterPreferenceInformer = app.informerFactory.VirtualMachineClusterPreference()
app.onOpenshift = onOpenShift
if err := metrics.SetupMetrics(
app.vmInformer,
app.vmiInformer,
app.clusterInstancetypeInformer,
app.instancetypeInformer,
app.clusterPreferenceInformer,
app.preferenceInformer,
app.migrationInformer,
app.clusterConfig,
); err != nil {
golog.Fatal(err)
}
// Only now does the real initialization begin: common pieces, the ReplicaSet controller, and so on
app.initCommon()
app.initReplicaSet()
app.initPool()
// This one matters: it is the entry point for the VirtualMachine controllers
app.initVirtualMachines()
app.initDisruptionBudgetController()
app.initEvacuationController()
app.initSnapshotController()
app.initRestoreController()
app.initExportController()
app.initWorkloadUpdaterController()
app.initCloneController()
go app.Run()
<-app.reInitChan
cancel()
}
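The rate limiter wired in near the top of Execute is client-go's token bucket. A minimal, self-contained sketch of the same pattern with plain client-go (the helper name, kubeconfig handling and the QPS/burst numbers are illustrative; this is not KubeVirt's reloadable wrapper):

package example

import (
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/util/flowcontrol"
)

// buildRateLimitedClient shows the idea behind app.reloadableRateLimiter:
// the rest.Config carries a flowcontrol.RateLimiter, so every request made
// through the resulting clientset goes through a token bucket.
func buildRateLimitedClient(kubeconfig string) (*kubernetes.Clientset, error) {
	cfg, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
	if err != nil {
		return nil, err
	}
	// 200 QPS with a burst of 400 (illustrative numbers, not KubeVirt's defaults)
	cfg.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(200, 400)
	return kubernetes.NewForConfig(cfg)
}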
Breaking down the controllers
The context mechanism
Both Kubernetes and OpenStack implement a context mechanism: within the lifetime of a request, the context gets passed back and forth between functions. Having it is one thing, though, and actually using it is another. In OpenStack the context usually carries authentication data and a request ID everywhere it goes; the Go context used in Kubernetes does not appear to implement anything like that, and here it is mostly about cancellation.
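A minimal Go sketch of how that looks in practice: the context mostly carries cancellation, and its Done() channel doubles as the stop channel handed to informers and controllers (the function names are illustrative):

package example

import (
	"context"
	"fmt"
	"time"
)

// runUntilCancelled mirrors how virt-controller uses ctx.Done() as stopChan:
// the worker keeps going until someone calls cancel().
func runUntilCancelled(ctx context.Context) {
	stopChan := ctx.Done()
	for {
		select {
		case <-stopChan:
			fmt.Println("stop requested, shutting down")
			return
		case <-time.After(time.Second):
			fmt.Println("still working")
		}
	}
}

func example() {
	ctx, cancel := context.WithCancel(context.Background())
	go runUntilCancelled(ctx)
	time.Sleep(3 * time.Second)
	cancel() // equivalent to the cancel() at the end of Execute()
}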
The cache mechanism
When one component needs another component's resources, there are usually two ways to get them: an API call or an RPC. In systems with little interaction, or where speed does not matter, both are fine. But what if requests have to be fast? Bring in Redis, memcached and the like; their performance is more than enough.
The catch is that all of these still go over the network, and the network brings unpredictable latency, interruptions and so on. Kubernetes therefore uses a cache mechanism: the cache keeps watching resources for changes and, whenever something changes, stores the new state on the local machine.
In other words, resources are still watched and fetched through the API; they are just stored locally, and the local service only has to keep reading from that cache.
OpenStack rarely uses this kind of caching; the dhcp_agent is one of the few places that does.
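A minimal client-go sketch of that watch-and-cache pattern, using a plain SharedInformerFactory for Pods (KubeVirt's KubeInformerFactory wraps the same machinery; the helper here is illustrative):

package example

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
)

// watchPods starts an informer that keeps a local cache of Pods in sync
// with the API server and reacts to add events.
func watchPods(clientset kubernetes.Interface, stopCh <-chan struct{}) {
	factory := informers.NewSharedInformerFactory(clientset, 30*time.Second)
	podInformer := factory.Core().V1().Pods().Informer()

	podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			pod := obj.(*corev1.Pod)
			fmt.Printf("pod added: %s/%s\n", pod.Namespace, pod.Name)
		},
	})

	factory.Start(stopCh)
	// Block until the local cache has been populated, exactly like
	// cache.WaitForCacheSync in Execute() above.
	cache.WaitForCacheSync(stopCh, podInformer.HasSynced)

	// From here on, reads go against the local store instead of the API server.
	fmt.Println("cached pods:", len(podInformer.GetStore().List()))
}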
The individual controllers
During initialization, the various init functions create a number of controllers. Each controller wires up the corresponding informers so it can watch for events, and then registers handlers for those events via AddEventHandler, as in the snippet below.
c.hasSynced = func() bool {
return vmiInformer.HasSynced() && vmInformer.HasSynced() &&
dataVolumeInformer.HasSynced() && dataSourceInformer.HasSynced() &&
pvcInformer.HasSynced() && crInformer.HasSynced() && podInformer.HasSynced()
}
_, err := vmInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: c.addVirtualMachine,
DeleteFunc: c.deleteVirtualMachine,
UpdateFunc: c.updateVirtualMachine,
})
if err != nil {
return nil, err
}
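The handlers registered above usually do nothing more than derive a "namespace/name" key from the object and push it onto a rate-limited work queue, which the workers later drain in Execute(). A sketch of that common pattern (the type and helper names are illustrative, not KubeVirt's exact code):

package example

import (
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"
)

type exampleController struct {
	queue workqueue.RateLimitingInterface
}

// enqueue turns an object into a "namespace/name" key and adds it to the
// work queue; workers later pull the key back out and process it.
func (c *exampleController) enqueue(obj interface{}) {
	key, err := cache.MetaNamespaceKeyFunc(obj)
	if err != nil {
		return // object without usable metadata; nothing to do
	}
	c.queue.Add(key)
}

func newExampleController() *exampleController {
	return &exampleController{
		queue: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
	}
}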
The Run method
The final go app.Run() call is what eventually gets each controller's Run method going. Inside Run, this ultimately happens through vca.leaderElector.Run(vca.ctx).
No matter how many replicas of the controller exist, only one of them is the active leader; the others sit in hot standby, similar to OVN.
This Run is implemented by Kubernetes (client-go), so we basically leave it alone and just use it. What our code has to provide are the OnStartedLeading and OnStoppedLeading callbacks.
func (le *LeaderElector) Run(ctx context.Context) {
defer runtime.HandleCrash()
defer le.config.Callbacks.OnStoppedLeading()
if !le.acquire(ctx) {
return // ctx signalled done
}
ctx, cancel := context.WithCancel(ctx)
defer cancel()
go le.config.Callbacks.OnStartedLeading(ctx)
le.renew(ctx)
}
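For reference, this is roughly how a caller plugs its own logic into those two callbacks with client-go's leaderelection package (the lock name, namespace, identity and timings below are illustrative):

package example

import (
	"context"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/leaderelection"
	"k8s.io/client-go/tools/leaderelection/resourcelock"
)

// runWithLeaderElection starts controllers only on the replica that wins the
// lease; the others block while trying to acquire it, acting as hot standbys.
func runWithLeaderElection(ctx context.Context, clientset kubernetes.Interface, id string, startControllers func(ctx context.Context)) {
	lock := &resourcelock.LeaseLock{
		LeaseMeta:  metav1.ObjectMeta{Name: "example-controller", Namespace: "kubevirt"},
		Client:     clientset.CoordinationV1(),
		LockConfig: resourcelock.ResourceLockConfig{Identity: id},
	}

	leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
		Lock:          lock,
		LeaseDuration: 15 * time.Second,
		RenewDeadline: 10 * time.Second,
		RetryPeriod:   2 * time.Second,
		Callbacks: leaderelection.LeaderCallbacks{
			OnStartedLeading: startControllers, // start the controller goroutines here
			OnStoppedLeading: func() {
				// lost the lease: stop doing work (virt-controller re-initializes)
			},
		},
	})
}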
In the OnStartedLeading callback in application.go, we finally see each controller's Run method being invoked:
go vca.evacuationController.Run(vca.evacuationControllerThreads, stop)
go vca.disruptionBudgetController.Run(vca.disruptionBudgetControllerThreads, stop)
go vca.nodeController.Run(vca.nodeControllerThreads, stop)
go vca.vmiController.Run(vca.vmiControllerThreads, stop)
go vca.rsController.Run(vca.rsControllerThreads, stop)
go vca.poolController.Run(vca.poolControllerThreads, stop)
go vca.vmController.Run(vca.vmControllerThreads, stop)
go vca.migrationController.Run(vca.migrationControllerThreads, stop)
A controller's Run method:
func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) {
defer controller.HandlePanic()
defer c.Queue.ShutDown()
log.Log.Info("Starting vmi controller.")
// Wait for cache sync before we start the pod controller
cache.WaitForCacheSync(stopCh, c.hasSynced)
// Sync the CIDs from existing VMIs
var vmis []*virtv1.VirtualMachineInstance
for _, obj := range c.vmiIndexer.List() {
vmi := obj.(*virtv1.VirtualMachineInstance)
vmis = append(vmis, vmi)
}
c.cidsMap.Sync(vmis)
// Start the actual work
// Start the configured number (threadiness) of worker goroutines
for i := 0; i < threadiness; i++ {
// wait.Until keeps runWorker alive like a daemon, re-running it one second after it returns,
// so even in the worst case a newly created VMI starts being processed within about a second.
go wait.Until(c.runWorker, time.Second, stopCh)
}
<-stopCh
log.Log.Info("Stopping vmi controller.")
}
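The runWorker handed to wait.Until is conventionally just a tight loop over Execute(); a sketch of that usual controller idiom on the Controller shown above (not a quote of KubeVirt's source):

// runWorker keeps pulling items off the queue until Execute reports that the
// queue has been shut down; wait.Until then re-runs it a second later.
func (c *Controller) runWorker() {
	for c.Execute() {
	}
}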
Run eventually lands in Execute():
func (c *Controller) Execute() bool {
// Take one key from the work queue
key, quit := c.Queue.Get()
// quit is only true when the queue is shutting down; returning false ends the worker loop
if quit {
return false
}
// Start a trace for this work item
virtControllerVMIWorkQueueTracer.StartTrace(key, "virt-controller VMI workqueue", trace.Field{Key: "Workqueue Key", Value: key})
defer virtControllerVMIWorkQueueTracer.StopTrace(key)
defer c.Queue.Done(key)
// At last, the actual work with no further indirection
err := c.execute(key)
if err != nil {
log.Log.Reason(err).Infof("reenqueuing VirtualMachineInstance %v", key)
c.Queue.AddRateLimited(key)
} else {
log.Log.V(4).Infof("processed VirtualMachineInstance %v", key)
c.Queue.Forget(key)
}
return true
}
And the final execute function:
func (c *Controller) execute(key string) error {
// Fetch the latest VMI state by key from the local cache (indexer)
obj, exists, err := c.vmiIndexer.GetByKey(key)
if err != nil {
return err
}
// Once all finalizers are removed the vmi gets deleted and we can clean all expectations
if !exists {
c.podExpectations.DeleteExpectations(key)
c.vmiExpectations.DeleteExpectations(key)
c.cidsMap.Remove(key)
return nil
}
// Type assertion: panics if obj is not a VirtualMachineInstance
vmi := obj.(*virtv1.VirtualMachineInstance)
// Object-scoped logger
logger := log.Log.Object(vmi)
// this must be first step in execution. Writing the object
// when api version changes ensures our api stored version is updated.
if !controller.ObservedLatestApiVersionAnnotation(vmi) {
vmi := vmi.DeepCopy()
controller.SetLatestApiVersionAnnotation(vmi)
key := controller.VirtualMachineInstanceKey(vmi)
c.vmiExpectations.SetExpectations(key, 1, 0)
_, err = c.clientset.VirtualMachineInstance(vmi.ObjectMeta.Namespace).Update(context.Background(), vmi, v1.UpdateOptions{})
if err != nil {
c.vmiExpectations.LowerExpectations(key, 1, 0)
return err
}
return nil
}
// If needsSync is true (expectations fulfilled) we can make safe assumptions about whether virt-handler or virt-controller owns the pod
needsSync := c.podExpectations.SatisfiedExpectations(key) && c.vmiExpectations.SatisfiedExpectations(key) && c.pvcExpectations.SatisfiedExpectations(key)
if !needsSync {
return nil
}
// Only consider pods which belong to this vmi
// excluding unfinalized migration targets from this list.
// Look up the current pod; for a brand-new VMI there is no pod yet and one will be created later
pod, err := controller.CurrentVMIPod(vmi, c.podIndexer)
if err != nil {
logger.Reason(err).Error("Failed to fetch pods for namespace from cache.")
return err
}
// Get all dataVolumes associated with this vmi
// The DataVolumes must have been defined beforehand.
dataVolumes, err := storagetypes.ListDataVolumesFromVolumes(vmi.Namespace, vmi.Spec.Volumes, c.dataVolumeIndexer, c.pvcIndexer)
if err != nil {
logger.Reason(err).Error("Failed to fetch dataVolumes for namespace from cache.")
return err
}
// sync the pod; this is where the virtual machine really gets created
syncErr, pod := c.sync(vmi, pod, dataVolumes)
err = c.updateStatus(vmi, pod, dataVolumes, syncErr)
if err != nil {
return err
}
if syncErr != nil && syncErr.RequiresRequeue() {
return syncErr
}
return nil
}
Finally we reach the sync function, which actually creates the Pod and launches the virtual machine.
// Inside sync, the function to focus on is this one:
templatePod, err = c.templateService.RenderLaunchManifest(vmi)
// This renders the final launch pod; bringing the VM up starts from this manifest.
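Conceptually, sync renders the launch manifest and then submits it to the API server. A rough sketch of that last step with plain client-go (the helper is illustrative; KubeVirt's actual sync also handles expectations, ownership and error bookkeeping):

package example

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// createLaunchPod submits a rendered virt-launcher pod to the API server.
// The scheduler then places it, and the kubelet starts the containers that
// will eventually boot the VM.
func createLaunchPod(ctx context.Context, clientset kubernetes.Interface, templatePod *corev1.Pod) (*corev1.Pod, error) {
	return clientset.CoreV1().Pods(templatePod.Namespace).Create(ctx, templatePod, metav1.CreateOptions{})
}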
Rendering the pod
func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, imageIDs map[string]string, backendStoragePVCName string, tempPod bool) (*k8sv1.Pod, error) {
// Three preconditions: not nil, has a name, has a namespace
precond.MustNotBeNil(vmi)
domain := precond.MustNotBeEmpty(vmi.GetObjectMeta().GetName())
namespace := precond.MustNotBeEmpty(vmi.GetObjectMeta().GetNamespace())
var userId int64 = util.RootUser
// Is the VMI supposed to run as non-root?
nonRoot := util.IsNonRootVMI(vmi)
if nonRoot {
userId = util.NonRootUID
}
// Kubernetes grace period: a deleted Pod is not removed immediately, it gets a grace period to run its cleanup
gracePeriodSeconds := gracePeriodInSeconds(vmi)
// Image pull secrets for the VMI's volumes; usually not needed
imagePullSecrets := imgPullSecrets(vmi.Spec.Volumes...)
if t.imagePullSecret != "" {
imagePullSecrets = appendUniqueImagePullSecret(imagePullSecrets, k8sv1.LocalObjectReference{
Name: t.imagePullSecret,
})
}
// Pad the virt-launcher grace period.
// Ideally we want virt-handler to handle tearing down
// the vmi without virt-launcher's termination forcing
// the vmi down.
// Similar to OpenStack: a soft shutdown can hang on problems inside the guest,
// so once the timeout expires a hard shutdown is performed.
gracePeriodSeconds = gracePeriodSeconds + int64(15)
gracePeriodKillAfter := gracePeriodSeconds + int64(15)
// Network lookup and rendering; skip the details for now
networkToResourceMap, err := network.GetNetworkToResourceMap(t.virtClient, vmi)
if err != nil {
return nil, err
}
resourceRenderer, err := t.newResourceRenderer(vmi, networkToResourceMap)
if err != nil {
return nil, err
}
resources := resourceRenderer.ResourceRequirements()
// Resolve the OVMF path for UEFI boot.
ovmfPath := t.clusterConfig.GetOVMFPath(vmi.Spec.Architecture)
// Collect the requested hook sidecars
var requestedHookSidecarList hooks.HookSidecarList
for _, sidecarCreator := range t.sidecarCreators {
sidecars, err := sidecarCreator(vmi, t.clusterConfig.GetConfig())
if err != nil {
return nil, err
}
requestedHookSidecarList = append(requestedHookSidecarList, sidecars...)
}
// The pod's startup command
var command []string
if tempPod {
logger := log.DefaultLogger()
logger.Infof("RUNNING doppleganger pod for %s", vmi.Name)
command = []string{"/bin/bash",
"-c",
"echo", "bound PVCs"}
} else {
command = []string{"/usr/bin/virt-launcher-monitor",
"--qemu-timeout", generateQemuTimeoutWithJitter(t.launcherQemuTimeout),
"--name", domain,
"--uid", string(vmi.UID),
"--namespace", namespace,
"--kubevirt-share-dir", t.virtShareDir,
"--ephemeral-disk-dir", t.ephemeralDiskDir,
"--container-disk-dir", t.containerDiskDir,
"--grace-period-seconds", strconv.Itoa(int(gracePeriodSeconds)),
"--hook-sidecars", strconv.Itoa(len(requestedHookSidecarList)),
"--ovmf-path", ovmfPath,
}
if nonRoot {
command = append(command, "--run-as-nonroot")
}
if customDebugFilters, exists := vmi.Annotations[v1.CustomLibvirtLogFiltersAnnotation]; exists {
log.Log.Object(vmi).Infof("Applying custom debug filters for vmi %s: %s", vmi.Name, customDebugFilters)
command = append(command, "--libvirt-log-filters", customDebugFilters)
}
}
// Allow software emulation (no KVM) if the cluster config permits it
if t.clusterConfig.AllowEmulation() {
command = append(command, "--allow-emulation")
}
if checkForKeepLauncherAfterFailure(vmi) {
command = append(command, "--keep-after-failure")
}
_, ok := vmi.Annotations[v1.FuncTestLauncherFailFastAnnotation]
if ok {
command = append(command, "--simulate-crash")
}
// Configure volumes and PVCs
volumeRenderer, err := t.newVolumeRenderer(vmi, namespace, requestedHookSidecarList, backendStoragePVCName)
if err != nil {
return nil, err
}
// Build the compute container renderer; remember, a pod can contain multiple containers, even though it is usually one-to-one
compute := t.newContainerSpecRenderer(vmi, volumeRenderer, resources, userId).Render(command)
for networkName, resourceName := range networkToResourceMap {
varName := fmt.Sprintf("KUBEVIRT_RESOURCE_NAME_%s", networkName)
compute.Env = append(compute.Env, k8sv1.EnvVar{Name: varName, Value: resourceName})
}
virtLauncherLogVerbosity := t.clusterConfig.GetVirtLauncherVerbosity()
if verbosity, isSet := vmi.Labels[logVerbosity]; isSet || virtLauncherLogVerbosity != virtconfig.DefaultVirtLauncherLogVerbosity {
// Override the cluster wide verbosity level if a specific value has been provided for this VMI
verbosityStr := fmt.Sprint(virtLauncherLogVerbosity)
if isSet {
verbosityStr = verbosity
verbosityInt, err := strconv.Atoi(verbosity)
if err != nil {
return nil, fmt.Errorf("verbosity %s cannot cast to int: %v", verbosity, err)
}
virtLauncherLogVerbosity = uint(verbosityInt)
}
compute.Env = append(compute.Env, k8sv1.EnvVar{Name: ENV_VAR_VIRT_LAUNCHER_LOG_VERBOSITY, Value: verbosityStr})
}
if labelValue, ok := vmi.Labels[debugLogs]; (ok && strings.EqualFold(labelValue, "true")) || virtLauncherLogVerbosity > EXT_LOG_VERBOSITY_THRESHOLD {
compute.Env = append(compute.Env, k8sv1.EnvVar{Name: ENV_VAR_LIBVIRT_DEBUG_LOGS, Value: "1"})
}
if labelValue, ok := vmi.Labels[virtiofsDebugLogs]; (ok && strings.EqualFold(labelValue, "true")) || virtLauncherLogVerbosity > EXT_LOG_VERBOSITY_THRESHOLD {
compute.Env = append(compute.Env, k8sv1.EnvVar{Name: ENV_VAR_VIRTIOFSD_DEBUG_LOGS, Value: "1"})
}
compute.Env = append(compute.Env, k8sv1.EnvVar{
Name: ENV_VAR_POD_NAME,
ValueFrom: &k8sv1.EnvVarSource{
FieldRef: &k8sv1.ObjectFieldSelector{
FieldPath: "metadata.name",
},
},
})
// Make sure the compute container is always the first since the mutating webhook shipped with the sriov operator
// for adding the requested resources to the pod will add them to the first container of the list
// Start assembling the container list; again, one pod can hold many containers
containers := []k8sv1.Container{compute}
// KubeVirt containerDisk handling: a container image used as a volume; usually not needed.
containersDisks := containerdisk.GenerateContainers(vmi, t.clusterConfig, imageIDs, containerDisks, virtBinDir)
containers = append(containers, containersDisks...)
// Same idea for the kernel boot container (kernel/vmlinuz style boot)
kernelBootContainer := containerdisk.GenerateKernelBootContainer(vmi, t.clusterConfig, imageIDs, containerDisks, virtBinDir)
if kernelBootContainer != nil {
log.Log.Object(vmi).Infof("kernel boot container generated")
containers = append(containers, *kernelBootContainer)
}
// virtiofs containers for filesystem volume mounts.
virtiofsContainers := generateVirtioFSContainers(vmi, t.launcherImage, t.clusterConfig)
if virtiofsContainers != nil {
containers = append(containers, virtiofsContainers...)
}
// Add a serial console log container
sconsolelogContainer := generateSerialConsoleLogContainer(vmi, t.launcherImage, t.clusterConfig, virtLauncherLogVerbosity)
if sconsolelogContainer != nil {
containers = append(containers, *sconsolelogContainer)
}
// Skipping the details here; this is more sidecar plumbing.
var sidecarVolumes []k8sv1.Volume
for i, requestedHookSidecar := range requestedHookSidecarList {
sidecarContainer := newSidecarContainerRenderer(
sidecarContainerName(i), vmi, sidecarResources(vmi, t.clusterConfig), requestedHookSidecar, userId).Render(requestedHookSidecar.Command)
if requestedHookSidecar.ConfigMap != nil {
cm, err := t.virtClient.CoreV1().ConfigMaps(vmi.Namespace).Get(context.TODO(), requestedHookSidecar.ConfigMap.Name, metav1.GetOptions{})
if err != nil {
return nil, err
}
volumeSource := k8sv1.VolumeSource{
ConfigMap: &k8sv1.ConfigMapVolumeSource{
LocalObjectReference: k8sv1.LocalObjectReference{Name: cm.Name},
DefaultMode: pointer.P(int32(0755)),
},
}
vol := k8sv1.Volume{
Name: cm.Name,
VolumeSource: volumeSource,
}
sidecarVolumes = append(sidecarVolumes, vol)
}
if requestedHookSidecar.PVC != nil {
volumeSource := k8sv1.VolumeSource{
PersistentVolumeClaim: &k8sv1.PersistentVolumeClaimVolumeSource{
ClaimName: requestedHookSidecar.PVC.Name,
},
}
vol := k8sv1.Volume{
Name: requestedHookSidecar.PVC.Name,
VolumeSource: volumeSource,
}
sidecarVolumes = append(sidecarVolumes, vol)
if requestedHookSidecar.PVC.SharedComputePath != "" {
containers[0].VolumeMounts = append(containers[0].VolumeMounts,
k8sv1.VolumeMount{
Name: requestedHookSidecar.PVC.Name,
MountPath: requestedHookSidecar.PVC.SharedComputePath,
})
}
}
containers = append(containers, sidecarContainer)
}
podAnnotations, err := t.generatePodAnnotations(vmi)
if err != nil {
return nil, err
}
if tempPod {
// mark pod as temp - only used for provisioning
podAnnotations[v1.EphemeralProvisioningObject] = "true"
}
var initContainers []k8sv1.Container
if HaveContainerDiskVolume(vmi.Spec.Volumes) || util.HasKernelBootContainerImage(vmi) {
initContainerCommand := []string{"/usr/bin/cp",
"/usr/bin/container-disk",
"/init/usr/bin/container-disk",
}
initContainers = append(
initContainers,
t.newInitContainerRenderer(vmi,
initContainerVolumeMount(),
initContainerResourceRequirementsForVMI(vmi, v1.ContainerDisk, t.clusterConfig),
userId).Render(initContainerCommand))
// this causes containerDisks to be pre-pulled before virt-launcher starts.
initContainers = append(initContainers, containerdisk.GenerateInitContainers(vmi, t.clusterConfig, imageIDs, containerDisks, virtBinDir)...)
kernelBootInitContainer := containerdisk.GenerateKernelBootInitContainer(vmi, t.clusterConfig, imageIDs, containerDisks, virtBinDir)
if kernelBootInitContainer != nil {
initContainers = append(initContainers, *kernelBootInitContainer)
}
}
hostName := dns.SanitizeHostname(vmi)
enableServiceLinks := false
var podSeccompProfile *k8sv1.SeccompProfile = nil
if seccompConf := t.clusterConfig.GetConfig().SeccompConfiguration; seccompConf != nil && seccompConf.VirtualMachineInstanceProfile != nil {
vmProfile := seccompConf.VirtualMachineInstanceProfile
if customProfile := vmProfile.CustomProfile; customProfile != nil {
if customProfile.LocalhostProfile != nil {
podSeccompProfile = &k8sv1.SeccompProfile{
Type: k8sv1.SeccompProfileTypeLocalhost,
LocalhostProfile: customProfile.LocalhostProfile,
}
} else if customProfile.RuntimeDefaultProfile {
podSeccompProfile = &k8sv1.SeccompProfile{
Type: k8sv1.SeccompProfileTypeRuntimeDefault,
}
}
}
}
// Finally, assemble the Pod object
pod := k8sv1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "virt-launcher-" + domain + "-",
Labels: podLabels(vmi, hostName),
Annotations: podAnnotations,
OwnerReferences: []metav1.OwnerReference{
*metav1.NewControllerRef(vmi, v1.VirtualMachineInstanceGroupVersionKind),
},
},
Spec: k8sv1.PodSpec{
Hostname: hostName,
Subdomain: vmi.Spec.Subdomain,
SecurityContext: computePodSecurityContext(vmi, podSeccompProfile),
TerminationGracePeriodSeconds: &gracePeriodKillAfter,
RestartPolicy: k8sv1.RestartPolicyNever,
Containers: containers,
InitContainers: initContainers,
NodeSelector: t.newNodeSelectorRenderer(vmi).Render(),
Volumes: volumeRenderer.Volumes(),
ImagePullSecrets: imagePullSecrets,
DNSConfig: vmi.Spec.DNSConfig,
DNSPolicy: vmi.Spec.DNSPolicy,
ReadinessGates: readinessGates(),
EnableServiceLinks: &enableServiceLinks,
SchedulerName: vmi.Spec.SchedulerName,
Tolerations: vmi.Spec.Tolerations,
TopologySpreadConstraints: vmi.Spec.TopologySpreadConstraints,
},
}
alignPodMultiCategorySecurity(&pod, t.clusterConfig.GetSELinuxLauncherType(), t.clusterConfig.DockerSELinuxMCSWorkaroundEnabled())
// If we have a runtime class specified, use it, otherwise don't set a runtimeClassName
runtimeClassName := t.clusterConfig.GetDefaultRuntimeClass()
if runtimeClassName != "" {
pod.Spec.RuntimeClassName = &runtimeClassName
}
if vmi.Spec.PriorityClassName != "" {
pod.Spec.PriorityClassName = vmi.Spec.PriorityClassName
}
if vmi.Spec.Affinity != nil {
pod.Spec.Affinity = vmi.Spec.Affinity.DeepCopy()
}
setNodeAffinityForPod(vmi, &pod)
serviceAccountName := serviceAccount(vmi.Spec.Volumes...)
if len(serviceAccountName) > 0 {
pod.Spec.ServiceAccountName = serviceAccountName
automount := true
pod.Spec.AutomountServiceAccountToken = &automount
} else if istio.ProxyInjectionEnabled(vmi) {
automount := true
pod.Spec.AutomountServiceAccountToken = &automount
} else {
automount := false
pod.Spec.AutomountServiceAccountToken = &automount
}
pod.Spec.Volumes = append(pod.Spec.Volumes, sidecarVolumes...)
return &pod, nil
}
Hold on, though
So who actually generates the XML we need?