Virtualbox源码分析8: VM manager

·Virtualbox源码分析8: VM manager

VMM的代码里,除了VT-X虚拟化框架之外,还有各种Manager,从vmR3InitRing3()这个函数里,可以看到所有manager的初始化函数

/**
 * Initializes all R3 components of the VM
 */
static int vmR3InitRing3(PVM pVM, PUVM pUVM)
{
    //对每个VCPU向R0注册Emulate thread
    for (VMCPUID idCpu = 1; idCpu < pVM->cCpus; idCpu++)
    {
        rc = VMR3ReqCallWait(pVM, idCpu, (PFNRT)vmR3RegisterEMT, 2, pVM, idCpu);
    }
	//NEM: hyper-v模式下的native execution manager
	rc = NEMR3InitConfig(pVM);
	//Memory Manager.
	rc = MMR3Init(pVM);
	// CPU Monitor / Manager.
	rc = CPUMR3Init(pVM);
	rc = NEMR3InitAfterCPUM(pVM);
	// Page Manager and Monitor.
	rc = PGMR3Init(pVM);
	//Memory Manager
	rc = MMR3InitPaging(pVM);
	// Time Manager.
	rc = TMR3Init(pVM);
    // The Virtual Machine Monitor
	rc = VMMR3Init(pVM);
	//SELM - The Selector Manager.主要用于二进制翻译模式
    rc = SELMR3Init(pVM);
    //TRPM - The Trap Monitor.用于二进制翻译模式处理异常分发
	rc = TRPMR3Init(pVM);
	//SSM - Saved State Manager. 用于保存VM状态
	rc = SSMR3RegisterStub(pVM, "CSAM", 0);
	rc = SSMR3RegisterStub(pVM, "PATM", 0);
	//IOM - Input / Output Monitor.
	rc = IOMR3Init(pVM);
	//Execution Monitor / Manager. VT-X vmexit里的模拟执行,和二进制翻译模式下的模拟执行(不是二进制翻译)
	rc = EMR3Init(pVM);
	//IEM - Interpreted Execution Manager.
	rc = IEMR3Init(pVM);
	// DBGF - Debugger Facility, 用于调试支持
	rc = DBGFR3Init(pVM);
	//GIM - Guest Interface Manager.
	rc = GIMR3Init(pVM);
	//PDM - Pluggable Device Manager. 可插拔的设备管理
	rc = PDMR3Init(pVM);
}

manager太多了,先从VM manager开始看起

8.1 VM Manager概述:

VM Manager提供一系列API,用于创建运行Guest的VMM实例、调度GuestOS的运行(Emulation Thread调度)、上报GuestOS运行错误等。代码分为R3和R0两部分:R3代码位于VMM\VMMR3\VM.cpp,R0代码位于VMM\VMMR0\GVMMR0.cpp(见8.3节)。

VM Manager可以理解成其他各个manager的包装,负责初始化和调用这些manager里的API。

8.2 VM Manager R3 APIs:

VM manager R3部分提供了一系列对外管理VM的API,调用这些API可以创建,启动,销毁,暂停,继续,调度多个虚拟机

VMMR3DECL(int)          VMR3Create(uint32_t cCpus, PCVMM2USERMETHODS pVm2UserCbs,
                                   PFNVMATERROR pfnVMAtError, void *pvUserVM,
                                   PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM,
                                   PVM *ppVM, PUVM *ppUVM);
VMMR3DECL(int)          VMR3PowerOn(PUVM pUVM);
VMMR3DECL(int)          VMR3Suspend(PUVM pUVM, VMSUSPENDREASON enmReason);
。。。。

VMR3Create

VMMR3DECL(int)   VMR3Create(uint32_t cCpus, PCVMM2USERMETHODS pVmm2UserMethods,
                            PFNVMATERROR pfnVMAtError, void *pvUserVM,
                            PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM,
                            PVM *ppVM, PUVM *ppUVM)

{
	vmR3CreateUVM(cCpus, pVmm2UserMethods, &pUVM);
	//初始化support drv(vboxdrv.sys)
	rc = SUPR3Init(&pUVM->vm.s.pSession);
	//call vmR3CreateU
	rc = VMR3ReqCallU(pUVM, VMCPUID_ANY, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VBOX_STATUS,
                              (PFNRT)vmR3CreateU, 4, pUVM, cCpus, pfnCFGMConstructor, pvUserCFGM);
	if (RT_SUCCESS(rc))
    {
    		//创建虚拟机成功
            if (ppVM)
                *ppVM = pUVM->pVM;
            if (ppUVM)
            {
                VMR3RetainUVM(pUVM);
                *ppUVM = pUVM;
            }
             return VINF_SUCCESS;
    }
    //下面是创建VM出错的情况,设置各种错误码,让UI界面弹框提示
    //比如:
    VERR_VMX_IN_VMX_ROOT_MODE   //无法进入vmx root模式
    ......//一些启动VMX失败等错误
	VERR_VM_DRIVER_LOAD_ERROR //加载Vboxdrv.sys失败
	....//等
	
}

vmR3CreateUVM

static int vmR3CreateUVM(uint32_t cCpus, PCVMM2USERMETHODS pVmm2UserMethods, PUVM *ppUVM)
{
	//一些结构体内存申请和初始化
	//初始化PUVM结构体
	PUVM pUVM = (PUVM)RTMemPageAllocZ(RT_UOFFSETOF_DYN(UVM, aCpus[cCpus]));
	pUVM->vm.s.enmHaltMethod = VMHALTMETHOD_BOOTSTRAP;
	int rc = RTTlsAllocEx(&pUVM->vm.s.idxTLS, NULL);
	
	//3个manager的UVM,创建一些结构体的内存
	rc = PDMR3InitUVM(pUVM);
	rc = STAMR3InitUVM(pUVM);
	rc = MMR3InitUVM(pUVM);
	
	//对每个VCPU,都创建一个vmR3EmulationThread用于运行GuestOS
    //GuestOS的调度单位是VCPU
	for (i = 0; i < cCpus; i++)
     {
          rc = RTThreadCreateF(&pUVM->aCpus[i].vm.s.ThreadEMT, vmR3EmulationThread, &pUVM->aCpus[i],
                               _1M, RTTHREADTYPE_EMULATION, RTTHREADFLAGS_WAITABLE,
                               cCpus > 1 ? "EMT-%u" : "EMT", i);
          if (RT_FAILURE(rc))
              break;
		  //赋值
          pUVM->aCpus[i].vm.s.NativeThreadEMT = RTThreadGetNative(pUVM->aCpus[i].vm.s.ThreadEMT);
          return VINF_SUCCESS;
      }
}

vmR3CreateU

这个函数里创建VM

static int vmR3CreateU(PUVM pUVM, uint32_t cCpus, PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM)
{
	//加载VMMR0的驱动VMMR0.r0用于调用GVMM的IOCTL
	int rc = PDMR3LdrLoadVMMR0U(pUVM);
	//上一章介绍的VMMR0.r0 提供了一系列IOCTL,R3可以调用SUPR3CallVMMR0Ex来调用这些ioctl
	rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_GVMM_CREATE_VM, 0, &CreateVMReq.Hdr);
	//初始pVM和pVCPU
	//初始化配置
	rc = CFGMR3Init(pVM, pfnCFGMConstructor, pvUserCFGM);
	//获取配置,比如有多少个CPU,VM的名字等等
	rc = vmR3ReadBaseConfig(pVM, pUVM, cCpus);
    //本章开头的部分,初始化各个R3的模块
    vmR3InitRing3(pVM, pUVM);
	//初始化GVMMR0部分
	rc = vmR3InitRing0(pVM);	
	VMR3Relocate(pVM, 0 /* offDelta */);
    //二进制翻译的VM需要初始化RC components
    rc = vmR3InitRC(pVM);
	
	//到这边就创建完一个VM了,让VM进入hlt状态,这个时候没有开机
	rc = vmR3SetHaltMethodU(pUVM, VMHALTMETHOD_DEFAULT);
	//标记这个VM被创建出来了
	vmR3SetState(pVM, VMSTATE_CREATED, VMSTATE_CREATING);
	return VINF_SUCCESS;
	//中间如果出现任何错误,调用对应的term函数后带错误码退出
    vmR3Destroy(pVM);
    SUPR3CallVMMR0Ex(CreateVMReq.pVMR0, 0 /*idCpu*/, VMMR0_DO_GVMM_DESTROY_VM, 0, NULL);
}

vmR3ReadBaseConfig

 //获取VM的配置
static int vmR3ReadBaseConfig(PVM pVM, PUVM pUVM, uint32_t cCpus)
{
	//多少个CPU
	rc = CFGMR3QueryU32Def(pRoot, "NumCPUs", &cCPUsCfg, 1);
	rc = CFGMR3QueryU32Def(pRoot, "CpuExecutionCap", &pVM->uCpuExecutionCap, 100);
	//vm名字
	rc = CFGMR3QueryStringAllocDef(pRoot, "Name", &pUVM->vm.s.pszName, "<unknown>");
	//vm的uuid,每个VM被创建出来之后都会有一个UUID
	rc = CFGMR3QueryBytes(pRoot, "UUID", &pUVM->vm.s.Uuid, sizeof(pUVM->vm.s.Uuid));
	
	rc = CFGMR3QueryBoolDef(pRoot, "PowerOffInsteadOfReset", &pVM->vm.s.fPowerOffInsteadOfReset, false);
}

VMR3Relocate

调用每个模块的relocate函数,通知每个manager根据偏移量offDelta更新各自保存的GC指针。注意这里的GC指Guest Context(raw-mode下的Guest上下文地址),不是垃圾回收(Garbage Collection)。

VMMR3_INT_DECL(void) VMR3Relocate(PVM pVM, RTGCINTPTR offDelta)
{
    /*
     * The order here is very important!
     */
    PGMR3Relocate(pVM, offDelta);
    PDMR3LdrRelocateU(pVM->pUVM, offDelta);
    PGMR3Relocate(pVM, 0);              /* Repeat after PDM relocation. */
    CPUMR3Relocate(pVM);
    HMR3Relocate(pVM);
    SELMR3Relocate(pVM);
    VMMR3Relocate(pVM, offDelta);
    SELMR3Relocate(pVM);                /* !hack! fix stack! */
    TRPMR3Relocate(pVM, offDelta);
    IOMR3Relocate(pVM, offDelta);
    EMR3Relocate(pVM);
    TMR3Relocate(pVM, offDelta);
    IEMR3Relocate(pVM);
    DBGFR3Relocate(pVM, offDelta);
    PDMR3Relocate(pVM, offDelta);
    GIMR3Relocate(pVM, offDelta);
}

vmR3InitRing0

static int vmR3InitRing0(PVM pVM)
{
    //调用VMM里的VMMR3InitR0函数
    VMMR3InitR0(pVM);
    //通知初始化结束
    rc = vmR3InitDoCompleted(pVM, VMINITCOMPLETED_RING0);
    vmR3InitDoCompleted(pVM, VMINITCOMPLETED_HM);
}

VMR3PowerOn

VMMR3DECL(int) VMR3PowerOn(PUVM pUVM)
{
	VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR,
                                vmR3PowerOn, NULL);
}
static DECLCALLBACK(VBOXSTRICTRC) vmR3PowerOn(PVM pVM, PVMCPU pVCpu, void *pvUser)
{
    //最终调用PDM里的函数
    PDMR3PowerOn(pVM);
}

VMR3Suspend

VMMR3DECL(int) VMR3Suspend(PUVM pUVM, VMSUSPENDREASON enmReason)
{
	VMMR3EmtRendezvous(pUVM->pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR,
                                vmR3Suspend, (void *)(uintptr_t)enmReason);
}
static DECLCALLBACK(VBOXSTRICTRC) vmR3Suspend(PVM pVM, PVMCPU pVCpu, void *pvUser)
{
    //最终调用PDM里的函数
    PDMR3Suspend(pVM);
}

VMR3Resume

VMMR3DECL(int) VMR3Resume(PUVM pUVM, VMRESUMEREASON enmReason)
static DECLCALLBACK(VBOXSTRICTRC) vmR3Resume(PVM pVM, PVMCPU pVCpu, void *pvUser)
{
    //最终调用PDM里的函数
    PDMR3Resume(pVM);
}

VMR3Save/VMR3Teleport

保存虚拟机或者snapshot都会调用这个函数,保存虚拟机状态

VMMR3DECL(int) VMR3Save(PUVM pUVM, const char *pszFilename, bool fContinueAfterwards, PFNVMPROGRESS pfnProgress, void *pvUser,
                        bool *pfSuspended)
{
     vmR3SaveTeleport(pVM, 250 /*cMsMaxDowntime*/,
                              pszFilename, NULL /* pStreamOps */, NULL /* pvStreamOpsUser */,
                              enmAfter, pfnProgress, pvUser, pfSuspended,
                              false /* fSkipStateChanges */);
}
static int vmR3SaveTeleport(PVM pVM, uint32_t cMsMaxDowntime,
                            const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser,
                            SSMAFTER enmAfter, PFNVMPROGRESS pfnProgress, void *pvProgressUser, bool *pfSuspended,
                            bool fSkipStateChanges)
{
    //调用vmR3Save
    int rc = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/,
                             (PFNRT)vmR3Save, 10, pVM, cMsMaxDowntime, pszFilename, pStreamOps, pvStreamOpsUser,
                             enmAfter, pfnProgress, pvProgressUser, &pSSM, fSkipStateChanges);
    //保存虚拟机Step1,开始保存虚拟机状态 (需要时间)
    rc = SSMR3LiveDoStep1(pSSM);
    if (RT_SUCCESS(rc))
    {
        if (VMR3GetState(pVM) != VMSTATE_SAVING)
        {
            for(;;)
            {
                //调用vmR3LiveDoSuspend suspend虚拟机
                //最终调用PDM里的PDMR3Suspend API暂停虚拟机
        rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR,
                                                vmR3LiveDoSuspend, pfSuspended);
        VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)vmR3LiveDoStep2, 2, pVM, pSSM);
                if (rc != VERR_TRY_AGAIN)
                    break;
            }
            if (RT_SUCCESS(rc))
                //suspend成功,继续保存
                rc = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)vmR3LiveDoStep2, 2, pVM, pSSM);
            else
            {
                //保存失败,清除状态
                int rc2 = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)SSMR3LiveDone, 1, pSSM);
                AssertMsg(rc2 == rc, ("%Rrc != %Rrc\n", rc2, rc)); NOREF(rc2);
            }
        }
        else
        {
            //保存失败,清除状态
            int rc2 = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)SSMR3LiveDone, 1, pSSM)
            rc2 = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, vmR3LiveDoStep1Cleanup, pfSuspended);
            if (RT_FAILURE(rc2) && rc == VERR_SSM_CANCELLED)
                rc = rc2;
        }
    }
}

//完成虚拟机保存,最终调用的SSM里的API
static DECLCALLBACK(int) vmR3LiveDoStep2(PVM pVM, PSSMHANDLE pSSM)
{
    //继续保存虚拟机
    SSMR3LiveDoStep2(pSSM);
    //完成虚拟机保存,释放相关资源
    SSMR3LiveDone(pSSM);
    //保存虚拟机状态成VMSTATE_SUSPENDED
    vmR3SetState(pVM, VMSTATE_SUSPENDED, VMSTATE_SAVING);
}

8.3 VM Manager R0 APIs:

VMM\VMMR0\GVMMR0.cpp

以IOCTL的方式提供一些接口给VM manager使用,提供以下的接口

    /** Ask the GVMM to create a new VM. */
    VMMR0_DO_GVMM_CREATE_VM = 32,   对应GVMMR0CreateVM
    /** Ask the GVMM to destroy the VM. */
    VMMR0_DO_GVMM_DESTROY_VM,       对应GVMMR0DestroyVM
    /** Call GVMMR0RegisterVCpu(). */
    VMMR0_DO_GVMM_REGISTER_VMCPU,
    /** Call GVMMR0DeregisterVCpu(). */
    VMMR0_DO_GVMM_DEREGISTER_VMCPU,
    /** Call GVMMR0SchedHalt(). */
    VMMR0_DO_GVMM_SCHED_HALT,
    /** Call GVMMR0SchedWakeUp(). */
    VMMR0_DO_GVMM_SCHED_WAKE_UP,
    /** Call GVMMR0SchedPoke(). */
    VMMR0_DO_GVMM_SCHED_POKE,
    /** Call GVMMR0SchedWakeUpAndPokeCpus(). */
    VMMR0_DO_GVMM_SCHED_WAKE_UP_AND_POKE_CPUS,
    /** Call GVMMR0SchedPoll(). */
    VMMR0_DO_GVMM_SCHED_POLL,
    /** Call GVMMR0QueryStatistics(). */
    VMMR0_DO_GVMM_QUERY_STATISTICS,
    /** Call GVMMR0ResetStatistics(). */
    VMMR0_DO_GVMM_RESET_STATISTICS,

下面是对应的每个函数

GVMMR0CreateVM:创建一个VM,其实就是创建VM的全局变量和第一个EMT

GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
{
    //注册一个VM, VM关闭的时候会调用gvmmR0HandleObjDestructor回调
    pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
    if (pHandle->pvObj)
    {
        rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
        if (RT_SUCCESS(rc))
        {
            //下面是各种初始化,如果发现有错误,则回退之前的操作,然后返回错误码
            //初始化PGVM全局变量
            PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
            //初始化PVM全局变量 (R0)
            PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj);
            //分配cPages个物理页面并初始化
            RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
            for (uint32_t iPage = 0; iPage < cPages; iPage++)
            {
                paPages[iPage].uReserved = 0;
                paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
            }
            //map PVM 的R0内存到R3,保存在pVMR3中
            RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
                                                       RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
            pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
            //初始化每个VCPU
            for (VMCPUID i = 0; i < cCpus; i++)
            {
                pGVM->aCpus[i].pVCpu          = &pVM->aCpus[i];
                pGVM->aCpus[i].pVM            = pVM;
            }
            
            //VMPagesMemObj 物理地址map到R3
            RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
                                                           0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
                                                           NIL_RTR0PROCESS);
            pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
			
            //分配一个session给这个VM
            rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
            //创建第一个线程的EMT
            rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
            //注册第一个线程的VCPU
            CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
        }
    }
}

GVMMR0DestroyVM

VM关闭的时候,gvmmR0HandleObjDestructor会被调到
static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
{
    //释放session
    if (pGVM->pSession)
       SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
    //destroy每个VCPU对应的EMT
    gvmmR0CleanupVM(pGVM);
    //释放CreateVM时申请的内存
    RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
    RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
    RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); 
    RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
    for (VMCPUID i = 0; i < pGVM->cCpus; i++)
    {
        RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti);
    }
    //释放GVM
    RTMemFree(pGVM);
    //free handle
}
//关闭的工作都在上面的函数里完成了,所以这边只是检查所有EMT都deregistered了
GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
{
   for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
            cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
    if (cNotDeregistered == 0)
    {
        //释放pvObj的引用
		SUPR0ObjRelease(pvObj, pHandle->pSession);
    }
    else
    {
        rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
    }
}

GVMMR0RegisterVCpu: 绑定一个VCPU和emulation thread

VirtualBox定义了几个VCPU的状态,当VMM进入和退出GuestOS的时候都会修改相应的状态

typedef enum VMCPUSTATE
{
    /** The customary invalid zero. */
    VMCPUSTATE_INVALID = 0,
    /** Virtual CPU has not yet been started.  */
    VMCPUSTATE_STOPPED,
    /** CPU started. */
    VMCPUSTATE_STARTED,
    /** CPU started in HM context. */
    VMCPUSTATE_STARTED_HM,
    /** Executing guest code and can be poked (RC or STI bits of HM). */
    VMCPUSTATE_STARTED_EXEC,
    /** Executing guest code in the recompiler. */
    VMCPUSTATE_STARTED_EXEC_REM,
    /** Executing guest code using NEM. */
    VMCPUSTATE_STARTED_EXEC_NEM,
    VMCPUSTATE_STARTED_EXEC_NEM_WAIT,
    VMCPUSTATE_STARTED_EXEC_NEM_CANCELED,
    /** Halted. */
    VMCPUSTATE_STARTED_HALTED,
    /** The end of valid virtual CPU states. */
    VMCPUSTATE_END,
    /** Ensure 32-bit type. */
    VMCPUSTATE_32BIT_HACK = 0x7fffffff
} VMCPUSTATE;
GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
{
    //如果传入的VCPU没有对应的Emulate thread, 创建一个
    if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
    {
        //创建一个R0的EmulateThread
        rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
        if (RT_SUCCESS(rc))
            //创建成功,向CPUM注册这个Thread
            CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
        else
            //失败
            pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
    }
}

GVMMR0DeregisterVCpu

GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
{
    //销毁R0的Emulate Thread
    VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
    //变量置空
    pGVM->aCpus[idCpu].hEMT           = ~(RTNATIVETHREAD)1;
    pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
}

GVMMR0SchedPoke : 查看一个VCPU是否在运行GuestOS代码

检查VCPU是否正在运行GuestOS代码:如果没有,返回VINF_GVM_NOT_BUSY_IN_GC;如果正在运行,则调用RTMpPokeCpu去poke它所在的Host CPU并返回VINF_SUCCESS。R3根据返回结果调度VCPU

GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
{
    //获取VCPU对应的hostCpuid
    RTCPUID idHostCpu = pVCpu->idHostCpu;
    //如果没有对应的HostCpuid或者VCPU的执行状态不是VMCPUSTATE_STARTED_EXEC,返回当前VCPU没有在运行GuestOS代码
    if (    idHostCpu == NIL_RTCPUID
        ||  VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
    {
        pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
        return VINF_GVM_NOT_BUSY_IN_GC;
    }

    //当前VCPU正在运行GuestOS代码
    RTMpPokeCpu(idHostCpu);
    return VINF_SUCCESS;
}

GVMMR0SchedPoll: wakeup VCPU

GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
{
    gvmmR0SchedDoWakeUps(pGVMM, u64Now);
}
//这个函数里遍历每个VM的每个vCPU,找到所有可以被wakeup的VCPU,尝试3轮
//这里有个不太明白的,为什么先找u64HaltExpire时间短的
static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
{
    uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
    uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
    //第一轮,找到一个VCPU的wait时间小于传入的u64Now
    //第二轮,找到一个VCPU的wait时间小于uNsEarlyWakeUp1
    //第三轮,找到一个VCPU的wait时间小于uNsEarlyWakeUp2
    //遍历每个VM,这边会遍历每一个VCPU,找到所有符合条件的VCPU唤醒
    for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
         i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
         i = pGVMM->aHandles[i].iNext)
    {
        //遍历VM里的VCPU
        for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
        {
            uint64_t    u64       = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
            if (u64 <= u64Now)  // 每一轮这个值不同
            {
                //唤醒这个VCPU
                if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
                {
                    int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
                }
		    }
		}
    }
}

GVMMR0SchedHalt:暂停一个EMT

GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
{
  uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
    if (    u64NowGip < u64ExpireGipTime
        &&  cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
                            ? pGVMM->nsMinSleepCompany
                            : pGVMM->nsMinSleepAlone))
    {
      //等待事件到来或者时间片到
    rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
                                     RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
                                     u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
         if (rc == VINF_SUCCESS)
          {
              RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
          }
          else if (rc == VERR_TIMEOUT)
          {
              pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
              rc = VINF_SUCCESS;
          }
    }
  else
  {
    RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
  }  
}

GVMMR0SchedWakeUp: 唤醒给定的一个在睡眠状态的vCpu

GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
{
  if (idCpu < pGVM->cCpus)
        {
            //触发一个event
            rc = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
		
            if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
            {
                //查看所有VCPU,查看是否有可以唤醒的VCPU
                const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
                pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
            }
        }
        else
            rc = VERR_INVALID_CPU_ID;
}

GVMMR0QueryStatistics

获取GVMM相关的统计信息,cHaltCalls个数等

GVMMR0ResetStatistics

重置GVMM相关的统计信息

8.4 The Emulation Thread (EMT):

8.4.1 Emulation Thread

虚拟机创建的时候,会给每一个虚拟CPU创建一个Emulation Thread,负责执行GuestOS,死循环直到虚拟机退出

int vmR3EmulationThreadWithId(RTTHREAD hThreadSelf, PUVMCPU pUVCpu, VMCPUID idCpu)
{
   VMSTATE enmBefore = VMSTATE_CREATED;
  	//死循环
    for (;;)
    {
      //在执行VM之前先检查是否有pending的事件没有处理
      PVM    pVM   = pUVM->pVM;
        PVMCPU pVCpu = pUVCpu->pVCpu;
        if (!pVCpu || !pVM)
        {
          //pVCpu和pVM没有全部准备好
          
        }
       //VM被标记退出了,退出死循环
       if (  rc == VINF_EM_TERMINATE ||  pUVM->vm.s.fTerminateEMT)
           break;
      
      //所有事件都正确处理了
      if (RT_SUCCESS(rc))
       {
          pVM = pUVM->pVM;
        	pVCpu = pVM->apCpusR3[idCpu];
          if (   pVM->enmVMState == VMSTATE_RUNNING
            && VMCPUSTATE_IS_STARTED(VMCPU_GET_STATE(pVCpu)))
       	  {	
          	//这个函数是关键: 执行VM
          	rc = EMR3ExecuteVM(pVM, pVCpu);
        	}
        }
     
    }/* forever */
  
   //虚拟机退出的时候才会执行到这个地方
  if ( idCpu == 0 && (pVM = pUVM->pVM) != NULL)
   {
    vmR3SetTerminated(pVM);
    //关闭VM
    SUPR3CallVMMR0Ex(VMCC_GET_VMR0_FOR_CALL(pVM), 0, VMMR0_DO_GVMM_DESTROY_VM, 0, NULL);
  }
  else  if (idCpu != 0)
  {
    //只注销VCPU
    SUPR3CallVMMR0Ex(VMCC_GET_VMR0_FOR_CALL(pVM), idCpu, VMMR0_DO_GVMM_DEREGISTER_VMCPU, 0, NULL);
	}
}

8.4.2 Halt Method

和操作系统里的线程类似, EMT也提供了一套可以暂停/唤醒/等待的API,在VM处于不同状态的时候,对应的函数也不同

几种不同的halt method
/** The halt method. */
typedef enum
{
    /** The usual invalid value. */
    VMHALTMETHOD_INVALID = 0,
    /** Use the method used during bootstrapping. */
    VMHALTMETHOD_BOOTSTRAP,
    /** Use the default method. */
    VMHALTMETHOD_DEFAULT,
    /** The old spin/yield/block method. */
    VMHALTMETHOD_OLD,
    /** The first go at a block/spin method. */
    VMHALTMETHOD_1,
    /** The first go at a more global approach. */
    VMHALTMETHOD_GLOBAL_1,
    /** The end of valid methods. (not inclusive of course) */
    VMHALTMETHOD_END,
    /** The usual 32-bit max value. */
    VMHALTMETHOD_32BIT_HACK = 0x7fffffff
} VMHALTMETHOD;

// 不同halt method对应的function函数
g_aHaltMethods[] =
{
    { VMHALTMETHOD_BOOTSTRAP, false, NULL,                NULL,   NULL,                vmR3BootstrapWait,   vmR3BootstrapNotifyCpuFF,   NULL },
    { VMHALTMETHOD_OLD,       false, NULL,                NULL,   vmR3HaltOldDoHalt,   vmR3DefaultWait,     vmR3DefaultNotifyCpuFF,     NULL },
    { VMHALTMETHOD_1,         false, vmR3HaltMethod1Init, NULL,   vmR3HaltMethod1Halt, vmR3DefaultWait,     vmR3DefaultNotifyCpuFF,     NULL },
    { VMHALTMETHOD_GLOBAL_1,   true, vmR3HaltGlobal1Init, NULL,   vmR3HaltGlobal1Halt, vmR3HaltGlobal1Wait, vmR3HaltGlobal1NotifyCpuFF, NULL },
};
CreateVM的时候,会设置pUVM->vm.s.enmHaltMethod = VMHALTMETHOD_BOOTSTRAP;
VM启动完成后,会设置成 vmR3SetHaltMethodU(pUVM, VMHALTMETHOD_DEFAULT);

int vmR3SetHaltMethodU(PUVM pUVM, VMHALTMETHOD enmHaltMethod)
{
    if (enmHaltMethod == VMHALTMETHOD_DEFAULT)
    {
        //可以从config里读取
        int rc = CFGMR3QueryU32(CFGMR3GetChild(CFGMR3GetRoot(pVM), "VM"), "HaltMethod", &u32);
        if (RT_SUCCESS(rc))
        else
            //config里没有设置,模式设置成VMHALTMETHOD_GLOBAL_1
		    enmHaltMethod = VMHALTMETHOD_GLOBAL_1;
}

8.4.3 暂停/唤醒 VCPU 相关函数

VMR3WaitU:让一个VCPU等待,直到可以继续运行(对应g_aHaltMethods里的wait函数)
vmR3DefaultWait
static DECLCALLBACK(int) vmR3DefaultWait(PUVMCPU pUVCpu)
{
    for (;;)
    {
        //如果VCPU被suspend了,跳出循环
        if (    VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_SUSPENDED_MASK)
            ||  VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_EXTERNAL_SUSPENDED_MASK))
            break;
        //调用wait函数等待
        rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1000);
	}
}
vmR3HaltGlobal1Wait
static DECLCALLBACK(int) vmR3HaltGlobal1Wait(PUVMCPU pUVCpu)
{
    for (;;)
    {
        //如果VCPU被suspend了,跳出循环
        if (    VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_SUSPENDED_MASK)
            ||  VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_EXTERNAL_SUSPENDED_MASK))
            break;
        //调用IOCTL让R0去暂停VCPU
        rc = SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_HALT, RTTimeNanoTS() + 1000000000 /* +1s */, NULL);
    }
}
vmR3BootstrapWait
static DECLCALLBACK(int) vmR3BootstrapWait(PUVMCPU pUVCpu)
{
    for (;;)
    {
        //检查是否可以被唤醒,或者VM已经结束了
        if (pUVM->vm.s.pNormalReqs   || pUVM->vm.s.pPriorityReqs)   /* global requests pending? */
            break;
        if (pUVCpu->vm.s.pNormalReqs || pUVCpu->vm.s.pPriorityReqs) /* local requests pending? */
            break;
        if (    pUVCpu->pVM
            &&  (   VM_FF_IS_ANY_SET(pUVCpu->pVM, VM_FF_EXTERNAL_SUSPENDED_MASK)
                 || VMCPU_FF_IS_ANY_SET(VMMGetCpu(pUVCpu->pVM), VMCPU_FF_EXTERNAL_SUSPENDED_MASK)
                )
            )
            break;
        if (pUVM->vm.s.fTerminateEMT)
            break;
        //sleep
        rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1000);
    }
}
VMR3NotifyCpuFFU: 唤醒一个VCPU
vmR3HaltGlobal1NotifyCpuFF
static DECLCALLBACK(void) vmR3HaltGlobal1NotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags)
{
    //如果VCPU在等待状态,直接wakeup即可
    if (enmState == VMCPUSTATE_STARTED_HALTED || pUVCpu->vm.s.fWait)
    {
            int rc = SUPR3CallVMMR0Ex(pUVCpu->pVM->pVMR0, pUVCpu->idCpu, VMMR0_DO_GVMM_SCHED_WAKE_UP, 0, NULL);
    }
    //
    else if (   (fFlags & VMNOTIFYFF_FLAGS_POKE)
                 || !(fFlags & VMNOTIFYFF_FLAGS_DONE_REM))
    {
        if (enmState == VMCPUSTATE_STARTED_EXEC)
        {
            if (fFlags & VMNOTIFYFF_FLAGS_POKE)
            {
                int rc = SUPR3CallVMMR0Ex(pUVCpu->pVM->pVMR0, pUVCpu->idCpu, VMMR0_DO_GVMM_SCHED_POKE, 0, NULL);
                AssertRC(rc);
            }
        }
        else if (   enmState == VMCPUSTATE_STARTED_EXEC_NEM
                 || enmState == VMCPUSTATE_STARTED_EXEC_NEM_WAIT)
            NEMR3NotifyFF(pUVCpu->pVM, pVCpu, fFlags);
        //如果在运行二进制翻译模式而且没有结束二进制翻译
        else if (enmState == VMCPUSTATE_STARTED_EXEC_REM)
        {
            if (!(fFlags & VMNOTIFYFF_FLAGS_DONE_REM))
                REMR3NotifyFF(pUVCpu->pVM);
        }
    }
}
vmR3DefaultNotifyCpuFF
static DECLCALLBACK(void) vmR3DefaultNotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags)
{
    if (pUVCpu->vm.s.fWait)
    {
        //如果VCPU被暂停了,唤醒它
        int rc = RTSemEventSignal(pUVCpu->vm.s.EventSemWait);
    }
    else
    {
        //如果在VMCPUSTATE_STARTED_EXEC_NEM,通知NEM manager
        if (   enmState == VMCPUSTATE_STARTED_EXEC_NEM
                || enmState == VMCPUSTATE_STARTED_EXEC_NEM_WAIT)
            NEMR3NotifyFF(pUVCpu->pVM, pVCpu, fFlags);
        // 如果在执行REM(二进制翻译),通知REM
        else if (   !(fFlags & VMNOTIFYFF_FLAGS_DONE_REM)
            && enmState == VMCPUSTATE_STARTED_EXEC_REM)
            REMR3NotifyFF(pUVCpu->pVM);
    }
}
   
vmR3BootstrapNotifyCpuFF
static DECLCALLBACK(void) vmR3BootstrapNotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags)
{
   	//VM创建阶段使用这个函数,调用signal函数
    if (pUVCpu->vm.s.fWait)
    {
        int rc = RTSemEventSignal(pUVCpu->vm.s.EventSemWait);
    }
}
VMR3WaitHalted: 模拟CPU的halt指令,让一个VCPU循环等待直到被唤醒
vmR3HaltGlobal1Halt
static DECLCALLBACK(int) vmR3HaltGlobal1Halt(PUVMCPU pUVCpu, const uint32_t fMask, uint64_t u64Now)
{
    //标记这个VCPU正在等待
    ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true);
    //死循环
    for (;; cLoops++)
    {
        //如果有外部的中断到来,退出循环
        if (    VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK)
            ||  VMCPU_FF_IS_ANY_SET(pVCpu, fMask))
            break;
        if (u64Delta >= pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg)
        {
            //block lock
            VMMR3YieldStop(pVM);
            SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_HALT, u64GipTime, NULL);
        }
       //如果循环次数是 0x2000的倍数,尝试wakeup这个VCPU
        else if (!(cLoops & 0x1fff))
        {
            //spinlock
             SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_POLL, false /* don't yield */, NULL);
	   }
    }
    //标记这个VCPU被唤醒
     ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false);
}

vmR3HaltMethod1Halt
static DECLCALLBACK(int) vmR3HaltMethod1Halt(PUVMCPU pUVCpu, const uint32_t fMask, uint64_t u64Now)
{
    for (;; cLoops++)
    {
        if (!fSpinning || fBlockOnce)
        {
            //wait
             rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, cMilliSecs);
        }
    }
    
}
vmR3BootstrapWait
//虚拟机创建阶段使用这个函数,因为这个时候没有初始化VMMR0,所以只是一般的sleep
static DECLCALLBACK(int) vmR3BootstrapWait(PUVMCPU pUVCpu)
{
    ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true);
    for (;;)
     {
        //检查是否有中断到来
        if (pUVM->vm.s.pNormalReqs   || pUVM->vm.s.pPriorityReqs)   /* global requests pending? */
            break;
        if (pUVCpu->vm.s.pNormalReqs || pUVCpu->vm.s.pPriorityReqs) /* local requests pending? */
            break;

        if (    pUVCpu->pVM
            &&  (   VM_FF_IS_ANY_SET(pUVCpu->pVM, VM_FF_EXTERNAL_SUSPENDED_MASK)
                 || VMCPU_FF_IS_ANY_SET(VMMGetCpu(pUVCpu->pVM), VMCPU_FF_EXTERNAL_SUSPENDED_MASK)
                ))
            break;
        if (pUVM->vm.s.fTerminateEMT)
            break;
        //sleep
        RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1000);
     }
    ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false);
}
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值