文章目录
·Virtualbox源码分析8: VM manager
VMM的代码里,除了VT-X虚拟化框架之外,还有各种Manager。从vmR3InitRing3()这个函数里,可以看到所有manager的初始化函数
/**
* Initializes all R3 components of the VM.
*
* Abridged excerpt: each call stores its status in rc, but the checks between
* the calls are not shown.
*/
static int vmR3InitRing3(PVM pVM, PUVM pUVM)
{
// Register the emulation thread (EMT) of every VCPU from index 1 upwards with ring-0.
for (VMCPUID idCpu = 1; idCpu < pVM->cCpus; idCpu++)
{
rc = VMR3ReqCallWait(pVM, idCpu, (PFNRT)vmR3RegisterEMT, 2, pVM, idCpu);
}
// NEM: the native execution manager used in Hyper-V mode.
rc = NEMR3InitConfig(pVM);
// Memory Manager.
rc = MMR3Init(pVM);
// CPU Monitor / Manager.
rc = CPUMR3Init(pVM);
rc = NEMR3InitAfterCPUM(pVM);
// Page Manager and Monitor.
rc = PGMR3Init(pVM);
// Memory Manager (paging part).
rc = MMR3InitPaging(pVM);
// Time Manager.
rc = TMR3Init(pVM);
// The Virtual Machine Monitor
rc = VMMR3Init(pVM);
// SELM - The Selector Manager. Mainly used in binary-translation mode.
rc = SELMR3Init(pVM);
// TRPM - The Trap Monitor. Dispatches exceptions in binary-translation mode.
rc = TRPMR3Init(pVM);
// SSM - Saved State Manager. Used to save the VM state.
rc = SSMR3RegisterStub(pVM, "CSAM", 0);
rc = SSMR3RegisterStub(pVM, "PATM", 0);
// IOM - Input / Output Monitor.
rc = IOMR3Init(pVM);
// Execution Monitor / Manager. Emulated execution on VT-x VM exits, and emulated
// execution (not binary translation itself) in binary-translation mode.
rc = EMR3Init(pVM);
// IEM - Interpreted Execution Manager.
rc = IEMR3Init(pVM);
// DBGF - Debugger Facility, provides debugging support.
rc = DBGFR3Init(pVM);
// GIM - Guest Interface Manager.
rc = GIMR3Init(pVM);
// PDM - Pluggable Device Manager.
rc = PDMR3Init(pVM);
}
manager太多了,先从VM manager开始看起
8.1 VM Manager概述:
VM Manager提供一系列的API 用于创建用于运行Guest的VMM实例, GuestOS运行调度(Emulation Thread调度),上报GuestOS运行错误等功能。有R3和R0两部分代码,R3代码位于VMM\VMMR3\VM.cpp, R0代码位于VMM\VMMR0\VMMR0.cpp。
VM Manager可以理解成其他各个manager的包装,负责初始化和调用这些manager里的API。
8.2 VM Manager R3 APIs:
VM manager R3部分提供了一系列对外管理VM的API,调用这些API可以创建,启动,销毁,暂停,继续,调度多个虚拟机
/* Prototypes of some of the ring-3 VM management entry points. */
/* Creates a VM; the VM and UVM pointers are returned through ppVM / ppUVM. */
VMMR3DECL(int) VMR3Create(uint32_t cCpus, PCVMM2USERMETHODS pVm2UserCbs,
PFNVMATERROR pfnVMAtError, void *pvUserVM,
PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM,
PVM *ppVM, PUVM *ppUVM);
/* Powers on the VM identified by pUVM. */
VMMR3DECL(int) VMR3PowerOn(PUVM pUVM);
/* Suspends the VM identified by pUVM; enmReason records why. */
VMMR3DECL(int) VMR3Suspend(PUVM pUVM, VMSUSPENDREASON enmReason);
。。。。
VMR3Create
/**
* Creates a VM: builds the UVM, initializes the support driver and then runs
* vmR3CreateU as a request.
*
* Abridged excerpt: pUVM, pReq and rc are not declared here and the
* error-reporting tail is only sketched.
*/
VMMR3DECL(int) VMR3Create(uint32_t cCpus, PCVMM2USERMETHODS pVmm2UserMethods,
PFNVMATERROR pfnVMAtError, void *pvUserVM,
PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM,
PVM *ppVM, PUVM *ppUVM)
{
vmR3CreateUVM(cCpus, pVmm2UserMethods, &pUVM);
// Initialize the support driver (vboxdrv.sys).
rc = SUPR3Init(&pUVM->vm.s.pSession);
// Call vmR3CreateU.
rc = VMR3ReqCallU(pUVM, VMCPUID_ANY, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VBOX_STATUS,
(PFNRT)vmR3CreateU, 4, pUVM, cCpus, pfnCFGMConstructor, pvUserCFGM);
if (RT_SUCCESS(rc))
{
// The VM was created successfully; hand the pointers back to the caller.
if (ppVM)
*ppVM = pUVM->pVM;
if (ppUVM)
{
VMR3RetainUVM(pUVM);
*ppUVM = pUVM;
}
return VINF_SUCCESS;
}
// From here on VM creation has failed: set an error code so the UI can show a message box.
// For example:
VERR_VMX_IN_VMX_ROOT_MODE // cannot enter VMX root mode
......// other errors such as failing to start VMX
VERR_VM_DRIVER_LOAD_ERROR // loading Vboxdrv.sys failed
....// and so on
}
vmR3CreateUVM
/**
 * Allocates and initializes the user-mode VM structure (UVM) and creates one
 * emulation thread (EMT) per virtual CPU.
 *
 * Abridged excerpt: the allocation result and the status codes of the
 * TLS/PDM/STAM/MM initialization calls are not checked here, and nothing is
 * cleaned up on failure.
 *
 * @returns VINF_SUCCESS once every EMT has been created, otherwise the status
 *          of the RTThreadCreateF call that failed.
 * @param   cCpus             Number of virtual CPUs; one EMT is created for each.
 * @param   pVmm2UserMethods  Not used in this excerpt.
 * @param   ppUVM             Where to store the new UVM pointer on success.
 */
static int vmR3CreateUVM(uint32_t cCpus, PCVMM2USERMETHODS pVmm2UserMethods, PUVM *ppUVM)
{
    /* Allocate the UVM with room for cCpus per-CPU entries and start in the bootstrap halt method. */
    PUVM pUVM = (PUVM)RTMemPageAllocZ(RT_UOFFSETOF_DYN(UVM, aCpus[cCpus]));
    pUVM->vm.s.enmHaltMethod = VMHALTMETHOD_BOOTSTRAP;
    int rc = RTTlsAllocEx(&pUVM->vm.s.idxTLS, NULL);

    /* The UVM parts of three managers; these set up their own structures. */
    rc = PDMR3InitUVM(pUVM);
    rc = STAMR3InitUVM(pUVM);
    rc = MMR3InitUVM(pUVM);

    /* Create one vmR3EmulationThread per VCPU; the VCPU is the unit of guest scheduling. */
    for (uint32_t i = 0; i < cCpus; i++)
    {
        rc = RTThreadCreateF(&pUVM->aCpus[i].vm.s.ThreadEMT, vmR3EmulationThread, &pUVM->aCpus[i],
                             _1M, RTTHREADTYPE_EMULATION, RTTHREADFLAGS_WAITABLE,
                             cCpus > 1 ? "EMT-%u" : "EMT", i);
        if (RT_FAILURE(rc))
            break;
        pUVM->aCpus[i].vm.s.NativeThreadEMT = RTThreadGetNative(pUVM->aCpus[i].vm.s.ThreadEMT);
    }

    /* Report success only after the loop, so that every VCPU gets its thread. */
    if (RT_FAILURE(rc))
        return rc;
    *ppUVM = pUVM;
    return VINF_SUCCESS;
}
vmR3CreateU
这个函数里创建VM
/**
* Creates the VM proper; VMR3Create runs this as a request.
*
* Abridged excerpt: pVM and CreateVMReq are not declared here, and the code
* after the return statement stands for the error path of the full function.
*/
static int vmR3CreateU(PUVM pUVM, uint32_t cCpus, PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM)
{
// Load the ring-0 module VMMR0.r0, which is needed to issue the GVMM IOCTLs.
int rc = PDMR3LdrLoadVMMR0U(pUVM);
// VMMR0.r0 (covered in the previous chapter) exposes a set of IOCTLs; ring-3 reaches them through SUPR3CallVMMR0Ex.
rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_GVMM_CREATE_VM, 0, &CreateVMReq.Hdr);
// Initialize pVM and pVCPU (not shown).
// Initialize the configuration.
rc = CFGMR3Init(pVM, pfnCFGMConstructor, pvUserCFGM);
// Read the configuration, e.g. the number of CPUs and the VM name.
rc = vmR3ReadBaseConfig(pVM, pUVM, cCpus);
// Initialize the individual ring-3 modules (see the start of this chapter).
vmR3InitRing3(pVM, pUVM);
// Initialize the GVMM ring-0 part.
rc = vmR3InitRing0(pVM);
VMR3Relocate(pVM, 0 /* offDelta */);
// VMs that use binary translation need their RC components initialized.
rc = vmR3InitRC(pVM);
// The VM is now created; switch it to the default halt method. It has not been powered on yet.
rc = vmR3SetHaltMethodU(pUVM, VMHALTMETHOD_DEFAULT);
// Mark the VM as created.
vmR3SetState(pVM, VMSTATE_CREATED, VMSTATE_CREATING);
return VINF_SUCCESS;
// If anything above fails, the matching termination functions are called and the function exits with an error.
vmR3Destroy(pVM);
SUPR3CallVMMR0Ex(CreateVMReq.pVMR0, 0 /*idCpu*/, VMMR0_DO_GVMM_DESTROY_VM, 0, NULL);
}
vmR3ReadBaseConfig
/**
* Reads the basic VM configuration values from CFGM.
*
* Abridged excerpt: pRoot, cCPUsCfg and rc are not declared here.
*/
static int vmR3ReadBaseConfig(PVM pVM, PUVM pUVM, uint32_t cCpus)
{
// Number of CPUs (default 1) and the CPU execution cap (default 100).
rc = CFGMR3QueryU32Def(pRoot, "NumCPUs", &cCPUsCfg, 1);
rc = CFGMR3QueryU32Def(pRoot, "CpuExecutionCap", &pVM->uCpuExecutionCap, 100);
// Name of the VM.
rc = CFGMR3QueryStringAllocDef(pRoot, "Name", &pUVM->vm.s.pszName, "<unknown>");
// UUID of the VM; every created VM has one.
rc = CFGMR3QueryBytes(pRoot, "UUID", &pUVM->vm.s.Uuid, sizeof(pUVM->vm.s.Uuid));
rc = CFGMR3QueryBoolDef(pRoot, "PowerOffInsteadOfReset", &pVM->vm.s.fPowerOffInsteadOfReset, false);
}
VMR3Relocate
调用每个模块的relocate函数,把偏移量offDelta通知给各个manager,让它们修正自己保存的GC指针(这里的GC指Guest Context/RC,而不是垃圾回收)
/**
* Calls the relocation function of each VMM component in a fixed order.
*
* @param pVM The VM handle passed on to every component.
* @param offDelta The relocation delta, passed to the components that take one.
*/
VMMR3_INT_DECL(void) VMR3Relocate(PVM pVM, RTGCINTPTR offDelta)
{
/*
* The order here is very important!
*/
PGMR3Relocate(pVM, offDelta);
PDMR3LdrRelocateU(pVM->pUVM, offDelta);
PGMR3Relocate(pVM, 0); /* Repeat after PDM relocation. */
CPUMR3Relocate(pVM);
HMR3Relocate(pVM);
SELMR3Relocate(pVM);
VMMR3Relocate(pVM, offDelta);
SELMR3Relocate(pVM); /* !hack! fix stack! */
TRPMR3Relocate(pVM, offDelta);
IOMR3Relocate(pVM, offDelta);
EMR3Relocate(pVM);
TMR3Relocate(pVM, offDelta);
IEMR3Relocate(pVM);
DBGFR3Relocate(pVM, offDelta);
PDMR3Relocate(pVM, offDelta);
GIMR3Relocate(pVM, offDelta);
}
vmR3InitRing0
/**
* Initializes the ring-0 side of the VM and signals the init-completed stages.
*
* Abridged excerpt: rc is not declared and no status is returned here.
*/
static int vmR3InitRing0(PVM pVM)
{
// Call VMMR3InitR0 in the VMM.
VMMR3InitR0(pVM);
// Notify that the RING0 stage, and then the HM stage, of initialization is complete.
rc = vmR3InitDoCompleted(pVM, VMINITCOMPLETED_RING0);
vmR3InitDoCompleted(pVM, VMINITCOMPLETED_HM);
}
VMR3PowerOn
/**
* Powers on the VM by running vmR3PowerOn as an EMT rendezvous
* (descending order, stop on error).
*
* Abridged excerpt: pVM is not declared here and no status is returned.
*/
VMMR3DECL(int) VMR3PowerOn(PUVM pUVM)
{
VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR,
vmR3PowerOn, NULL);
}
/**
* Rendezvous callback for VMR3PowerOn (abridged excerpt; state handling and
* the return value are not shown).
*/
static DECLCALLBACK(VBOXSTRICTRC) vmR3PowerOn(PVM pVM, PVMCPU pVCpu, void *pvUser)
{
// The work ends up in the PDM power-on function.
PDMR3PowerOn(pVM);
}
VMR3Suspend
/**
* Suspends the VM by running vmR3Suspend as an EMT rendezvous (descending
* order, stop on error); enmReason is passed to the callback as its user
* argument.
*
* Abridged excerpt: no status is returned here.
*/
VMMR3DECL(int) VMR3Suspend(PUVM pUVM, VMSUSPENDREASON enmReason)
{
VMMR3EmtRendezvous(pUVM->pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR,
vmR3Suspend, (void *)(uintptr_t)enmReason);
}
/**
* Rendezvous callback for VMR3Suspend (abridged excerpt; state handling and
* the return value are not shown).
*/
static DECLCALLBACK(VBOXSTRICTRC) vmR3Suspend(PVM pVM, PVMCPU pVCpu, void *pvUser)
{
// The work ends up in the PDM suspend function.
PDMR3Suspend(pVM);
}
VMR3Resume
VMMR3DECL(int) VMR3Resume(PUVM pUVM, VMRESUMEREASON enmReason)
static DECLCALLBACK(VBOXSTRICTRC) vmR3Resume(PVM pVM, PVMCPU pVCpu, void *pvUser)
{
//最终调用PDM里的函数
PDMR3Resume(pVM);
}
VMR3Save/VMR3Teleport
保存虚拟机或者snapshot都会调用这个函数,保存虚拟机状态
/**
* Saves the VM state to pszFilename by delegating to vmR3SaveTeleport with a
* 250 ms maximum downtime and no stream operations.
*
* Abridged excerpt: pVM and enmAfter are not declared here (presumably derived
* from pUVM and fContinueAfterwards - TODO confirm) and no status is returned.
*/
VMMR3DECL(int) VMR3Save(PUVM pUVM, const char *pszFilename, bool fContinueAfterwards, PFNVMPROGRESS pfnProgress, void *pvUser,
bool *pfSuspended)
{
vmR3SaveTeleport(pVM, 250 /*cMsMaxDowntime*/,
pszFilename, NULL /* pStreamOps */, NULL /* pvStreamOpsUser */,
enmAfter, pfnProgress, pvUser, pfSuspended,
false /* fSkipStateChanges */);
}
/**
 * Common worker for saving / teleporting the VM (abridged excerpt).
 *
 * Runs vmR3Save as a request on VCPU 0. If that succeeds and hands back an SSM
 * handle, the live-save sequence is driven from here: step 1, suspend the VM,
 * then step 2.
 *
 * @returns VBox status code.
 * @param   pVM                The VM handle.
 * @param   cMsMaxDowntime     Maximum downtime in milliseconds, forwarded to vmR3Save.
 * @param   pszFilename        Target file name, forwarded to vmR3Save.
 * @param   pStreamOps         Stream operations, forwarded to vmR3Save.
 * @param   pvStreamOpsUser    User argument for the stream operations.
 * @param   enmAfter           What happens after the save, forwarded to vmR3Save.
 * @param   pfnProgress        Progress callback, forwarded to vmR3Save.
 * @param   pvProgressUser     User argument for the progress callback.
 * @param   pfSuspended        Passed to the suspend and cleanup rendezvous callbacks.
 * @param   fSkipStateChanges  Forwarded to vmR3Save.
 */
static int vmR3SaveTeleport(PVM pVM, uint32_t cMsMaxDowntime,
                            const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser,
                            SSMAFTER enmAfter, PFNVMPROGRESS pfnProgress, void *pvProgressUser, bool *pfSuspended,
                            bool fSkipStateChanges)
{
    /* Call vmR3Save on VCPU 0; it returns the SSM handle through pSSM. */
    PSSMHANDLE pSSM = NULL;
    int rc = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/,
                             (PFNRT)vmR3Save, 10, pVM, cMsMaxDowntime, pszFilename, pStreamOps, pvStreamOpsUser,
                             enmAfter, pfnProgress, pvProgressUser, &pSSM, fSkipStateChanges);

    /* Nothing more to drive from here when the request failed or produced no handle. */
    if (RT_FAILURE(rc) || !pSSM)
        return rc;

    /* Step 1: start saving the VM state (this takes time). */
    rc = SSMR3LiveDoStep1(pSSM);
    if (RT_SUCCESS(rc))
    {
        if (VMR3GetState(pVM) != VMSTATE_SAVING)
        {
            for (;;)
            {
                /* vmR3LiveDoSuspend suspends the VM; it ends up in PDMR3Suspend. */
                rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR,
                                        vmR3LiveDoSuspend, pfSuspended);
                if (rc != VERR_TRY_AGAIN)
                    break;

                /* Not ready yet: wait a little before retrying; step 2 must not run until the suspend succeeded. */
                RTThreadSleep(250);
            }
        }

        if (RT_SUCCESS(rc))
        {
            /* Suspended (or already in the SAVING state): continue with step 2. */
            rc = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)vmR3LiveDoStep2, 2, pVM, pSSM);
        }
        else
        {
            /* Suspending failed: finish the SSM operation to clear its state. */
            int rc2 = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)SSMR3LiveDone, 1, pSSM);
            AssertMsg(rc2 == rc, ("%Rrc != %Rrc\n", rc2, rc)); NOREF(rc2);
        }
    }
    else
    {
        /* Step 1 failed: finish the SSM operation and run the step-1 cleanup once. */
        int rc2 = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)SSMR3LiveDone, 1, pSSM);
        rc2 = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, vmR3LiveDoStep1Cleanup, pfSuspended);
        if (RT_FAILURE(rc2) && rc == VERR_SSM_CANCELLED)
            rc = rc2;
    }

    return rc;
}
/**
* Completes the VM save; the work ends up in SSM APIs.
*
* Abridged excerpt: status handling and the return value are not shown.
*/
static DECLCALLBACK(int) vmR3LiveDoStep2(PVM pVM, PSSMHANDLE pSSM)
{
// Continue saving the VM.
SSMR3LiveDoStep2(pSSM);
// The save is complete; release the related resources.
SSMR3LiveDone(pSSM);
// Move the VM state from VMSTATE_SAVING to VMSTATE_SUSPENDED.
vmR3SetState(pVM, VMSTATE_SUSPENDED, VMSTATE_SAVING);
}
8.3 VM Manager R0 APIs:
VMM\VMMR0\GVMMR0.cpp
以IOCTL的方式提供一些接口给VM manager使用,提供以下的接口
/** Ask the GVMM to create a new VM. */
VMMR0_DO_GVMM_CREATE_VM = 32, /* 对应GVMMR0CreateVM */
/** Ask the GVMM to destroy the VM. */
VMMR0_DO_GVMM_DESTROY_VM, /* 对应GVMMR0DestroyVM */
/** Call GVMMR0RegisterVCpu(). */
VMMR0_DO_GVMM_REGISTER_VMCPU,
/** Call GVMMR0DeregisterVCpu(). */
VMMR0_DO_GVMM_DEREGISTER_VMCPU,
/** Call GVMMR0SchedHalt(). */
VMMR0_DO_GVMM_SCHED_HALT,
/** Call GVMMR0SchedWakeUp(). */
VMMR0_DO_GVMM_SCHED_WAKE_UP,
/** Call GVMMR0SchedPoke(). */
VMMR0_DO_GVMM_SCHED_POKE,
/** Call GVMMR0SchedWakeUpAndPokeCpus(). */
VMMR0_DO_GVMM_SCHED_WAKE_UP_AND_POKE_CPUS,
/** Call GVMMR0SchedPoll(). */
VMMR0_DO_GVMM_SCHED_POLL,
/** Call GVMMR0QueryStatistics(). */
VMMR0_DO_GVMM_QUERY_STATISTICS,
/** Call GVMMR0ResetStatistics(). */
VMMR0_DO_GVMM_RESET_STATISTICS,
下面是对应的每个函数
GVMMR0CreateVM:创建一个VM,其实就是创建VM的全局变量和第一个EMT
/**
* Creates a VM in ring-0: registers the VM object, sets up the GVM and VM
* structures, maps them into ring-3 and registers the first EMT.
*
* Abridged excerpt: pHandle, pGVMM, cPages, paPages, pVMR3 and rc are not
* declared here, and the rollback on error is not shown.
*/
GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
{
// Register a VM object; gvmmR0HandleObjDestructor is called back when the VM is closed.
pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
if (pHandle->pvObj)
{
rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
if (RT_SUCCESS(rc))
{
// What follows is a series of initialization steps; on any error the earlier steps are rolled back and an error code is returned.
// Allocate the global GVM structure.
PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
// Get the global VM structure (ring-0 address).
PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj);
// Allocate room for cPages SUPPAGE entries and fill them in.
RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
for (uint32_t iPage = 0; iPage < cPages; iPage++)
{
paPages[iPage].uReserved = 0;
paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
}
// Map the ring-0 memory of the VM structure into ring-3; the address is kept in pVMR3.
RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
// Initialize every VCPU.
for (VMCPUID i = 0; i < cCpus; i++)
{
pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
pGVM->aCpus[i].pVM = pVM;
}
// Map the VMPagesMemObj page array into ring-3.
RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
NIL_RTR0PROCESS);
pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
// Associate the session with this VM.
rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
// Create the EMT for the first VCPU.
rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
// Register the thread of the first VCPU with CPUM.
CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
}
}
}
GVMMR0DestroyVM
VM关闭的时候,gvmmR0HandleObjDestructor会被调到
/**
* Object destructor registered by GVMMR0CreateVM; invoked when the VM is closed.
*
* Abridged excerpt: pGVM is not declared here and the handle bookkeeping is
* not shown.
*/
static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
{
// Detach the VM from its session.
if (pGVM->pSession)
SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
// Destroy the EMT of every VCPU.
gvmmR0CleanupVM(pGVM);
// Free the memory objects allocated in CreateVM.
RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
// Destroy the halt event semaphore of every VCPU.
for (VMCPUID i = 0; i < pGVM->cCpus; i++)
{
RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti);
}
// Free the GVM structure.
RTMemFree(pGVM);
// Free the handle (not shown).
}
/**
* Destroys a VM. The actual teardown happens in gvmmR0HandleObjDestructor, so
* this function only checks that the EMTs with index 1 and up have been
* deregistered and then releases the object reference.
*
* Abridged excerpt: cNotDeregistered, pvObj, pHandle and rc are not declared
* here and no status is returned.
*/
GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
{
for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
if (cNotDeregistered == 0)
{
// Release the reference to pvObj.
SUPR0ObjRelease(pvObj, pHandle->pSession);
}
else
{
rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
}
}
GVMMR0RegisterVCpu: 绑定一个VCPU和emulation thread
VirtualBox定义了几个VCPU的状态,当VMM进入和退出GuestOS的时候都会修改相应的状态
/** Virtual CPU state. */
typedef enum VMCPUSTATE
{
/** The customary invalid zero. */
VMCPUSTATE_INVALID = 0,
/** Virtual CPU has not yet been started. */
VMCPUSTATE_STOPPED,
/** CPU started. */
VMCPUSTATE_STARTED,
/** CPU started in HM context. */
VMCPUSTATE_STARTED_HM,
/** Executing guest code and can be poked (RC or STI bits of HM). */
VMCPUSTATE_STARTED_EXEC,
/** Executing guest code in the recompiler. */
VMCPUSTATE_STARTED_EXEC_REM,
/** Executing guest code using NEM. */
VMCPUSTATE_STARTED_EXEC_NEM,
/* NOTE(review): the next two are undocumented in this excerpt; by name they
* are the waiting and cancelled variants of the NEM execution state - confirm. */
VMCPUSTATE_STARTED_EXEC_NEM_WAIT,
VMCPUSTATE_STARTED_EXEC_NEM_CANCELED,
/** Halted. */
VMCPUSTATE_STARTED_HALTED,
/** The end of valid virtual CPU states. */
VMCPUSTATE_END,
/** Ensure 32-bit type. */
VMCPUSTATE_32BIT_HACK = 0x7fffffff
} VMCPUSTATE;
/**
* Binds a VCPU to an emulation thread.
*
* Abridged excerpt: pVM and rc are not declared here and no status is returned.
*/
GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
{
// Only act when the given VCPU has no emulation thread yet.
if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
{
// Create the ring-0 emulation thread for it.
rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
if (RT_SUCCESS(rc))
// Created successfully: register the thread with CPUM.
CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
else
// Failed: reset both thread handles to NIL.
pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
}
}
GVMMR0DeregisterVCpu
/**
* Unbinds a VCPU from its emulation thread.
*
* Abridged excerpt: pVM is not declared here and no status is returned.
*/
GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
{
// Destroy the ring-0 emulation thread.
VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
// Clear the handles; hEMT gets the marker value that GVMMR0DestroyVM compares against.
pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
}
GVMMR0SchedPoke : 如果一个VCPU正在运行GuestOS代码,则poke它所在的Host CPU
检查VCPU是否运行在GuestOS里:如果是,调用RTMpPokeCpu去poke对应的Host CPU;否则返回VINF_GVM_NOT_BUSY_IN_GC。R3根据这些信息,调度VCPU
/**
* Pokes the host CPU of a VCPU that is currently executing guest code.
*
* Returns VINF_GVM_NOT_BUSY_IN_GC when the VCPU is not executing guest code,
* VINF_SUCCESS after poking. Abridged excerpt: pVCpu is not declared here.
*/
GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
{
// Host CPU id the VCPU is associated with.
RTCPUID idHostCpu = pVCpu->idHostCpu;
// Without a host CPU id, or in any state other than VMCPUSTATE_STARTED_EXEC, the VCPU is not executing guest code.
if ( idHostCpu == NIL_RTCPUID
|| VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
{
pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
return VINF_GVM_NOT_BUSY_IN_GC;
}
// The VCPU is executing guest code: poke its host CPU.
RTMpPokeCpu(idHostCpu);
return VINF_SUCCESS;
}
GVMMR0SchedPoll: wakeup VCPU
/**
* Polls the scheduler: runs gvmmR0SchedDoWakeUps to wake VCPUs that are due.
*
* Abridged excerpt: pGVMM and u64Now are not declared here and no status is
* returned.
*/
GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
{
gvmmR0SchedDoWakeUps(pGVMM, u64Now);
}
/**
* Walks every VCPU of every VM, looking for VCPUs that can be woken up, in up
* to three passes.
*
* NOTE (article author): it is not clear why the VCPUs with the earliest
* u64HaltExpire are looked for first.
*
* Abridged excerpt: only one pass is shown; pCurGVM, pCurGVCpu and the two
* early-wake-up thresholds are not declared here, and nothing is returned.
*/
static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
{
uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
// Pass 1: look for VCPUs whose wait expires no later than the u64Now passed in.
// Pass 2: look for VCPUs whose wait expires before uNsEarlyWakeUp1.
// Pass 3: look for VCPUs whose wait expires before uNsEarlyWakeUp2.
// Walk every VM; every VCPU is visited and all that match are woken up.
for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
i = pGVMM->aHandles[i].iNext)
{
// Walk the VCPUs of this VM.
for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
{
uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
if (u64 <= u64Now) // the threshold compared against differs per pass
{
// Wake this VCPU; the exchange clears the expiry, and only a non-zero old value triggers the signal.
if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
{
int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
}
}
}
}
}
GVMMR0SchedHalt:暂停一个EMT
/**
* Halts an EMT until its halt event is signalled or u64ExpireGipTime passes.
*
* Abridged excerpt: u64NowGip, u64NowSys, pGVMM and rc are not declared here
* and no status is returned.
*/
GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
{
uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
// Only block when the deadline is in the future and the interval reaches the minimum
// sleep time; which minimum applies depends on how many EMTs exist.
if ( u64NowGip < u64ExpireGipTime
&& cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
? pGVMM->nsMinSleepCompany
: pGVMM->nsMinSleepAlone))
{
// Wait until the event is signalled or the deadline is reached.
rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
if (rc == VINF_SUCCESS)
{
RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
}
else if (rc == VERR_TIMEOUT)
{
// A timeout is counted in the statistics and reported as success.
pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
rc = VINF_SUCCESS;
}
}
else
{
// Not blocking: just reset the event.
RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
}
}
GVMMR0SchedWakeUp: 唤醒给定的一个在睡眠状态的vCpu
/**
* Wakes up the given sleeping VCPU by signalling its halt event.
*
* Sets rc to VERR_INVALID_CPU_ID when idCpu is out of range. Abridged excerpt:
* pGVCpu, pGVMM and rc are not declared here and no status is returned.
*/
GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
{
if (idCpu < pGVM->cCpus)
{
// Signal the halt event of the VCPU.
rc = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
{
// Also scan all VCPUs for others that can be woken up.
const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
}
}
else
rc = VERR_INVALID_CPU_ID;
}
GVMMR0QueryStatistics
获取GVMM相关的统计信息,cHaltCalls个数等
GVMMR0ResetStatistics
重置GVMM相关的统计信息
8.4 The Emulation Thread (EMT):
8.4.1 Emulation Thread
虚拟机创建的时候,会给每一个虚拟CPU创建一个Emulation Thread,负责执行GuestOS,死循环直到虚拟机退出
/**
* Body of an emulation thread: loops until the VM terminates, executing the
* guest whenever the VM is running and the VCPU is started.
*
* Abridged excerpt: pUVM and rc are not declared here, the request handling
* inside the loop is elided and no status is returned.
*/
int vmR3EmulationThreadWithId(RTTHREAD hThreadSelf, PUVMCPU pUVCpu, VMCPUID idCpu)
{
VMSTATE enmBefore = VMSTATE_CREATED;
// Loop forever.
for (;;)
{
// Before executing the VM, check for pending events that have not been handled yet.
PVM pVM = pUVM->pVM;
PVMCPU pVCpu = pUVCpu->pVCpu;
if (!pVCpu || !pVM)
{
// pVCpu and pVM are not both set up yet.
}
// The VM has been flagged for termination: leave the loop.
if ( rc == VINF_EM_TERMINATE || pUVM->vm.s.fTerminateEMT)
break;
// All events were handled successfully.
if (RT_SUCCESS(rc))
{
pVM = pUVM->pVM;
pVCpu = pVM->apCpusR3[idCpu];
if ( pVM->enmVMState == VMSTATE_RUNNING
&& VMCPUSTATE_IS_STARTED(VMCPU_GET_STATE(pVCpu)))
{
// This call is the key one: execute the VM.
rc = EMR3ExecuteVM(pVM, pVCpu);
}
}
}/* forever */
// Execution only gets here when the VM is exiting.
if ( idCpu == 0 && (pVM = pUVM->pVM) != NULL)
{
vmR3SetTerminated(pVM);
// VCPU 0 destroys the VM in ring-0.
SUPR3CallVMMR0Ex(VMCC_GET_VMR0_FOR_CALL(pVM), 0, VMMR0_DO_GVMM_DESTROY_VM, 0, NULL);
}
else if (idCpu != 0)
{
// Every other VCPU only deregisters itself.
SUPR3CallVMMR0Ex(VMCC_GET_VMR0_FOR_CALL(pVM), idCpu, VMMR0_DO_GVMM_DEREGISTER_VMCPU, 0, NULL);
}
}
8.4.2 Halt Method
和操作系统里的线程类似, EMT也提供了一套可以暂停/唤醒/等待的API,在VM处于不同状态的时候,对应的函数也不同
几种不同的halt method
/**
* The halt method.
*
* VMHALTMETHOD_DEFAULT is a placeholder: vmR3SetHaltMethodU replaces it with
* the configured value, or with VMHALTMETHOD_GLOBAL_1 when none is configured.
*/
typedef enum
{
/** The usual invalid value. */
VMHALTMETHOD_INVALID = 0,
/** Use the method used during bootstrapping. */
VMHALTMETHOD_BOOTSTRAP,
/** Use the default method. */
VMHALTMETHOD_DEFAULT,
/** The old spin/yield/block method. */
VMHALTMETHOD_OLD,
/** The first go at a block/spin method. */
VMHALTMETHOD_1,
/** The first go at a more global approach. */
VMHALTMETHOD_GLOBAL_1,
/** The end of valid methods. (not inclusive of course) */
VMHALTMETHOD_END,
/** The usual 32-bit max value. */
VMHALTMETHOD_32BIT_HACK = 0x7fffffff
} VMHALTMETHOD;
// Table of the functions that implement each halt method.
// NOTE(review): judging by the entry names the columns look like: method id, a
// boolean flag, init, term, halt, wait, notify-CPU-FF and one more callback -
// confirm against the structure declaration, which is not part of this excerpt.
g_aHaltMethods[] =
{
{ VMHALTMETHOD_BOOTSTRAP, false, NULL, NULL, NULL, vmR3BootstrapWait, vmR3BootstrapNotifyCpuFF, NULL },
{ VMHALTMETHOD_OLD, false, NULL, NULL, vmR3HaltOldDoHalt, vmR3DefaultWait, vmR3DefaultNotifyCpuFF, NULL },
{ VMHALTMETHOD_1, false, vmR3HaltMethod1Init, NULL, vmR3HaltMethod1Halt, vmR3DefaultWait, vmR3DefaultNotifyCpuFF, NULL },
{ VMHALTMETHOD_GLOBAL_1, true, vmR3HaltGlobal1Init, NULL, vmR3HaltGlobal1Halt, vmR3HaltGlobal1Wait, vmR3HaltGlobal1NotifyCpuFF, NULL },
};
创建VM的时候(vmR3CreateUVM),会设置pUVM->vm.s.enmHaltMethod = VMHALTMETHOD_BOOTSTRAP;
VM创建完成后(vmR3CreateU的末尾,此时还没有开机),会调用 vmR3SetHaltMethodU(pUVM, VMHALTMETHOD_DEFAULT) 切换到默认的halt method;
/**
 * Selects the halt method of the VM (abridged excerpt).
 *
 * Only the resolution of VMHALTMETHOD_DEFAULT is shown: the value is read from
 * the "VM/HaltMethod" configuration key and falls back to VMHALTMETHOD_GLOBAL_1
 * when the query fails. The configured value is not range-checked here, and
 * the part that actually switches to the selected method is omitted.
 *
 * @returns VBox status code.
 * @param   pUVM           The user-mode VM handle.
 * @param   enmHaltMethod  The requested method, or VMHALTMETHOD_DEFAULT.
 */
int vmR3SetHaltMethodU(PUVM pUVM, VMHALTMETHOD enmHaltMethod)
{
    PVM pVM = pUVM->pVM;

    if (enmHaltMethod == VMHALTMETHOD_DEFAULT)
    {
        /* The method can be taken from the configuration. */
        uint32_t u32 = VMHALTMETHOD_INVALID;
        int rc = CFGMR3QueryU32(CFGMR3GetChild(CFGMR3GetRoot(pVM), "VM"), "HaltMethod", &u32);
        if (RT_SUCCESS(rc))
            enmHaltMethod = (VMHALTMETHOD)u32;
        else
            /* Nothing configured: default to VMHALTMETHOD_GLOBAL_1. */
            enmHaltMethod = VMHALTMETHOD_GLOBAL_1;
    }

    /* ... switching to enmHaltMethod is not part of this excerpt ... */
    return VINF_SUCCESS;
}
8.4.3 暂停/唤醒 VCPU 相关函数
VMR3WaitU:让一个VCPU停下来等待,直到可以继续运行(下面列出的是各个halt method对应的wait函数)
vmR3DefaultWait
/**
* Default wait function: loops on the VCPU's event semaphore with a 1000 ms
* timeout until one of the EXTERNAL_SUSPENDED forced-action flags is set.
*
* Abridged excerpt: pVM, pVCpu and rc are not declared here and no status is
* returned.
*/
static DECLCALLBACK(int) vmR3DefaultWait(PUVMCPU pUVCpu)
{
for (;;)
{
// Leave the loop once a suspended-state forced action is pending for the VM or the VCPU.
if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_SUSPENDED_MASK)
|| VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_EXTERNAL_SUSPENDED_MASK))
break;
// Otherwise wait on the event semaphore.
rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1000);
}
}
vmR3HaltGlobal1Wait
/**
* Wait function of the GLOBAL_1 halt method: asks ring-0 to halt the VCPU for
* up to one second at a time until one of the EXTERNAL_SUSPENDED forced-action
* flags is set.
*
* Abridged excerpt: pVM, pVCpu and rc are not declared here and no status is
* returned.
*/
static DECLCALLBACK(int) vmR3HaltGlobal1Wait(PUVMCPU pUVCpu)
{
for (;;)
{
// Leave the loop once a suspended-state forced action is pending for the VM or the VCPU.
if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_SUSPENDED_MASK)
|| VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_EXTERNAL_SUSPENDED_MASK))
break;
// Issue the IOCTL that makes ring-0 halt the VCPU.
rc = SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_HALT, RTTimeNanoTS() + 1000000000 /* +1s */, NULL);
}
}
vmR3BootstrapWait
/**
* Wait function of the bootstrap halt method: sleeps on the VCPU's event
* semaphore until there is a reason to wake up.
*
* Abridged excerpt: pUVM and rc are not declared here and no status is
* returned.
*/
static DECLCALLBACK(int) vmR3BootstrapWait(PUVMCPU pUVCpu)
{
for (;;)
{
// Check whether there is a reason to wake up, or whether the VM is terminating.
if (pUVM->vm.s.pNormalReqs || pUVM->vm.s.pPriorityReqs) /* global requests pending? */
break;
if (pUVCpu->vm.s.pNormalReqs || pUVCpu->vm.s.pPriorityReqs) /* local requests pending? */
break;
if ( pUVCpu->pVM
&& ( VM_FF_IS_ANY_SET(pUVCpu->pVM, VM_FF_EXTERNAL_SUSPENDED_MASK)
|| VMCPU_FF_IS_ANY_SET(VMMGetCpu(pUVCpu->pVM), VMCPU_FF_EXTERNAL_SUSPENDED_MASK)
)
)
break;
if (pUVM->vm.s.fTerminateEMT)
break;
// Sleep for up to 1000 ms.
rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1000);
}
}
VMR3NotifyCpuFFU: 唤醒一个VCPU
vmR3HaltGlobal1NotifyCpuFF
/**
* Notify function of the GLOBAL_1 halt method: wakes or pokes a VCPU depending
* on its current state.
*
* Abridged excerpt: enmState and pVCpu are not declared here.
*/
static DECLCALLBACK(void) vmR3HaltGlobal1NotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags)
{
// A VCPU that is halted or waiting only needs a wake-up.
if (enmState == VMCPUSTATE_STARTED_HALTED || pUVCpu->vm.s.fWait)
{
int rc = SUPR3CallVMMR0Ex(pUVCpu->pVM->pVMR0, pUVCpu->idCpu, VMMR0_DO_GVMM_SCHED_WAKE_UP, 0, NULL);
}
// Otherwise act only when a poke was requested or the DONE_REM flag is not set.
else if ( (fFlags & VMNOTIFYFF_FLAGS_POKE)
|| !(fFlags & VMNOTIFYFF_FLAGS_DONE_REM))
{
if (enmState == VMCPUSTATE_STARTED_EXEC)
{
if (fFlags & VMNOTIFYFF_FLAGS_POKE)
{
int rc = SUPR3CallVMMR0Ex(pUVCpu->pVM->pVMR0, pUVCpu->idCpu, VMMR0_DO_GVMM_SCHED_POKE, 0, NULL);
AssertRC(rc);
}
}
else if ( enmState == VMCPUSTATE_STARTED_EXEC_NEM
|| enmState == VMCPUSTATE_STARTED_EXEC_NEM_WAIT)
NEMR3NotifyFF(pUVCpu->pVM, pVCpu, fFlags);
// Executing in binary-translation (REM) mode and the DONE_REM flag is not set.
else if (enmState == VMCPUSTATE_STARTED_EXEC_REM)
{
if (!(fFlags & VMNOTIFYFF_FLAGS_DONE_REM))
REMR3NotifyFF(pUVCpu->pVM);
}
}
}
vmR3DefaultNotifyCpuFF
/**
* Default notify function: signals a waiting VCPU, otherwise forwards the
* notification to NEM or REM depending on the VCPU state.
*
* Abridged excerpt: enmState and pVCpu are not declared here.
*/
static DECLCALLBACK(void) vmR3DefaultNotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags)
{
if (pUVCpu->vm.s.fWait)
{
// The VCPU is waiting: wake it by signalling its event semaphore.
int rc = RTSemEventSignal(pUVCpu->vm.s.EventSemWait);
}
else
{
// In one of the NEM execution states: notify the NEM manager.
if ( enmState == VMCPUSTATE_STARTED_EXEC_NEM
|| enmState == VMCPUSTATE_STARTED_EXEC_NEM_WAIT)
NEMR3NotifyFF(pUVCpu->pVM, pVCpu, fFlags);
// Executing in REM (binary translation) and DONE_REM is not set: notify REM.
else if ( !(fFlags & VMNOTIFYFF_FLAGS_DONE_REM)
&& enmState == VMCPUSTATE_STARTED_EXEC_REM)
REMR3NotifyFF(pUVCpu->pVM);
}
}
vmR3BootstrapNotifyCpuFF
/**
* Notify function used during VM creation: signals the VCPU's event semaphore
* if the VCPU is waiting.
*/
static DECLCALLBACK(void) vmR3BootstrapNotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags)
{
// Only a waiting VCPU needs the signal.
if (pUVCpu->vm.s.fWait)
{
int rc = RTSemEventSignal(pUVCpu->vm.s.EventSemWait);
}
}
VMR3WaitHalted: 模拟CPU的halt指令,让一个VCPU循环等待直到被唤醒
vmR3HaltGlobal1Halt
/**
* Halt function of the GLOBAL_1 halt method: loops, blocking in ring-0 or
* polling, until a forced action makes the VCPU runnable again.
*
* Abridged excerpt: pVM, pVCpu, pUVM, cLoops, u64Delta and u64GipTime are not
* declared here and no status is returned.
*/
static DECLCALLBACK(int) vmR3HaltGlobal1Halt(PUVMCPU pUVCpu, const uint32_t fMask, uint64_t u64Now)
{
// Mark this VCPU as waiting.
ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true);
// Loop forever.
for (;; cLoops++)
{
// Leave the loop when an external forced action (e.g. an interrupt) is pending.
if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK)
|| VMCPU_FF_IS_ANY_SET(pVCpu, fMask))
break;
if (u64Delta >= pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg)
{
// At or above the spin/block threshold: block in ring-0.
VMMR3YieldStop(pVM);
SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_HALT, u64GipTime, NULL);
}
// Every 0x2000 iterations, poll the ring-0 scheduler so that due VCPUs get woken up.
else if (!(cLoops & 0x1fff))
{
// Spinning: poll without yielding.
SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_POLL, false /* don't yield */, NULL);
}
}
// Mark this VCPU as no longer waiting.
ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false);
}
vmR3HaltMethod1Halt
/**
* Halt function of halt method 1 (abridged excerpt).
*
* Only the blocking branch is shown; the loop exit conditions, fSpinning,
* fBlockOnce, cMilliSecs, cLoops and rc are not part of the excerpt.
*/
static DECLCALLBACK(int) vmR3HaltMethod1Halt(PUVMCPU pUVCpu, const uint32_t fMask, uint64_t u64Now)
{
for (;; cLoops++)
{
if (!fSpinning || fBlockOnce)
{
// Block on the event semaphore for cMilliSecs.
rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, cMilliSecs);
}
}
}
vmR3BootstrapWait
/**
* Wait function used while the VM is being created. VMMR0 is not initialized
* at that point, so this is only a plain sleep on the event semaphore.
*
* Abridged excerpt: pUVM is not declared here and no status is returned.
*/
static DECLCALLBACK(int) vmR3BootstrapWait(PUVMCPU pUVCpu)
{
// Mark this VCPU as waiting.
ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true);
for (;;)
{
// Check whether anything is pending that requires waking up.
if (pUVM->vm.s.pNormalReqs || pUVM->vm.s.pPriorityReqs) /* global requests pending? */
break;
if (pUVCpu->vm.s.pNormalReqs || pUVCpu->vm.s.pPriorityReqs) /* local requests pending? */
break;
if ( pUVCpu->pVM
&& ( VM_FF_IS_ANY_SET(pUVCpu->pVM, VM_FF_EXTERNAL_SUSPENDED_MASK)
|| VMCPU_FF_IS_ANY_SET(VMMGetCpu(pUVCpu->pVM), VMCPU_FF_EXTERNAL_SUSPENDED_MASK)
))
break;
if (pUVM->vm.s.fTerminateEMT)
break;
// Sleep for up to 1000 ms.
RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1000);
}
// Mark this VCPU as no longer waiting.
ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false);
}