文章目录
Virtualbox源码分析7: Hardware Assisted Virtualization Manager (HM)源码分析
有了前两章介绍的VT-x基础框架后,就可以调用这些API实现一个VMM
HM代码分成两个部分 R3/R0, 代码分别位于VMM\VMMR0\HMR0.cpp VMM\VMMR0\VMMR0.cpp VMM\VMMR3\HM.cpp里
代码分别编译到VMM.dll 和VMMR0.r0里
这3个文件里的代码主要是调用VT-x里的函数,包装API之类,下面简单介绍部分重要的函数代码。
4.3.1 VMMR0.cpp.
VMMR0.r0的初始化函数
/*
 * ModuleInit (abridged listing): initialization entry point of VMMR0.r0.
 *
 * Calls the ring-0 init routine of each global component in turn.
 * NOTE(review): the declaration of rc, the status checks between the calls
 * and the final return are elided in this excerpt.
 */
DECLEXPORT(int) ModuleInit(void *hMod)
{
// Initialize the Global VM Manager (GVMM).
rc = GVMMR0Init();
// Initialize the Global Memory Manager (GMM).
rc = GMMR0Init();
// Initialize the Hardware Assisted Virtualization Manager (HM).
rc = HMR0Init();
// Initialize the Pluggable Device and Driver Manager (PDM).
PDMR0Init(hMod);
// Initialize internal networking (IntNet).
rc = IntNetR0Init();
}
ModuleTerm 是 ModuleInit对应的terminate函数
/*
 * ModuleTerm: termination counterpart of ModuleInit.
 *
 * Calls the module-level terminate routine of CPUM, IntNet, HM, GMM and GVMM,
 * in that order. hMod is not used by the calls shown here.
 */
DECLEXPORT(void) ModuleTerm(void *hMod)
{
CPUMR0ModuleTerm();
IntNetR0Term();
HMR0Term();
GMMR0Term();
GVMMR0Term();
}
启动一个虚拟机的init函数
和上面的moduleinit类似,调用VMM里各个模块的VM初始化函数
/*
 * vmmR0InitVM (abridged listing): ring-0 initialization of a single VM.
 *
 * Invokes the per-VM init routine of GVMM, HM, CPUM, EM and GIM and then
 * signals completion through GVMMR0DoneInitVM().
 * NOTE(review): rc and pVM are not declared in this excerpt, and uSvnRev /
 * uBuildType are not used by the lines shown; the elided code presumably
 * handles them - confirm against the full source.
 */
static int vmmR0InitVM(PGVM pGVM, uint32_t uSvnRev, uint32_t uBuildType)
{
rc = GVMMR0InitVM(pGVM);
rc = HMR0InitVM(pVM);
rc = CPUMR0InitVM(pVM);
rc = EMR0InitVM(pGVM, pVM);
rc = GIMR0InitVM(pVM);
GVMMR0DoneInitVM(pGVM);
}
这个代码比较重要,是GuestOS代码入口, 当R3需要进入GuestOS的时候,会调用IOCTL最终进入这个函数
这个函数支持3种虚拟机的运行模式: 二进制翻译,VT,和Hyper-V模式。
/*
 * VMMR0EntryFast (abridged listing): fast ring-0 entry used to run guest code.
 *
 * The case labels below select the execution mode: raw-mode (binary
 * translation), hardware-assisted (HM) or NEM.
 * NOTE(review): the enclosing switch (presumably on enmOperation), the break
 * statements between cases, the exit from the hlt loop and the declarations
 * of pVM, pVCpu, pGVCpu, rc and fVTxDisabled are elided in this excerpt.
 */
VMMR0DECL(void) VMMR0EntryFast(PGVM pGVM, PVMCC pVMIgnored, VMCPUID idCpu, VMMR0OPERATION enmOperation)
{
// Raw-mode (binary translation): per the author, 32-bit guests that are run
// through binary translation take this branch.
case VMMR0_DO_RAW_RUN:
{
// Some world switchers require VT-x to be turned off first if it is enabled.
int rc = HMR0EnterSwitcher(pVM, pVM->vmm.s.enmSwitcher, &fVTxDisabled);
for (;;)
{
// Switch context and enter the guest; repeat for as long as the switcher
// returns VINF_VMM_CALL_TRACER.
rc = pVM->vmm.s.pfnR0ToRawMode(pVM);
if (rc != VINF_VMM_CALL_TRACER)
break;
}
// If VT-x was turned off on entry, turn it back on.
HMR0LeaveSwitcher(pVM, fVTxDisabled);
}
// Hardware-assisted virtualization.
case VMMR0_DO_HM_RUN:
{
for (;;) /* hlt loop */
{
// Do not enter guest code while a host suspend is pending (e.g. after a
// power event).
if (!HMR0SuspendPending())
{
// Mark the VCPU as VMCPUSTATE_STARTED_HM.
VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
// Enable VT-x on the current CPU and prepare to run guest code.
rc = HMR0Enter(pGVCpu);
// Run HMR0RunGuestCode (explained in the previous chapter) with the ring-3
// call jump buffer.
// NOTE(review): the original annotation said "longjmp"; judging by the name,
// this helper arms the jump buffer and then calls the function - confirm.
rc = vmmR0CallRing3SetJmp(&pGVCpu->vmm.s.CallRing3JmpBufR0, HMR0RunGuestCode, pGVM, pGVCpu);
// Set the VCPU state back to VMCPUSTATE_STARTED.
VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
}
else
{
// Suspend pending: report VINF_EM_RAW_INTERRUPT so ring-3 deals with it.
rc = VINF_EM_RAW_INTERRUPT;
}
if (rc != VINF_EM_HALT)
{ /* we're not in a hurry for a HLT, so prefer this path */ }
else
{
// VINF_EM_HALT is returned when the guest executed a HLT instruction.
// HLT stops the CPU until an external event (e.g. a key press or mouse
// movement) wakes it up, so the guest can only resume once an external
// interrupt arrives.
pGVCpu->vmm.s.iLastGZRc = rc = vmmR0DoHalt(pGVM, pGVCpu);
if (rc == VINF_SUCCESS)
{
// The halt ended with something to run: count it and re-enter the guest.
pGVCpu->vmm.s.cR0HaltsSucceeded++;
continue;
}
// No wake-up event arrived in ring-0: count it and go back to ring-3.
pGVCpu->vmm.s.cR0HaltsToRing3++;
}
}
}
// NEM (Native Execution Manager): runs on top of Hyper-V on Windows hosts;
// according to the author it was added in version 6.0.
// TODO: debug and study this code path in more detail.
case VMMR0_DO_NEM_RUN:
{
// Switch to the guest through NEMR0RunGuestCode.
int rc = vmmR0CallRing3SetJmp2(&pGVCpu->vmm.s.CallRing3JmpBufR0, NEMR0RunGuestCode, pGVM, idCpu);
}
case VMMR0_DO_NOP:
{
// Nothing to do.
}
}
下面看看VT-x模式下的vmmR0DoHalt这个函数
/*
 * vmmR0DoHalt (abridged listing): tries to handle a guest HLT inside ring-0.
 *
 * Returns VINF_SUCCESS when guest execution should simply continue; any other
 * status (VINF_EM_HALT in the paths shown) means the caller must return to
 * ring-3.
 *
 * Two waiting rounds are performed: first a short spin loop that polls the
 * force flags, then a blocking wait of up to u64GipTime via GVMMR0SchedHalt().
 * If neither round produces a pending interrupt, the function gives up and
 * returns VINF_EM_HALT.
 *
 * NOTE(review): fVmFFs, fCpuFFs, uMWait, enmInterruptibility and u64GipTime
 * are computed by code elided from this excerpt.
 */
static int vmmR0DoHalt(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, PVMCPU pVCpu)
{
    /* Fold a pending APIC update request into the interrupt force flags. */
    if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
        APICUpdatePendingInterrupts(pVCpu);

    /* Only wait in ring-0 while no VM or VCPU force flag demands ring-3. */
    if (   !VM_FF_IS_ANY_SET(pVM, fVmFFs)
        && !VMCPU_FF_IS_ANY_SET(pVCpu, fCpuFFs))
    {
        /* An interrupt is already pending: let the interrupt helper decide. */
        uint64_t const fIntMask = VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NESTED_GUEST
                                | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_UNHALT;
        if (VMCPU_FF_IS_ANY_SET(pVCpu, fIntMask))
            return vmmR0DoHaltInterrupt(pVCpu, uMWait, enmInterruptibility);

        /* Round 1: spin briefly, polling for a ring-3 request or an interrupt. */
        uint32_t cSpinLoops = 42;
        while (cSpinLoops-- > 0)
        {
            if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
                APICUpdatePendingInterrupts(pVCpu);

            /*
             * Either kind of force flag is a reason to leave for ring-3, which
             * mirrors the guard conditions before and after the spin loop.
             * (The listing previously nested the two checks, which required
             * both to be set at the same time.)
             */
            if (   VM_FF_IS_ANY_SET(pVM, fVmFFs)
                || VMCPU_FF_IS_ANY_SET(pVCpu, fCpuFFs))
                return VINF_EM_HALT;

            if (VMCPU_FF_IS_ANY_SET(pVCpu, fIntMask))
                return vmmR0DoHaltInterrupt(pVCpu, uMWait, enmInterruptibility);
        }

        /* Round 2: nothing arrived while spinning, so block for up to u64GipTime. */
        int rc = GVMMR0SchedHalt(pGVM, pVM, pGVCpu, u64GipTime);

        /*
         * NOTE(review): the original annotation said this marks the VCPU as
         * VMCPUSTATE_STARTED_HALTED; if the macro takes (new state, old state)
         * it instead switches STARTED_HALTED back to STARTED - confirm against
         * the macro definition.
         */
        VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_HALTED);

        /* The wait ended normally or was interrupted: re-check everything once. */
        if (rc == VINF_SUCCESS || rc == VERR_INTERRUPTED)
        {
            if (   !VM_FF_IS_ANY_SET(pVM, fVmFFs)
                && !VMCPU_FF_IS_ANY_SET(pVCpu, fCpuFFs))
            {
                if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
                    APICUpdatePendingInterrupts(pVCpu);
                if (VMCPU_FF_IS_ANY_SET(pVCpu, fIntMask))
                {
                    STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExecFromBlock);
                    return vmmR0DoHaltInterrupt(pVCpu, uMWait, enmInterruptibility);
                }
            }
        }
    }

    /* Nothing to run in ring-0: hand the halt over to ring-3. */
    return VINF_EM_HALT;
}
下面看看二进制翻译下的GuestOS进入代码:
HMR0.cpp
/*
 * HMR0EnterSwitcher (abridged listing): called before a raw-mode world switch
 * to decide whether VT-x has to be turned off for the given switcher.
 *
 * NOTE(review): pHostCpu is not declared in this excerpt and *pfVTxDisabled
 * is never written by the lines shown; pVM is not used by them either - the
 * elided code presumably covers this.
 */
VMMR0_INT_DECL(int) HMR0EnterSwitcher(PVM pVM, VMMSWITCHER enmSwitcher, bool *pfVTxDisabled)
{
// Decide from the host/guest mode combination whether VT-x must be disabled.
// When host and guest use the same mode (e.g. 32-bit non-PAE host and 32-bit
// non-PAE guest) nothing needs to be done, because no CPU mode switch is
// involved.
switch (enmSwitcher)
{
case VMMSWITCHER_32_TO_32:
case VMMSWITCHER_PAE_TO_PAE:
return VINF_SUCCESS; /* safe switchers as they don't turn off paging */
case VMMSWITCHER_32_TO_PAE:
case VMMSWITCHER_PAE_TO_32: /* is this one actually used?? */
case VMMSWITCHER_AMD64_TO_32:
case VMMSWITCHER_AMD64_TO_PAE:
break; /* unsafe switchers */
}
// Unsafe switcher: disable VT-x on this CPU.
return VMXR0DisableCpu(pHostCpu->pvMemObj, pHostCpu->HCPhysMemObj) ;
}
在调用VMMR0EntryFast之前,根据GuestOS和HostOS的模式,决定了用哪个函数切换运行模式,这边只需要调用对应的汇编函数:
rc = pVM->vmm.s.pfnR0ToRawMode(pVM);
所有模式切换的代码在VMM\VMMSwitcher目录里,可以看到每一种组合的HostOS模式和GuestOS模式都会有一个单独asm文件,比如64位的Host到没有开PAE的32位Guest,使用的asm放在AMD64To32Bit.asm里。
具体实现后面会单独分一章详细介绍这些内容
vmmR0EntryExWorker这个函数类似于驱动程序中的DispatchRoutine,VMMR0驱动提供了一系列的IOCTL给VMMR3部分调用
/*
 * vmmR0EntryExWorker (abridged listing): dispatches a ring-3 request to the
 * handler matching enmOperation, much like a driver's dispatch routine.
 *
 * Only the VMMR0_DO_GVMM_CREATE_VM case is shown; the remaining cases and the
 * return path are elided.
 */
static int vmmR0EntryExWorker(PGVM pGVM, VMCPUID idCpu, VMMR0OPERATION enmOperation,
PSUPVMMR0REQHDR pReqHdr, uint64_t u64Arg, PSUPDRVSESSION pSession)
{
// Parameter validation (elided).
...
// Call the handler that corresponds to enmOperation.
switch (enmOperation)
{
case VMMR0_DO_GVMM_CREATE_VM:
GVMMR0CreateVMReq((PGVMMCREATEVMREQ)pReqHdr, pSession);
.....
}
}
关闭一个虚拟机
/*
 * Terminates the R0 bits for a particular VM instance (abridged listing).
 *
 * When GVMMR0DoingTermVM() returns true, the per-VM terminate routines of
 * GIM, the PGM dynamic mapping code and HM are called in that order.
 * NOTE(review): idCpu is unused in the lines shown and the return statement
 * is elided.
 */
VMMR0_INT_DECL(int) VMMR0TermVM(PGVM pGVM, VMCPUID idCpu)
{
if (GVMMR0DoingTermVM(pGVM))
{
GIMR0TermVM(pGVM);
PGMR0DynMapTermVM(pGVM);
HMR0TermVM(pGVM);
}
}
4.3.2 HMR0.cpp
提供一些API给R3调用
/*
 * HMR0Init (abridged listing): global ring-0 initialization of HM.
 *
 * Queries the host's virtualization capabilities, runs the Intel or AMD
 * specific init routine, and registers CPU and power notification callbacks
 * when fUsingSUPR0EnableVTx is not set.
 * NOTE(review): the declaration of fCaps and the return statement are elided.
 */
VMMR0_INT_DECL(int) HMR0Init(void)
{
// Pick the vendor-specific init routine based on the reported capabilities.
int rc = SUPR0GetVTSupport(&fCaps);
if (RT_SUCCESS(rc))
{
if (fCaps & SUPVTCAPS_VT_X)
{
rc = hmR0InitIntel();
。。。
}
else
{
rc = hmR0InitAmd();
。。。。
}
}
// Register two notification callbacks.
if (!g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
{
// Callback for CPU events (e.g. a CPU going offline).
rc = RTMpNotificationRegister(hmR0MpEventCallback, NULL);
AssertRC(rc);
// Callback for power events: VT must be turned off when the system suspends
// or shuts down and turned back on when it resumes.
rc = RTPowerNotificationRegister(hmR0PowerCallback, NULL);
AssertRC(rc);
}
}
/*
 * hmR0InitIntel (abridged listing): VT-x (VMX) specific initialization.
 *
 * Sets CR4.VMXE, probes VMX by enabling it once on a scratch page and
 * disabling it again, runs the VT-x global init and installs the VT-x
 * implementations into the g_HmR0 method table.
 * NOTE(review): rc and HCPhysScratchPage are not declared in this excerpt,
 * uOldCr4 is not used by the lines shown, and the return is elided.
 */
static int hmR0InitIntel(void)
{
/* Enable CR4.VMXE if it isn't already set. */
RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
/* Probe: if VMX can be enabled, record it as supported and disable it again. */
rc = VMXEnable(HCPhysScratchPage);
if (RT_SUCCESS(rc))
{
g_HmR0.hwvirt.u.vmx.fSupported = true;
VMXDisable();
}
rc = VMXR0GlobalInit();
/*
 * Install the VT-x methods.
 */
g_HmR0.pfnEnterSession = VMXR0Enter;
g_HmR0.pfnThreadCtxCallback = VMXR0ThreadCtxCallback;
g_HmR0.pfnCallRing3Callback = VMXR0CallRing3Callback;
g_HmR0.pfnExportHostState = VMXR0ExportHostState;
g_HmR0.pfnRunGuestCode = VMXR0RunGuestCode;
g_HmR0.pfnEnableCpu = VMXR0EnableCpu;
g_HmR0.pfnDisableCpu = VMXR0DisableCpu;
g_HmR0.pfnInitVM = VMXR0InitVM;
g_HmR0.pfnTermVM = VMXR0TermVM;
g_HmR0.pfnSetupVM = VMXR0SetupVM;
}
terminate函数
/*
 * HMR0Term (abridged listing): global ring-0 termination of HM.
 *
 * NOTE(review): the declarations of FirstRc and rc, the memory clean-up and
 * the return statement are elided in this excerpt.
 */
VMMR0_INT_DECL(int) HMR0Term(void)
{
hmR0FirstRcInit(&FirstRc);
// Run hmR0DisableCpuCallback on every CPU.
rc = RTMpOnAll(hmR0DisableCpuCallback, NULL /* pvUser 1 */, &FirstRc);
// Free some memory (elided).
// Call the global terminate routine of whichever backend is supported.
if (g_HmR0.hwvirt.u.vmx.fSupported)
VMXR0GlobalTerm();
else if (g_HmR0.hwvirt.u.svm.fSupported)
SVMR0GlobalTerm();
}
在当前CPU上调用EnableCPU函数
/*
 * hmR0EnableCpu (abridged listing): enables hardware virtualization on the
 * current CPU.
 *
 * NOTE(review): pHostCpu, HCPhysCpuPage, pvCpuPage and pHwvirtMsrs are not
 * declared in this excerpt, idCpu is unused by the lines shown, and the
 * return statement is elided.
 */
static int hmR0EnableCpu(PVMCC pVM, RTCPUID idCpu)
{
// Execute VMXON to enter VMX root mode.
hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
// Flush the TLB: if the CPU advertises all-context INVEPT, flush all EPT
// contexts.
if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
{
hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
}
}
调用上一章介绍的SetupVM函数
/*
 * HMR0SetupVM (abridged listing): sets up a VM for hardware-assisted
 * execution through the installed backend method.
 *
 * NOTE(review): rc and idCpu are not declared in this excerpt and the return
 * statement is elided.
 */
VMMR0_INT_DECL(int) HMR0SetupVM(PVMCC pVM)
{
// Without global init, hardware virtualization is enabled on this CPU first.
if (!g_HmR0.fGlobalInit)
{
rc = hmR0EnableCpu(pVM, idCpu);
}
// Delegate to the backend's SetupVM method (see the previous chapter).
rc = g_HmR0.pfnSetupVM(pVM);
}
进入root模式
/*
 * hmR0EnterCpu (abridged listing): prepares the current host CPU for running
 * the given VCPU.
 *
 * Enables hardware virtualization on the CPU if it has not been configured
 * yet, then flags the host context and the backend-specific shared
 * host/guest state as changed so that they get reloaded.
 * NOTE(review): pHostCpu, idCpu and rc are not declared in this excerpt and
 * the return statement is elided.
 */
VMMR0_INT_DECL(int) hmR0EnterCpu(PVMCPUCC pVCpu)
{
/* Enable VT-x or AMD-V if local init is required, or enable if it's a freshly onlined CPU. */
if (!pHostCpu->fConfigured)
rc = hmR0EnableCpu(pVCpu->CTX_SUFF(pVM), idCpu);
/* Reload host-state (back from ring-3/migrated CPUs) and shared guest/host bits. */
if (g_HmR0.hwvirt.u.vmx.fSupported)
pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
else
pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE;
}
/*
 * HMR0Enter (abridged listing): enters the hardware-virtualization session
 * for a VCPU.
 *
 * After hmR0EnterCpu() succeeds, the backend's enter-session and
 * export-host-state methods are invoked.
 * NOTE(review): in the lines shown the status of pfnEnterSession is
 * overwritten by pfnExportHostState without being checked, and the return
 * statement is elided - the full source presumably handles both.
 */
VMMR0_INT_DECL(int) HMR0Enter(PVMCPUCC pVCpu)
{
int rc = hmR0EnterCpu(pVCpu);
if (RT_SUCCESS(rc))
{
rc = g_HmR0.pfnEnterSession(pVCpu);
rc = g_HmR0.pfnExportHostState(pVCpu);
}
}
退出root模式
/*
 * HMR0LeaveCpu (abridged listing): leaves root mode by disabling hardware
 * virtualization on the CPU via hmR0DisableCpu().
 *
 * NOTE(review): idCpu is not declared in this excerpt, pVCpu is unused by
 * the line shown, and the return statement is elided.
 */
VMMR0_INT_DECL(int) HMR0LeaveCpu(PVMCPUCC pVCpu)
{
int rc = hmR0DisableCpu(idCpu);
}
进入GuestOS
/*
 * HMR0RunGuestCode (abridged listing): runs guest code by calling the
 * backend's pfnRunGuestCode method for the VCPU.
 *
 * NOTE(review): pVM is unused by the line shown and the handling/return of
 * rcStrict is elided.
 */
VMMR0_INT_DECL(int) HMR0RunGuestCode(PVMCC pVM, PVMCPUCC pVCpu)
{
VBOXSTRICTRC rcStrict = g_HmR0.pfnRunGuestCode(pVCpu);
}
让GCVirt的TLB失效(guest TLB)
/*
 * HMR0InvalidatePage: invalidates the guest TLB entry for GCVirt.
 *
 * Dispatches to the VT-x implementation when the owning VM has VT-x flagged
 * as supported, and to the AMD-V implementation otherwise. The status of the
 * chosen backend is returned unchanged.
 */
VMMR0_INT_DECL(int) HMR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
{
    /* Not VT-x: this must be an AMD-V setup. */
    if (!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported)
        return SVMR0InvalidatePage(pVCpu, GCVirt);

    return VMXR0InvalidatePage(pVCpu, GCVirt);
}
4.3.3.HM.cpp
VM Hardware Support Manager.
-
这个cpp提供了一系列的API:
初始化/结束API: hmR3Init hmR3InitFinalizeR0 hmR3InitFinalizeR0Intel hmR3InitFinalizeR0Amd hmR3InitFinalizeR3 hmR3TermCPU 打印VM信息的API hmR3Info hmR3InfoEventPending hmR3InfoSvmNstGstVmcbCache
-
对于头文件HMInternal.h,定义了几个很重要的结构体:
/**
 * HM VM Instance data.
 * Changes to this must checked against the padding of the hm union in VM!
 *
 * Abridged listing: only a few representative members are shown.
 */
typedef struct HM
{
// General state and settings.
/** Set when we've initialized VMX or SVM. */
bool fInitialized;
....
// VT-x (VMX) information.
struct
{
// VMM state information for VT-x.
bool fSupported;
/** VMX MSR values. */
VMXMSRS Msrs;
}vmx;
// AMD-V (SVM) information.
struct
{
// VMM state information for AMD-V.
bool fSupported;
}svm;
}HM;
// Per virtual CPU HM data (abridged listing).
// The VT-x and AMD-V specific members live in a union, so only one of the
// two sets is meaningful for a given VCPU.
typedef struct HMCPU
{
union /* no tag! */
{
/** VT-x data. */
struct
{
/** Guest VMCS information. */ VMCS结构体
VMXVMCSINFO VmcsInfo;
...
// Host state saved so it can be restored when switching modes.
/** Host LSTAR MSR to restore lazily while leaving VT-x. */
uint64_t u64HostMsrLStar;
...
// Records error information about VT-x exits.
struct
{
。。。
} LastError;
} vmx;
/* AMD-V data: guest VMCB and MSR bitmap. */
struct
{
/** Physical address of the guest VMCB. */
RTHCPHYS HCPhysVmcb;
/** R0 memory object for the guest VMCB. */
RTR0MEMOBJ hMemObjVmcb;
/** Pointer to the guest VMCB. */
R0PTRTYPE(PSVMVMCB) pVmcb;
/** Pointer to the MSR bitmap. */
R0PTRTYPE(void *) pvMsrBitmap;
} svm;
// Various counters (elided).
} HM_UNION_NM(u);
} HMCPU;
这个结构体在第五章里介绍过,这边不重复介绍
/* VMCS information structure; its members are elided here because the
 * article covers them in an earlier chapter. */
typedef struct VMXVMCSINFO
{
...
}
- 重要函数解析
VMM.dll的初始化函数
/*
 * HMR3Init (abridged listing): ring-3 initialization of HM.
 *
 * Reads the HM configuration, queries the host's virtualization support and
 * selects the VM's main execution engine: hardware virtualization, NEM as a
 * fallback, or raw-mode as the last resort.
 * NOTE(review): rc, fCaps, pszWhy, pszMsg and fHMForced are not declared in
 * this excerpt and the return statement is elided.
 */
VMMR3_INT_DECL(int) HMR3Init(PVM pVM)
{
// Read the HM configuration node and store the settings in pVM->hm.
PCFGMNODE pCfgHm = CFGMR3GetChild(CFGMR3GetRoot(pVM), "HM/");
// For example cMaxResumeLoops, mentioned in the previous chapter, is read here.
rc = CFGMR3QueryU32Def(pCfgHm, "MaxResumeLoops", &pVM->hm.s.cMaxResumeLoops, 0 /* set by R0 later */);
....
// Check whether VT is supported; per the author this goes to ring-0 via IOCTL.
rc = SUPR3QueryVTCaps(&fCaps);
rc = SUPR3QueryVTxSupported(&pszWhy);
if (RT_SUCCESS(rc))
{
VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_HW_VIRT);
}
else
{
// VT could not be used: try NEM (i.e. check whether Hyper-V is available).
int rc2 = NEMR3Init(pVM, true /*fFallback*/, fHMForced);
if (RT_FAILURE(rc2))
{
// NEM failed as well, so only raw-mode (binary translation) is left; note
// that a 64-bit guest cannot be run in raw-mode.
VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_RAW_MODE);
}
}
// On failure, map the status code to an error message that is handed back to
// the UI for display.
switch (rc)
{
case VERR_UNSUPPORTED_CPU:
pszMsg = "Unknown CPU, VT-x or AMD-v features cannot be ascertained";
.....
}
}
/*
 * HMR3InitCompleted: init-completed notification for HM.
 *
 * Runs the ring-3 finalizer when ring-3 init has completed and the ring-0
 * finalizer when ring-0 init has completed; every other phase is ignored and
 * reported as VINF_SUCCESS.
 */
VMMR3_INT_DECL(int) HMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat)
{
    if (enmWhat == VMINITCOMPLETED_RING3)
        return hmR3InitFinalizeR3(pVM);

    if (enmWhat == VMINITCOMPLETED_RING0)
        return hmR3InitFinalizeR0(pVM);

    /* Phases HM has nothing to do for. */
    return VINF_SUCCESS;
}
hmR3InitFinalizeR0调用VMMR0的接口启动VT-x
/*
 * hmR3InitFinalizeR0 (abridged listing): asks ring-0 to enable hardware
 * virtualization by issuing the VMMR0_DO_HM_ENABLE request.
 *
 * NOTE(review): the declaration of rc and the rest of the function
 * (including the return) are elided.
 */
static int hmR3InitFinalizeR0(PVM pVM)
{
rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_HM_ENABLE, 0, NULL);
}
/*
 * hmR3InitFinalizeR0Intel (abridged listing): VT-x specific part of the
 * ring-0 init finalization.
 *
 * Issues VMMR0_DO_HM_SETUP_VM, marks VT-x as enabled, exposes CPUID features
 * to the guest and turns on large page usage when configured. Returns
 * VINF_SUCCESS.
 * NOTE(review): rc is not declared here and, in the lines shown, is not
 * checked before fEnabled is set - the full source presumably checks it.
 */
static int hmR3InitFinalizeR0Intel(PVM pVM)
{
rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /* idCpu */, VMMR0_DO_HM_SETUP_VM, 0 /* u64Arg */, NULL /* pReqHdr */);
pVM->hm.s.vmx.fEnabled = true;
// Expose CPU features to the guest.
CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SEP);
if (pVM->hm.s.fAllow64BitGuests)
{
CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_PAE);
// Long mode.
CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LONG_MODE);
// SYSCALL instruction.
CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SYSCALL); /* 64 bits only on Intel CPUs */
CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LAHF);
CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_NX);
}
if (pVM->hm.s.fLargePages)
{
// Enable large pages for the EPT page tables so that, like the OS page
// tables, they can use 2 MB pages.
PGMSetLargePageUsage(pVM, true);
}
// Log some of the resulting configuration (elided).
return VINF_SUCCESS;
}
hmR3InitFinalizeR3 初始化一些计数器和内存。
/*
 * hmR3InitFinalizeR3 (abridged listing): ring-3 init finalization; iterates
 * over all virtual CPUs of the VM. The loop body and the return statement
 * are elided.
 */
static int hmR3InitFinalizeR3(PVM pVM)
{
for (VMCPUID i = 0; i < pVM->cCpus; i++)
{
// When VBOX_WITH_STATISTICS is enabled, per-VCPU counters are set up here,
// for example the number of VM-exits:
//  - guest exit reason statistics
//  - injected event statistics
}
}
虚拟机reset API。对每个虚拟CPUreset
/*
 * HMR3Reset: VM reset notification for HM.
 *
 * Resets the HM state of every virtual CPU of the VM, in index order, by
 * calling HMR3ResetCpu() on each entry of pVM->aCpus.
 */
VMMR3_INT_DECL(void) HMR3Reset(PVM pVM)
{
    VMCPUID idCpu = 0;
    while (idCpu < pVM->cCpus)
    {
        HMR3ResetCpu(&pVM->aCpus[idCpu]);
        idCpu++;
    }
}
/*
 * HMR3ResetCpu: resets the HM state of a single virtual CPU.
 *
 * Clears the active and pending-event flags and puts the VT-x specific
 * fields back to their reset values (fWasInRealMode = true,
 * u64MsrApicBase = 0, fSwitchedTo64on32 = false).
 */
VMMR3_INT_DECL(void) HMR3ResetCpu(PVMCPU pVCpu)
{
// Per the author, loops in the various managers (e.g. via HMR3IsActive)
// read these values and leave their loop once they see them.
pVCpu->hm.s.fActive = false;
pVCpu->hm.s.Event.fPending = false;
pVCpu->hm.s.vmx.fWasInRealMode = true;
pVCpu->hm.s.vmx.u64MsrApicBase = 0;
pVCpu->hm.s.vmx.fSwitchedTo64on32 = false;
}
对应的也有虚拟机关闭的API
/*
 * HMR3Term (abridged listing): ring-3 termination of HM for a VM; delegates
 * to hmR3TermCPU(). The return statement is elided.
 */
VMMR3_INT_DECL(int) HMR3Term(PVM pVM)
{
hmR3TermCPU(pVM);
}
/*
 * hmR3TermCPU (abridged listing): per-CPU termination helper; the body is
 * elided entirely in this excerpt.
 */
static int hmR3TermCPU(PVM pVM)
{
// Frees some memory and resets some global variables (elided).
}