文章目录
Native execution manager (Hyper-V兼容)
Windows April 2018 Update之后引入了一个新的运行模式:Hyper-V 模式。当Hyper-V模式开启之后,Windows会(抢占式地)占用VT,其他软件不能在操作系统上再开启VT功能;同时Windows提供了一系列Hypervisor Platform APIs,其他软件可以调用这些API创建VMX Guest。后面几篇将介绍VirtualBox如何利用这一系列API实现VMX功能。
下图是一个Hyper-v的架构图,当Hyper-v开启的时候,当前的OS就是一个Root Partition,用hypervisor的API创建的虚拟机叫做Guest Partition。
20.1 winhvplatform.dll里的APIs
Hyper-V的公开API,Ring-3调用,文档在
https://docs.microsoft.com/en-us/virtualization/api/hypervisor-platform/hypervisor-platform
// Sets up the partition once its properties have been configured; this is the
// step that actually creates the VM (WHvCreatePartition only creates the handle).
HRESULT WINAPI WHvSetupPartition(
_In_ WHV_PARTITION_HANDLE Partition
);
// Queries Hyper-V related information, e.g. whether a hypervisor is present
// and which features the hypervisor supports.
HRESULT WINAPI WHvGetCapability(
_In_ WHV_CAPABILITY_CODE CapabilityCode,
_Out_writes_bytes_to_(CapabilityBufferSizeInBytes, *WrittenSizeInBytes) VOID* CapabilityBuffer,
_In_ UINT32 CapabilityBufferSizeInBytes,
_Out_opt_ UINT32 *WrittenSizeInBytes
);
// Creates a partition, which is the equivalent of creating a VM.
HRESULT WINAPI WHvCreatePartition(
_Out_ WHV_PARTITION_HANDLE* Partition
);
// Deletes a partition, i.e. deletes a virtual machine.
HRESULT WINAPI WHvDeletePartition(
_In_ WHV_PARTITION_HANDLE Partition
);
// Retrieves a property of the partition.
HRESULT WINAPI WHvGetPartitionProperty(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ WHV_PARTITION_PROPERTY_CODE PropertyCode,
_Out_writes_bytes_to_(PropertyBufferSizeInBytes, *WrittenSizeInBytes) VOID* PropertyBuffer,
_In_ UINT32 PropertyBufferSizeInBytes,
_Out_opt_ UINT32 *WrittenSizeInBytes
);
// Sets a property of the partition.
HRESULT WINAPI WHvSetPartitionProperty(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ WHV_PARTITION_PROPERTY_CODE PropertyCode,
_In_reads_bytes_(PropertyBufferSizeInBytes) const VOID* PropertyBuffer,
_In_ UINT32 PropertyBufferSizeInBytes
);
// Maps SourceAddress (a host virtual address) to GuestAddress.
// This is the equivalent of assigning memory to the virtual machine.
HRESULT WINAPI WHvMapGpaRange(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ VOID* SourceAddress,
_In_ WHV_GUEST_PHYSICAL_ADDRESS GuestAddress,
_In_ UINT64 SizeInBytes,
_In_ WHV_MAP_GPA_RANGE_FLAGS Flags
);
// Translates a guest virtual address (Gva) of the given VCPU into a guest
// physical address (Gpa); the outcome is reported through TranslationResult.
HRESULT WINAPI WHvTranslateGva(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ UINT32 VpIndex,
_In_ WHV_GUEST_VIRTUAL_ADDRESS Gva,
_In_ WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
_Out_ WHV_TRANSLATE_GVA_RESULT* TranslationResult,
_Out_ WHV_GUEST_PHYSICAL_ADDRESS* Gpa
);
// Creates a virtual CPU (VCPU).
HRESULT WINAPI WHvCreateVirtualProcessor(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ UINT32 VpIndex,
_In_ UINT32 Flags
);
// Deletes a VCPU.
HRESULT WINAPI WHvDeleteVirtualProcessor(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ UINT32 VpIndex
);
// Makes the VCPU start running guest OS code, similar to VMLAUNCH in VMX.
HRESULT WINAPI WHvRunVirtualProcessor(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ UINT32 VpIndex,
_Out_writes_bytes_(ExitContextSizeInBytes) VOID* ExitContext,
_In_ UINT32 ExitContextSizeInBytes
);
// A thread that is running guest OS code inside WHvRunVirtualProcessor can be
// forced out of the guest by another thread calling WHvCancelRunVirtualProcessor.
HRESULT WINAPI WHvCancelRunVirtualProcessor(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ UINT32 VpIndex,
_In_ UINT32 Flags
);
// Reads VCPU register values (WHvX64RegisterRax ...).
HRESULT WINAPI WHvGetVirtualProcessorRegisters(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ UINT32 VpIndex,
_In_reads_(RegisterCount) const WHV_REGISTER_NAME* RegisterNames,
_In_ UINT32 RegisterCount,
_Out_writes_(RegisterCount) WHV_REGISTER_VALUE* RegisterValues
);
// Writes VCPU register values (WHvX64RegisterRax ...).
HRESULT WINAPI WHvSetVirtualProcessorRegisters(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ UINT32 VpIndex,
_In_reads_(RegisterCount) const WHV_REGISTER_NAME* RegisterNames,
_In_ UINT32 RegisterCount,
_In_reads_(RegisterCount) const WHV_REGISTER_VALUE* RegisterValues
);
20.2 VID.dll 的导出函数
VID.dll里定义了一些API给winhvplatform.dll调用,这个dll最终会通过IOCTL调用到Vid.sys里(类似windows的ntdll.dll)
VID.dll导出了很多函数但是没有官方文档,网上能找到有人逆向出来的头文件,可以参考下面的链接
https://github.com/Wenzel/vid-sys/blob/master/src/hyperv/vid.h
VirtualBox代码里也定义了部分用到的API,在include/nt/vid.h里
其实实现NEM,只需要调用上面winhvplatform.dll里的API即可,virtualbox之所以使用这些VID的函数,是为了代替使用winhvplatform.dll里的API,其实就是自己实现了一遍winhvplatform.dll里的函数,目的是为了最终把NEM的实现放到R0里执行(直接调用VID.sys里的IOCTL),而且Virtualbox已经实现了直接调用VID.sys的IOCTL的代码并开启,所以这部分的API其实已经没有再被使用。
比如WHvCreateVirtualProcessor这个函数的伪代码如下:最终调用了VidMessageSlotMap,VidMapVpRegisterPage 和VidStartVirtualProcessor。
// Pseudo-code sketch of WHvCreateVirtualProcessor (not compilable as-is: the
// return type and parts of the body are elided). It maps the message slot for
// the VCPU, maps the VCPU register page and then starts the VCPU through the
// VID.dll functions.
WHvCreateVirtualProcessor(
_In_ WHV_PARTITION_HANDLE Partition,
_In_ UINT32 VpIndex,
_In_ UINT32 Flags
)
{
...
// Map the message slot for this VCPU.
VID_MAPPED_MESSAGE_SLOT Output;
if(VidMessageSlotMap((VID_PARTITION_HANDLE)Partition, &Output,VpIndex) == false)
{
// return an error
}
// Map the register page of this VCPU.
uint32_t out;
if(VidMapVpRegisterPage((VID_PARTITION_HANDLE)Partition,VpIndex,&out) == 0)
{
// return an error
}
// Finally start the VCPU.
VidStartVirtualProcessor((VID_PARTITION_HANDLE)Partition, VpIndex);
}
VirtualBox里定义了NEM_WIN_USE_OUR_OWN_RUN_API这个宏作为开关,目前并没有开启。
// Retrieves the partition ID.
DECLIMPORT(BOOL) VIDAPI VidGetHvPartitionId(VID_PARTITION_HANDLE hPartition, HV_PARTITION_ID *pidPartition);
// Called by WHvCreateVirtualProcessor to start a VCPU.
DECLIMPORT(BOOL) VIDAPI VidStartVirtualProcessor(VID_PARTITION_HANDLE hPartition, HV_VP_INDEX iCpu);
// Stops a VCPU; there is no corresponding WHv function.
DECLIMPORT(BOOL) VIDAPI VidStopVirtualProcessor(VID_PARTITION_HANDLE hPartition, HV_VP_INDEX iCpu);
// Called by WHvCreateVirtualProcessor to obtain a VID_MAPPED_MESSAGE_SLOT; it
// effectively creates a mapping that VidMessageSlotHandleAndGetNext uses later.
DECLIMPORT(BOOL) VIDAPI VidMessageSlotMap(VID_PARTITION_HANDLE hPartition, PVID_MAPPED_MESSAGE_SLOT pOutput, HV_VP_INDEX iCpu);
// Called after VidStartVirtualProcessor to wait for a VM-exit event.
DECLIMPORT(BOOL) VIDAPI VidMessageSlotHandleAndGetNext(VID_PARTITION_HANDLE hPartition, HV_VP_INDEX iCpu, uint32_t fFlags, uint32_t cMillies);
// APIs used for debugging.
// Retrieves the running status of a VCPU.
DECLIMPORT(BOOL) VIDAPI VidGetVirtualProcessorRunningStatus(VID_PARTITION_HANDLE hPartition, HV_VP_INDEX iCpu,
VID_PROCESSOR_STATUS *penmStatus);
// Retrieves VCPU state: reads cRegisters registers named by paRegNames into paRegValues.
DECLIMPORT(BOOL) VIDAPI VidGetVirtualProcessorState(VID_PARTITION_HANDLE hPartition, HV_VP_INDEX iCpu,
HV_REGISTER_NAME const *paRegNames, uint32_t cRegisters,
HV_REGISTER_VALUE *paRegValues);
// Sets VCPU state: writes cRegisters registers named by paRegNames from paRegValues.
DECLIMPORT(BOOL) VIDAPI VidSetVirtualProcessorState(VID_PARTITION_HANDLE hPartition, HV_VP_INDEX iCpu,
HV_REGISTER_NAME const *paRegNames, uint32_t cRegisters,
HV_REGISTER_VALUE const *paRegValues);
20.3 直接调用VID.sys里的IOCTL
因为Hyper-V只提供了R3的API,而且这些API最终都需要通过IOCTL调用内核里的函数,频繁的IOCTL/VMExit导致性能大大下降,所以VirtualBox把循环执行GuestOS代码的函数移到了R0去实现,直接调用VID.sys里的接口(但这些接口都是未公开接口,如何保证向后兼容?)。
WHV_PARTITION_HANDLE里的第二个项是VID_PARTITION_HANDLE, NEM调用SUPR0IoCtlSetupForHandle从VID_PARTITION_HANDLE这个用户态handle获取到VID.sys的device object。
// Excerpt of the ring-0 VM initialisation (other statements are elided with "...").
// The part shown sets up an I/O control context for the partition device handle
// so that later code can issue I/O controls against it.
VMMR0_INT_DECL(int) NEMR0InitVMPart2(PGVM pGVM)
{
...
// Turn the partition device handle into an I/O control context stored in pGVM->nemr0.s.pIoCtlCtx.
SUPR0IoCtlSetupForHandle(pGVM->pSession, pGVM->nem.s.hPartitionDevice, 0, &pGVM->nemr0.s.pIoCtlCtx);
...
}
/**
 * Creates an I/O control context for a user-mode handle.
 *
 * The handle is resolved to its file object and the related device object; both
 * are stored in a newly allocated context together with the driver's fast I/O
 * device-control entry point (if the driver provides one).
 *
 * @returns VINF_SUCCESS on success, a converted NT status if the handle cannot
 *          be referenced, VERR_NO_MEMORY if the context cannot be allocated.
 * @param   pSession    The session (not used by the code shown here).
 * @param   hHandle     The user-mode handle to resolve.
 * @param   fFlags      Flags (not used by the code shown here).
 * @param   ppCtx       Where to return the context on success.  The context
 *                      holds the reference taken on the file object.
 */
SUPR0DECL(int) SUPR0IoCtlSetupForHandle(PSUPDRVSESSION pSession, intptr_t hHandle, uint32_t fFlags, PSUPR0IOCTLCTX *ppCtx)
{
    /* Resolve the user-mode handle to the kernel file object behind it. */
    PFILE_OBJECT              pFileObject = NULL;
    OBJECT_HANDLE_INFORMATION HandleInfo  = { 0, 0 };
    NTSTATUS rcNt = ObReferenceObjectByHandle((HANDLE)hHandle, /*FILE_WRITE_DATA*/0, *IoFileObjectType,
                                              UserMode, (void **)&pFileObject, &HandleInfo);
    /* Without this check a bad handle leaves pFileObject NULL and it would be
       dereferenced below. */
    if (!NT_SUCCESS(rcNt))
        return RTErrConvertFromNtStatus(rcNt);

    /* Get the device object from the file object. */
    PDEVICE_OBJECT pDevObject = IoGetRelatedDeviceObject(pFileObject);

    /* Allocate a context to keep the two objects in. */
    PSUPR0IOCTLCTX pCtx = (PSUPR0IOCTLCTX)RTMemAllocZ(sizeof(*pCtx));
    if (pCtx)
    {
        pCtx->u32Magic      = SUPR0IOCTLCTX_MAGIC;
        pCtx->cRefs         = 1;
        pCtx->pFileObject   = pFileObject;
        pCtx->pDeviceObject = pDevObject;

        /* Look up the driver object of the device and remember its fast I/O
           device-control entry point, so I/O controls can later be issued either
           through that entry point or through IoCallDriver. */
        PDRIVER_OBJECT pDrvObject = pDevObject->DriverObject;
        if (   RT_VALID_PTR(pDrvObject->FastIoDispatch)
            && RT_VALID_PTR(pDrvObject->FastIoDispatch->FastIoDeviceControl))
            pCtx->pfnFastIoDeviceControl = pDrvObject->FastIoDispatch->FastIoDeviceControl;
        else
            pCtx->pfnFastIoDeviceControl = NULL;

        *ppCtx = pCtx;
        return VINF_SUCCESS;
    }

    /* Allocation failed: drop the reference taken above and report it. */
    ObDereferenceObject(pFileObject);
    return VERR_NO_MEMORY;
}
调用IOCTL
// nemHCWinRunGC issues the I/O control that makes the VCPU enter guest OS code.
// NOTE(review): this excerpt shows no return statement and never inspects rcNt;
// presumably the rest of the function has been elided - confirm against the full source.
NEM_TMPL_STATIC VBOXSTRICTRC nemHCWinRunGC(PVMCC pVM, PVMCPUCC pVCpu)
{
// The input buffer of the I/O control is just the VCPU id.
pVCpu->nem.s.uIoCtlBuf.idCpu = pVCpu->idCpu;
// Perform the "start virtual processor" I/O control; no output buffer is passed.
NTSTATUS rcNt = nemR0NtPerformIoControl(pVM, pVCpu, pVM->nemr0.s.IoCtlStartVirtualProcessor.uFunction,
&pVCpu->nem.s.uIoCtlBuf.idCpu, sizeof(pVCpu->nem.s.uIoCtlBuf.idCpu),
NULL, 0);
}
//nemR0NtPerformIoControl->SUPR0IoCtlPerform
/**
 * Performs an I/O control request on the device captured in @a pCtx.
 *
 * The driver's fast I/O device-control entry point is tried first (when the
 * context has one).  If it is absent or does not handle the request, an IRP is
 * built and sent with IoCallDriver instead.
 *
 * @returns VBox status code.  When @a piNativeRc is given, the NT status is
 *          stored there and VINF_SUCCESS is returned for every dispatched
 *          request; otherwise the NT status is converted.  VERR_NO_MEMORY if
 *          the IRP cannot be allocated.
 * @param   pCtx            The I/O control context.
 * @param   uFunction       The I/O control code; its low two bits select the
 *                          buffering method.
 * @param   pvInput         Input buffer used by the IRP path for buffered and
 *                          out-direct methods.
 * @param   pvInputUser     User address of the input; used by the fast path and
 *                          by the IRP path for in-direct / neither methods.
 * @param   cbInput         Size of the input in bytes.
 * @param   pvOutput        Output buffer used by the IRP path for buffered and
 *                          in-direct methods.
 * @param   pvOutputUser    User address of the output; used by the fast path and
 *                          by the IRP path for out-direct / neither methods.
 * @param   cbOutput        Size of the output in bytes.
 * @param   piNativeRc      Where to return the NT status.  Optional.
 */
SUPR0DECL(int) SUPR0IoCtlPerform(PSUPR0IOCTLCTX pCtx, uintptr_t uFunction,
                                 void *pvInput, RTR3PTR pvInputUser, size_t cbInput,
                                 void *pvOutput, RTR3PTR pvOutputUser, size_t cbOutput,
                                 int32_t *piNativeRc)
{
    IO_STATUS_BLOCK Ios = RTNT_IO_STATUS_BLOCK_INITIALIZER;

    /*
     * First try the fast I/O device-control entry point.
     */
    if (pCtx->pfnFastIoDeviceControl)
    {
        BOOLEAN fHandled = pCtx->pfnFastIoDeviceControl(pCtx->pFileObject,
                                                        TRUE /*Wait*/,
                                                        (void *)pvInputUser, (ULONG)cbInput,
                                                        (void *)pvOutputUser, (ULONG)cbOutput,
                                                        uFunction,
                                                        &Ios,
                                                        pCtx->pDeviceObject);
        if (fHandled)
        {
            /* Report the status of the handled request. */
            if (piNativeRc)
            {
                *piNativeRc = Ios.Status;
                return VINF_SUCCESS;
            }
            if (NT_SUCCESS(Ios.Status))
                return VINF_SUCCESS;
            return RTErrConvertFromNtStatus(Ios.Status);
        }
    }

    /*
     * The fast path is unavailable or did not handle the request: fall back on
     * IoCallDriver.  Pick the input/output pointers according to the buffering
     * method encoded in the low two bits of the control code.
     */
    switch (uFunction & 3)
    {
        case METHOD_BUFFERED:
            break;
        case METHOD_IN_DIRECT:
            pvInput = (void *)pvInputUser;
            break;
        case METHOD_NEITHER:
            pvInput = (void *)pvInputUser;
            RT_FALL_THRU();
        case METHOD_OUT_DIRECT:
            pvOutput = (void *)pvOutputUser;
            break;
    }

    /*
     * Build a device I/O control IRP and send it down with IoCallDriver.
     */
    int    rc;
    KEVENT Event;
    KeInitializeEvent(&Event, NotificationEvent, FALSE);
    PIRP pIrp = IoBuildDeviceIoControlRequest(uFunction, pCtx->pDeviceObject,
                                              pvInput, (ULONG)cbInput, pvOutput, (ULONG)cbOutput,
                                              FALSE /* InternalDeviceControl */, &Event, &Ios);
    if (pIrp)
    {
        IoGetNextIrpStackLocation(pIrp)->FileObject = pCtx->pFileObject;

        NTSTATUS rcNt = IoCallDriver(pCtx->pDeviceObject, pIrp);
        if (rcNt == STATUS_PENDING)
        {
            /* Wait for the request to complete, then take the final status
               from the I/O status block. */
            rcNt = KeWaitForSingleObject(&Event,      /* Object */
                                         Executive,   /* WaitReason */
                                         KernelMode,  /* WaitMode */
                                         FALSE,       /* Alertable */
                                         NULL);       /* TimeOut */
            rcNt = Ios.Status;
        }
        /* Completed synchronously: prefer a non-success status from the status block. */
        else if (NT_SUCCESS(rcNt) && Ios.Status != STATUS_SUCCESS)
            rcNt = Ios.Status;

        if (piNativeRc)
        {
            *piNativeRc = rcNt;
            rc = VINF_SUCCESS;
        }
        else if (NT_SUCCESS(rcNt))
            rc = VINF_SUCCESS;
        else
            rc = RTErrConvertFromNtStatus(rcNt);
    }
    else
    {
        if (piNativeRc)
            *piNativeRc = STATUS_NO_MEMORY;
        rc = VERR_NO_MEMORY;
    }

    /* The status computed above must be handed back to the caller. */
    return rc;
}
20.4 和VMM通信的API:HvlInvokeHypercall
root partition和hypervisor的交互,主要通过HvlInvokeHypercall这个API。
NEM实现里主要调用这个API用于内存管理和读写寄存器,但其实完全可以通过只调用这个API完成整个虚拟机的调用。我的理解是NEM为了避免多次R3/R0切换,只把部分必要的API调用(循环执行GuestOS代码的部分)和map内存等操作放到R0完成,其他调用不多的API,还是调用原始的winhvplatform.dll里的API。
// Pointer to HvlInvokeHypercall: the first argument is the hypercall ID, the
// second the input parameter and the third the output parameter.
// NOTE(review): the HCPhys prefix suggests host-physical addresses - confirm.
static uint64_t (*g_pfnHvlInvokeHypercall)(uint64_t uCallInfo, uint64_t HCPhysInput, uint64_t HCPhysOutput);
// Hypercall IDs, defined in include\nt\hyperv.h; according to the original note
// these IDs are not publicly documented either.
// The list is truncated here ("...") and the typedef name is not shown.
typedef enum
{
HvCallReserved0000 = 0,
HvCallSwitchVirtualAddressSpace,
HvCallFlushVirtualAddressSpace,
HvCallFlushVirtualAddressList,
HvCallGetLogicalProcessorRunTime,
/* 5, 6 & 7 are deprecated / reserved. */
HvCallNotifyLongSpinWait = 8,
HvCallParkLogicalProcessors, /**< @since v2 */
HvCallInvokeHypervisorDebugger, /**< @since v2 - not mentioned in TLFS v5.0b */
HvCallSendSyntheticClusterIpi, /**< @since v? */
...
}
// At start-up, the address of HvlInvokeHypercall is looked up in the NT kernel.
// Excerpt only: the surrounding statements are elided with "...".
VMMR0_INT_DECL(int) NEMR0InitVM(PGVM pGVM)
{
...
// Resolve the kernel symbol "HvlInvokeHypercall" into g_pfnHvlInvokeHypercall.
rc = RTR0DbgKrnlInfoQuerySymbol(hKrnlInfo, NULL, "HvlInvokeHypercall", (void **)&g_pfnHvlInvokeHypercall);
...
}
// Example of invoking a hypercall: the code below issues the HvCallReadGpa
// hypercall to read the data at a guest physical address (GPA).
// NOTE(review): the excerpt references pIn without declaring it, does not use
// GCPhys and has no return statement; presumably the code that fills in the
// hypercall input and evaluates uResult has been elided - confirm against the full source.
static int nemR3WinDummyReadGpa(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhys)
{
// The input starts at HCPhysHypercallData; the output follows right after the input structure.
uint64_t volatile uResult = g_pfnHvlInvokeHypercall(HvCallReadGpa, pGVCpu->nemr0.s.HCPhysHypercallData,
pGVCpu->nemr0.s.HCPhysHypercallData + sizeof(*pIn));
}
20.5 创建一个VM的例子:
使用WinHvPlatform里的公开APIs创建一个VM的简单例子
/**
 * Minimal example: builds a single-VCPU VM with the public WinHvPlatform APIs,
 * maps guest memory into it and enters the guest once.
 *
 * Uses g_pbMem, g_cbMem and MY_MEM_BASE, which are defined outside this block.
 * On success the partition is left alive (this example does not tear it down);
 * on failure after the partition was created, the partition is deleted.
 *
 * @return 0 on success, -1 if any step fails.
 */
int createVM(void)
{
    /* Load the Hyper-V platform DLL. */
    HMODULE hmod = LoadLibraryW(L"WinHvPlatform.dll");
    if (hmod == NULL) {
        return -1;
    }

    /* Create a VM; at this point only the handle exists. */
    WHV_PARTITION_HANDLE hPartition;
    HRESULT hrc = WHvCreatePartition(&hPartition);
    if (FAILED(hrc)) {
        return -1;
    }

    /* Configure the VM with a single processor. */
    WHV_PARTITION_PROPERTY Property;
    memset(&Property, 0, sizeof(Property));
    Property.ProcessorCount = 1;
    hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorCount, &Property, sizeof(Property));
    if (FAILED(hrc)) {
        goto failed;
    }

    /* Make both CPUID and MSR accesses cause a VM exit. */
    memset(&Property, 0, sizeof(Property));
    Property.ExtendedVmExits.X64CpuidExit = 1;
    Property.ExtendedVmExits.X64MsrExit = 1;
    hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExtendedVmExits, &Property, sizeof(Property));
    if (FAILED(hrc)) {
        goto failed;
    }

    /* With the properties in place, WHvSetupPartition really creates the VM. */
    hrc = WHvSetupPartition(hPartition);
    if (FAILED(hrc)) {
        goto failed;
    }

    /* Create one VCPU. */
    hrc = WHvCreateVirtualProcessor(hPartition, 0 /*idVCpu*/, 0 /*fFlags*/);
    if (FAILED(hrc)) {
        goto failed;
    }

    /* Allocate a block of memory for the VM to use. */
    g_pbMem = (unsigned char *)VirtualAlloc(NULL, g_cbMem, MEM_COMMIT, PAGE_READWRITE);
    if (g_pbMem == NULL) {
        goto failed;
    }

    /* Map that host virtual range at the chosen guest physical address,
       readable, writable and executable. */
    hrc = WHvMapGpaRange(hPartition, g_pbMem, MY_MEM_BASE /*GCPhys*/, g_cbMem,
                         WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute);
    if (FAILED(hrc)) {
        goto failed;
    }

    /* Enter the guest OS (run the VCPU). Use the partition created above. */
    WHV_RUN_VP_EXIT_CONTEXT ExitInfo;
    memset(&ExitInfo, 0, sizeof(ExitInfo));
    hrc = WHvRunVirtualProcessor(hPartition, 0 /*idCpu*/, &ExitInfo, sizeof(ExitInfo));
    if (FAILED(hrc)) {
        goto failed;
    }
    return 0;

failed:
    /* Do not leave a half-configured partition behind. */
    WHvDeletePartition(hPartition);
    return -1;
}
参考资料
https://android.googlesource.com/platform/prebuilts/gcc/linux-x86/host/x86_64-w64-mingw32-4.8/+/refs/heads/emu-2.8-release/x86_64-w64-mingw32/include/WinHvPlatformDefs.h
https://github.com/Wenzel/vid-sys/blob/master/src/hyperv/vid.h