struct kvm {
struct mutex lock; /* protects the vcpus array and APIC accesses */
spinlock_t mmu_lock;
struct rw_semaphore slots_lock;
struct mm_struct *mm; /* userspace tied to this vm */
int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
KVM_PRIVATE_MEM_SLOTS];
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
struct list_head vm_list;
struct file *filp;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
struct kvm_vm_stat stat;
struct kvm_arch arch;
};
struct kvm_vcpu {
struct kvm *kvm;
struct preempt_notifier preempt_notifier;
int vcpu_id;
struct mutex mutex;
int cpu;
struct kvm_run *run;
int guest_mode;
unsigned long requests;
struct kvm_guest_debug guest_debug;
int fpu_active;
int guest_fpu_loaded;
wait_queue_head_t wq;
int sigset_active;
sigset_t sigset;
struct kvm_vcpu_stat stat;
#ifdef CONFIG_HAS_IOMEM
int mmio_needed;
int mmio_read_completed;
int mmio_is_write;
int mmio_size;
unsigned char mmio_data[8];
gpa_t mmio_phys_addr;
#endif
struct kvm_vcpu_arch arch;
};
struct kvm_vcpu_arch {
u64 host_tsc;
int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
unsigned long cr0;
unsigned long cr2;
unsigned long cr3;
unsigned long cr4;
unsigned long cr8;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
#define VCPU_MP_STATE_RUNNABLE 0
#define VCPU_MP_STATE_UNINITIALIZED 1
#define VCPU_MP_STATE_INIT_RECEIVED 2
#define VCPU_MP_STATE_SIPI_RECEIVED 3
#define VCPU_MP_STATE_HALTED 4
int mp_state;
int sipi_vector;
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
struct kvm_mmu mmu;
struct kvm_mmu_memory_cache mmu_pte_chain_cache;
struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
gfn_t last_pt_write_gfn;
int last_pt_write_count;
u64 *last_pte_updated;
struct {
gfn_t gfn; /* presumed gfn during guest pte update */
struct page *page; /* page corresponding to that gfn */
} update_pte;
struct i387_fxsave_struct host_fx_image;
struct i387_fxsave_struct guest_fx_image;
gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
struct kvm_queued_exception {
bool pending;
bool has_error_code;
u8 nr;
u32 error_code;
} exception;
struct {
int active;
u8 save_iopl;
struct kvm_save_segment {
u16 selector;
unsigned long base;
u32 limit;
u32 ar;
} tr, es, ds, fs, gs;
} rmode;
int halt_request; /* real mode on Intel only */
int cpuid_nent;
struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
};
1.创建虚拟机
vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
static int kvm_dev_ioctl_create_vm(void)
{
int fd, r;
struct inode *inode;
struct file *file;
struct kvm *kvm;
kvm = kvm_create_vm();
if (IS_ERR(kvm))
return PTR_ERR(kvm);
r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
if (r) {
kvm_destroy_vm(kvm);
return r;
}
kvm->filp = file;
return fd;
}
这个kvm_vm_fops有个mmap的地址,估计和用户态的fd有关。
static struct kvm *kvm_create_vm(void)
{
struct kvm *kvm = kvm_arch_create_vm();
if (IS_ERR(kvm))
goto out;
kvm->mm = current->mm;
atomic_inc(&kvm->mm->mm_count);
spin_lock_init(&kvm->mmu_lock);
kvm_io_bus_init(&kvm->pio_bus);
mutex_init(&kvm->lock);
kvm_io_bus_init(&kvm->mmio_bus);
init_rwsem(&kvm->slots_lock);
spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
out:
return kvm;
}
create下面有一些list,估计和vm的组织有关,暂先不关。
struct kvm *kvm_arch_create_vm(void)
{
struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
if (!kvm)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
return kvm;
}
这段代码更简单,就是个list的初始化。
创建vm的过程围绕struct kvm *kvm = kvm_create_vm();
2.设置虚拟机内存
struct kvm_userspace_memory_region region={
.slot = 0,
.guest_phys_addr = 0,
.memory_size = ram_size,
.userspace_addr = (u64)ram_start
};
ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion);
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct
kvm_userspace_memory_region *mem,
int user_alloc)
{
if (mem->slot >= KVM_MEMORY_SLOTS)
return -EINVAL;
return kvm_set_memory_region(kvm, mem, user_alloc);
}
3.新建虚拟CPU
vcpu->vcpu_fd = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, i);
/*
* Creates some virtual cpus. Good luck creating more than one.
*/
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
{
int r;
struct kvm_vcpu *vcpu;
if (!valid_vcpu(n))
return -EINVAL;
vcpu = kvm_arch_vcpu_create(kvm, n);
if (IS_ERR(vcpu))
return PTR_ERR(vcpu);
preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
r = kvm_arch_vcpu_setup(vcpu);
if (r)
goto vcpu_destroy;
mutex_lock(&kvm->lock);
if (kvm->vcpus[n]) {
r = -EEXIST;
mutex_unlock(&kvm->lock);
goto vcpu_destroy;
}
kvm->vcpus[n] = vcpu;
mutex_unlock(&kvm->lock);
/* Now it's all set up, let userspace reach it */
r = create_vcpu_fd(vcpu);
if (r < 0)
goto unlink;
return r;
unlink:
mutex_lock(&kvm->lock);
kvm->vcpus[n] = NULL;
mutex_unlock(&kvm->lock);
vcpu_destroy:
kvm_arch_vcpu_destroy(vcpu);
return r;
}
创建虚拟cpu就是填充kvm_vcpu结构体的过程。过程如下vmx_create_vcpu:
这里有个vmx用来保存cpu的各种状态,恢复cpu的运行。但最后只返回了vcpu。
static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
int cpu;
if (!vmx)
return ERR_PTR(-ENOMEM);
err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
if (err)
goto free_vcpu;
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!vmx->guest_msrs) {
err = -ENOMEM;
goto uninit_vcpu;
}
vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!vmx->host_msrs)
goto free_guest_msrs;
vmx->vmcs = alloc_vmcs();
if (!vmx->vmcs)
goto free_msrs;
vmcs_clear(vmx->vmcs);
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
err = vmx_vcpu_setup(vmx);
vmx_vcpu_put(&vmx->vcpu);
put_cpu();
if (err)
goto free_vmcs;
if (vm_need_virtualize_apic_accesses(kvm))
if (alloc_apic_access_page(kvm) != 0)
goto free_vmcs;
return &vmx->vcpu;
free_vmcs:
free_vmcs(vmx->vmcs);
free_msrs:
kfree(vmx->host_msrs);
free_guest_msrs:
kfree(vmx->guest_msrs);
uninit_vcpu:
kvm_vcpu_uninit(&vmx->vcpu);
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vmx);
return ERR_PTR(err);
}
struct vcpu_vmx {
struct kvm_vcpu vcpu;
int launched;
u8 fail;
u32 idt_vectoring_info;
struct kvm_msr_entry *guest_msrs;
struct kvm_msr_entry *host_msrs;
int nmsrs;
int save_nmsrs;
int msr_offset_efer;
#ifdef CONFIG_X86_64
int msr_offset_kernel_gs_base;
#endif
struct vmcs *vmcs;
struct {
int loaded;
u16 fs_sel, gs_sel, ldt_sel;
int gs_ldt_reload_needed;
int fs_reload_needed;
int guest_efer_loaded;
} host_state;
struct {
struct {
bool pending;
u8 vector;
unsigned rip;
} irq;
} rmode;
};
接下来这个kvm_vcpu_init 把kvm_vpcu的run成员复制为页的地址。
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
struct page *page;
int r;
mutex_init(&vcpu->mutex);
vcpu->cpu = -1;
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
init_waitqueue_head(&vcpu->wq);
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
vcpu->run = page_address(page);
r = kvm_arch_vcpu_init(vcpu);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_init);
这里还是对struct kvm_vcpu 结构的arch成员进行赋值。
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
struct page *page;
struct kvm *kvm;
int r;
kvm = vcpu->kvm;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
vcpu->arch.pio_data = page_address(page);
r = kvm_mmu_create(vcpu);
if (irqchip_in_kernel(kvm)) {
r = kvm_create_lapic(vcpu);
if (r < 0)
goto fail_mmu_destroy;
}
return 0;
}
再看下kvm_arch_vcpu_setup
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int r;
/* We do fxsave: this must be aligned. */
BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
if (r < 0)
goto free_vcpu;
return 0;
free_vcpu:
kvm_x86_ops->vcpu_free(vcpu);
return r;
}
vmx和kvm_vcpu可以相互转化
static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 msr;
int ret;
if (!init_rmode_tss(vmx->vcpu.kvm)) {
ret = -ENOMEM;
goto out;
}
vmx->vcpu.arch.rmode.active = 0;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
set_cr8(&vmx->vcpu, 0);
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (vmx->vcpu.vcpu_id == 0)
msr |= MSR_IA32_APICBASE_BSP;
kvm_set_apic_base(&vmx->vcpu, msr);
fx_init(&vmx->vcpu);
/*
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
* insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
*/
if (vmx->vcpu.vcpu_id == 0) {
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
vmcs_writel(GUEST_CS_BASE, 0x000f0000);
} else {
vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
}
vmcs_write32(GUEST_CS_LIMIT, 0xffff);
vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
seg_setup(VCPU_SREG_DS);
seg_setup(VCPU_SREG_ES);
seg_setup(VCPU_SREG_FS);
seg_setup(VCPU_SREG_GS);
seg_setup(VCPU_SREG_SS);
vmcs_write16(GUEST_TR_SELECTOR, 0);
vmcs_writel(GUEST_TR_BASE, 0);
vmcs_write32(GUEST_TR_LIMIT, 0xffff);
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
vmcs_write16(GUEST_LDTR_SELECTOR, 0);
vmcs_writel(GUEST_LDTR_BASE, 0);
vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
vmcs_write32(GUEST_SYSENTER_CS, 0);
vmcs_writel(GUEST_SYSENTER_ESP, 0);
vmcs_writel(GUEST_SYSENTER_EIP, 0);
vmcs_writel(GUEST_RFLAGS, 0x02);
if (vmx->vcpu.vcpu_id == 0)
vmcs_writel(GUEST_RIP, 0xfff0);
else
vmcs_writel(GUEST_RIP, 0);
vmcs_writel(GUEST_RSP, 0);
/* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
vmcs_writel(GUEST_DR7, 0x400);
vmcs_writel(GUEST_GDTR_BASE, 0);
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
vmcs_writel(GUEST_IDTR_BASE, 0);
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
vmcs_write32(GUEST_ACTIVITY_STATE, 0);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
guest_write_tsc(0);
/* Special registers */
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
setup_msrs(vmx);
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
if (cpu_has_vmx_tpr_shadow()) {
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
if (vm_need_tpr_shadow(vmx->vcpu.kvm))
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
page_to_phys(vmx->vcpu.arch.apic->regs_page));
vmcs_write32(TPR_THRESHOLD, 0);
}
if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
vmcs_write64(APIC_ACCESS_ADDR,
page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
vmx->vcpu.arch.cr0 = 0x60000010;
vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
vmx_set_cr4(&vmx->vcpu, 0);
#ifdef CONFIG_X86_64
vmx_set_efer(&vmx->vcpu, 0);
#endif
vmx_fpu_activate(&vmx->vcpu);
update_exception_bitmap(&vmx->vcpu);
return 0;
out:
return ret;
}
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (!is_paging(vcpu))
return nonpaging_init_context(vcpu);
else if (is_long_mode(vcpu))
return paging64_init_context(vcpu);
else if (is_pae(vcpu))
return paging32E_init_context(vcpu);
else
return paging32_init_context(vcpu);
}
4.设置运行模式
static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
u32 ar;
if (vcpu->arch.rmode.active && seg == VCPU_SREG_TR) {
vcpu->arch.rmode.tr.selector = var->selector;
vcpu->arch.rmode.tr.base = var->base;
vcpu->arch.rmode.tr.limit = var->limit;
vcpu->arch.rmode.tr.ar = vmx_segment_access_rights(var);
return;
}
vmcs_writel(sf->base, var->base);
vmcs_write32(sf->limit, var->limit);
vmcs_write16(sf->selector, var->selector);
if (vcpu->arch.rmode.active && var->s) {
/*
* Hack real-mode segments into vm86 compatibility.
*/
if (var->base == 0xffff0000 && var->selector == 0xf000)
vmcs_writel(sf->base, 0xf0000);
ar = 0xf3;
} else
ar = vmx_segment_access_rights(var);
vmcs_write32(sf->ar_bytes, ar);
}
5.运行虚拟机
ioctl(vcpu->fd, KVM_RUN, 0) < 0)
static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
/*
* Loading guest fpu may have cleared host cr0.ts
*/
vmcs_writel(HOST_CR0, read_cr0());
asm(
/* Store host registers */
#ifdef CONFIG_X86_64
"push %%rdx; push %%rbp;"
"push %%rcx \n\t"
#else
"push %%edx; push %%ebp;"
"push %%ecx \n\t"
#endif
ASM_VMX_VMWRITE_RSP_RDX "\n\t"
/* Check if vmlaunch of vmresume is needed */
"cmpl $0, %c[launched](%0) \n\t"
/* Load guest registers. Don't clobber flags. */
#ifdef CONFIG_X86_64
"mov %c[cr2](%0), %%rax \n\t" //rax = vmx->cr2;
"mov %%rax, %%cr2 \n\t"
"mov %c[rax](%0), %%rax \n\t"
"mov %c[rbx](%0), %%rbx \n\t"
"mov %c[rdx](%0), %%rdx \n\t"
"mov %c[rsi](%0), %%rsi \n\t"
"mov %c[rdi](%0), %%rdi \n\t"
"mov %c[rbp](%0), %%rbp \n\t"
"mov %c[r8](%0), %%r8 \n\t"
"mov %c[r9](%0), %%r9 \n\t"
"mov %c[r10](%0), %%r10 \n\t"
"mov %c[r11](%0), %%r11 \n\t"
"mov %c[r12](%0), %%r12 \n\t"
"mov %c[r13](%0), %%r13 \n\t"
"mov %c[r14](%0), %%r14 \n\t"
"mov %c[r15](%0), %%r15 \n\t"
"mov %c[rcx](%0), %%rcx \n\t" /* kills %0 (rcx) */
#else
"mov %c[cr2](%0), %%eax \n\t"
"mov %%eax, %%cr2 \n\t"
"mov %c[rax](%0), %%eax \n\t"
"mov %c[rbx](%0), %%ebx \n\t"
"mov %c[rdx](%0), %%edx \n\t"
"mov %c[rsi](%0), %%esi \n\t"
"mov %c[rdi](%0), %%edi \n\t"
"mov %c[rbp](%0), %%ebp \n\t"
"mov %c[rcx](%0), %%ecx \n\t" /* kills %0 (ecx) */
#endif
/* Enter guest mode */
"jne .Llaunched \n\t"
ASM_VMX_VMLAUNCH "\n\t"
"jmp .Lkvm_vmx_return \n\t"
".Llaunched: " ASM_VMX_VMRESUME "\n\t"
".Lkvm_vmx_return: "
/* Save guest registers, load host registers, keep flags */
#ifdef CONFIG_X86_64
"xchg %0, (%%rsp) \n\t"
"mov %%rax, %c[rax](%0) \n\t"
"mov %%rbx, %c[rbx](%0) \n\t"
"pushq (%%rsp); popq %c[rcx](%0) \n\t"
"mov %%rdx, %c[rdx](%0) \n\t"
"mov %%rsi, %c[rsi](%0) \n\t"
"mov %%rdi, %c[rdi](%0) \n\t"
"mov %%rbp, %c[rbp](%0) \n\t"
"mov %%r8, %c[r8](%0) \n\t"
"mov %%r9, %c[r9](%0) \n\t"
"mov %%r10, %c[r10](%0) \n\t"
"mov %%r11, %c[r11](%0) \n\t"
"mov %%r12, %c[r12](%0) \n\t"
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
"mov %%cr2, %%rax \n\t"
"mov %%rax, %c[cr2](%0) \n\t"
"pop %%rbp; pop %%rbp; pop %%rdx \n\t"
#else
"xchg %0, (%%esp) \n\t"
"mov %%eax, %c[rax](%0) \n\t"
"mov %%ebx, %c[rbx](%0) \n\t"
"pushl (%%esp); popl %c[rcx](%0) \n\t"
"mov %%edx, %c[rdx](%0) \n\t"
"mov %%esi, %c[rsi](%0) \n\t"
"mov %%edi, %c[rdi](%0) \n\t"
"mov %%ebp, %c[rbp](%0) \n\t"
"mov %%cr2, %%eax \n\t"
"mov %%eax, %c[cr2](%0) \n\t"
"pop %%ebp; pop %%ebp; pop %%edx \n\t"
#endif
"setbe %c[fail](%0) \n\t"
: : "c"(vmx), "d"((unsigned long)HOST_RSP),
[launched]"i"(offsetof(struct vcpu_vmx, launched)),
[fail]"i"(offsetof(struct vcpu_vmx, fail)),
[rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
[rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
[rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
[rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
[rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
[rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
[rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
#ifdef CONFIG_X86_64
[r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
[r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
[r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
[r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
[r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
[r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
[r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
[r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
#endif
[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
: "cc", "memory"
#ifdef CONFIG_X86_64
, "rbx", "rdi", "rsi"
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#else
, "ebx", "edi", "rsi"
#endif
);
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
vcpu->arch.interrupt_window_open =
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* We need to handle NMIs before interrupts are enabled */
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
asm("int $2");
}
5.接管中断和其他异常
6.小结
最终VMM通过VMX 管理VMCS来实现虚拟机的执行管理。但我感觉核心还是在vmx_vcpu_run中,其他就是VMCS的状态初始化。我们预先要实现的状态要初始化到VMCS状态里,通过run函数把虚拟机跑起来。
一下是kvm中的一些关键结构,做了些简化: