KVM之Hypercall机制

这里只关注x86架构。

按照内核文档的说明,在x86架构下,KVM hypercall通过一条3字节的指令发起:vmcall指令(Intel)或者vmmcall指令(AMD)。

最多可以通过寄存器rbx、rcx、rdx、rsi依次传递四个参数。hypercall的调用号存放于rax,调用的返回值也存放于rax中;除非某个hypercall另有明确说明,其他寄存器不会被改写。

原理分析
VM调用接口

arch/x86/include/asm/kvm_para.h中定义了hypercall接口:

/*
 * Instruction used to issue a KVM hypercall.
 *
 * ALTERNATIVE() is defined elsewhere; presumably it emits "vmcall" by default
 * and substitutes "vmmcall" on CPUs that set X86_FEATURE_VMMCALL -- confirm
 * against its definition.
 */
#define KVM_HYPERCALL \
        ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL)

/*
 * Issue hypercall number @nr with no arguments.
 *
 * @nr is placed in the "a" register (rax) before the KVM_HYPERCALL
 * instruction runs, and whatever that register holds afterwards is returned
 * to the caller. The asm is volatile and carries a "memory" clobber.
 */
static inline long kvm_hypercall0(unsigned int nr)
{
    long ret;
    asm volatile(KVM_HYPERCALL
             : "=a"(ret)
             : "a"(nr)
             : "memory");
    return ret;
}

/*
 * Issue hypercall number @nr with one argument.
 *
 * @nr is placed in the "a" register (rax) and @p1 in "b" (rbx) before the
 * KVM_HYPERCALL instruction runs; the value found in rax afterwards is
 * returned. The asm is volatile and carries a "memory" clobber.
 */
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
{
    long ret;
    asm volatile(KVM_HYPERCALL
             : "=a"(ret)
             : "a"(nr), "b"(p1)
             : "memory");
    return ret;
}

/*
 * Issue hypercall number @nr with two arguments.
 *
 * @nr is placed in the "a" register (rax), @p1 in "b" (rbx) and @p2 in "c"
 * (rcx) before the KVM_HYPERCALL instruction runs; the value found in rax
 * afterwards is returned. The asm is volatile and carries a "memory" clobber.
 */
static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
                  unsigned long p2)
{
    long ret;
    asm volatile(KVM_HYPERCALL
             : "=a"(ret)
             : "a"(nr), "b"(p1), "c"(p2)
             : "memory");
    return ret;
}

/*
 * Issue hypercall number @nr with three arguments.
 *
 * @nr is placed in the "a" register (rax), @p1 in "b" (rbx), @p2 in "c"
 * (rcx) and @p3 in "d" (rdx) before the KVM_HYPERCALL instruction runs; the
 * value found in rax afterwards is returned. The asm is volatile and carries
 * a "memory" clobber.
 */
static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
                  unsigned long p2, unsigned long p3)
{
    long ret;
    asm volatile(KVM_HYPERCALL
             : "=a"(ret)
             : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
             : "memory");
    return ret;
}

/*
 * Issue hypercall number @nr with four arguments, the largest argument count
 * among the wrappers shown here.
 *
 * @nr is placed in the "a" register (rax), @p1 in "b" (rbx), @p2 in "c"
 * (rcx), @p3 in "d" (rdx) and @p4 in "S" (rsi) before the KVM_HYPERCALL
 * instruction runs; the value found in rax afterwards is returned. The asm
 * is volatile and carries a "memory" clobber.
 */
static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
                  unsigned long p2, unsigned long p3,
                  unsigned long p4)
{
    long ret;
    asm volatile(KVM_HYPERCALL
             : "=a"(ret)
             : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
             : "memory");
    return ret;
}

/*
 * Issue hypercall number @nr with three arguments using a hard-coded
 * "vmmcall" instruction instead of the KVM_HYPERCALL macro.
 *
 * Register usage matches kvm_hypercall3(): @nr in "a" (rax), @p1 in "b"
 * (rbx), @p2 in "c" (rcx), @p3 in "d" (rdx); the value found in rax
 * afterwards is returned.
 *
 * NOTE(review): presumably vmmcall is used directly because SEV guests only
 * run on AMD hardware -- confirm against the kernel history.
 */
static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
                      unsigned long p2, unsigned long p3)
{
    long ret;

    asm volatile("vmmcall"
             : "=a"(ret)
             : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
             : "memory");
    return ret;
}

这几个函数的主要区别在于传递的参数个数不同;kvm_sev_hypercall3()则直接使用vmmcall指令,而不经过KVM_HYPERCALL宏。

KVM_HYPERCALL是一个3字节的指令序列:默认是vmcall指令,在设置了X86_FEATURE_VMMCALL的CPU(AMD)上会被替换为vmmcall指令。该指令会导致VM exit,控制权转交给VMM。

以其中kvm_hypercall4()函数为例:

"=a"(ret):表示返回值存放在rax寄存器中;

"a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4):表示调用号nr存放于rax寄存器,参数p1存放于rbx,p2存放于rcx,p3存放于rdx,p4存放于rsi。

VMM处理接口

arch/x86/kvm/vmx/vmx.c中定义了各类VM exit对应的处理函数表:

/*
 * The exit handlers return 1 if the exit was handled fully and guest execution
 * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
 * to be done to userspace and return 0.
 *
 * The table is indexed by exit reason; the "..." lines stand for entries
 * omitted from this excerpt.
 */
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	...
    [EXIT_REASON_VMCALL]                  = handle_vmcall,
	...
};

/*
 * Exit handler registered for EXIT_REASON_VMCALL: forwards the exit to
 * kvm_emulate_hypercall() and passes its result back unchanged.
 */
static int handle_vmcall(struct kvm_vcpu *vcpu)
{
    int rc = kvm_emulate_hypercall(vcpu);

    return rc;
}

具体处理hypercall的函数kvm_emulate_hypercall()则在arch/x86/kvm/x86.c中定义:

/*
 * Handle a hypercall issued by the guest running on @vcpu.
 *
 * When kvm_hv_hypercall_enabled() reports true, the call is delegated to
 * kvm_hv_hypercall(). Otherwise the hypercall number is read from rax and
 * four arguments from rbx, rcx, rdx and rsi, the call is dispatched on the
 * number, and the result is written back to the guest's rax.
 *
 * Returns the result of kvm_hv_hypercall() on the delegated path, otherwise
 * the result of kvm_skip_emulated_instruction().
 */
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
    unsigned long nr, a0, a1, a2, a3, ret;
    int op_64_bit;

    if (kvm_hv_hypercall_enabled(vcpu->kvm))
        return kvm_hv_hypercall(vcpu);

    nr = kvm_rax_read(vcpu);		/* fetch the call number and arguments from the registers */
    a0 = kvm_rbx_read(vcpu);
    a1 = kvm_rcx_read(vcpu);
    a2 = kvm_rdx_read(vcpu);
    a3 = kvm_rsi_read(vcpu);

    /* Traced with the raw register values, before any 32-bit masking. */
    trace_kvm_hypercall(nr, a0, a1, a2, a3);

    /* Outside 64-bit mode only the low 32 bits of each value are kept. */
    op_64_bit = is_64_bit_mode(vcpu);
    if (!op_64_bit) {
        nr &= 0xFFFFFFFF;
        a0 &= 0xFFFFFFFF;
        a1 &= 0xFFFFFFFF;
        a2 &= 0xFFFFFFFF;
        a3 &= 0xFFFFFFFF;
    }

    /* A non-zero guest CPL is rejected with -KVM_EPERM. */
    if (kvm_x86_ops.get_cpl(vcpu) != 0) {
        ret = -KVM_EPERM;
        goto out;
    }
    
    /*
     * Result for every path below that breaks out without assigning ret,
     * i.e. when a guest_pv_has() check fails.
     */
    ret = -KVM_ENOSYS;

    switch (nr) {			/* dispatch on the hypercall number nr */
    case KVM_HC_VAPIC_POLL_IRQ:
        ret = 0;
        break;
    case KVM_HC_KICK_CPU:
        if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
            break;

        kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
        kvm_sched_yield(vcpu->kvm, a1);
        ret = 0;
        break;
#ifdef CONFIG_X86_64
    case KVM_HC_CLOCK_PAIRING:
        ret = kvm_pv_clock_pairing(vcpu, a0, a1);
        break;
#endif
    case KVM_HC_SEND_IPI:
        if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
            break;

        ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
        break;
    case KVM_HC_SCHED_YIELD:
        if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
            break;

        kvm_sched_yield(vcpu->kvm, a0);
        ret = 0;
        break;
    default:
        ret = -KVM_ENOSYS;
        break;
    }
out:
    /* Outside 64-bit mode the result is truncated to 32 bits. */
    if (!op_64_bit)
        ret = (u32)ret;
    kvm_rax_write(vcpu, ret);		/* write the result back to the rax register */

    ++vcpu->stat.hypercalls;
    return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);

其中,调用号nr有以下几类:

  • KVM_HC_VAPIC_POLL_IRQ

    触发VM客户机退出,以便主机host再重新进入时检查挂起的中断。

  • KVM_HC_KICK_CPU

    将vcpu从HLT状态唤醒。

    使用举例:

    客户机中某个vcpu正在忙等待某个事件(比如等待spinlock可用),一旦忙等待的时间超过阈值,就可以执行HLT指令。执行HLT指令后,VMM会让该vcpu进入睡眠,直到相应的事件发生。之后,同一个VM客户机的另一个vcpu可以通过KVM_HC_KICK_CPU hypercall来唤醒指定APIC ID(a1参数)的vcpu;附加参数a0保留供以后使用。

  • KVM_HC_CLOCK_PAIRING

    同步VMM与VM的时钟。

    a0:客户机物理地址,主机会把struct kvm_clock_offset结构体拷贝到该地址

    a1:clock_type,目前只支持 KVM_CLOCK_PAIRING_WALLCLOCK (0)(对应于主机host的 CLOCK_REALTIME 时钟)

  • KVM_HC_SEND_IPI

    发送核间中断至多个vCPUs。返回成功传送IPI的vCPU数量。

    hypercall允许客户机发送多播IPI,64位下最多128个目的地址,32位下最多64个目的地址。

  • KVM_HC_SCHED_YIELD

    当IPI的目标vCPU被抢占(preempted)时,用于让出(yield)CPU。

    使用举例:向多个vCPU发送IPI时,如果目标中有vCPU被抢占了,则执行yield让出CPU。

VM调用hypercall实例

在 Linux-5.10.59 的内核代码中,搜索kvm_hypercall*的函数调用地方:

[root@localhost linux-5.10.59]# grep --include="*.c" -nr kvm_hypercall* .
./arch/arm64/kvm/pvtime.c:35:long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
./arch/arm64/kvm/hypercalls.c:67:               val = kvm_hypercall_pv_features(vcpu);
./arch/x86/kvm/x86.c:8156:      trace_kvm_hypercall(nr, a0, a1, a2, a3);
./arch/x86/kernel/kvm.c:538:                    ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
./arch/x86/kernel/kvm.c:549:            ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
./arch/x86/kernel/kvm.c:594:                    kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
./arch/x86/kernel/kvm.c:872:    kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
./tools/testing/selftests/kvm/lib/x86_64/processor.c:1249:uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
./tools/testing/selftests/kvm/x86_64/kvm_pv_test.c:126: r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
./drivers/ptp/ptp_kvm.c:58:             ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
./drivers/ptp/ptp_kvm.c:119:    ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
./drivers/ptp/ptp_kvm.c:180:    ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,

以其中一处为例,arch/x86/kernel/kvm.c 中唤醒一个vcpu:

/*
 * Kick a cpu by its apicid; used to wake up a halted vcpu.
 *
 * Looks up the APIC ID recorded for @cpu and issues the KVM_HC_KICK_CPU
 * hypercall with it. The flags argument is always passed as 0.
 */
static void kvm_kick_cpu(int cpu)
{
    unsigned long flags = 0;
    int apicid = per_cpu(x86_cpu_to_apicid, cpu);

    kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}
  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值