上一节讲了 QEMU 会通过 kvm_vcpu_fops 来运行虚拟 cpu，具体 code 如下:
2320 static struct file_operations kvm_vcpu_fops = {
2321 .release = kvm_vcpu_release,
2322 .unlocked_ioctl = kvm_vcpu_ioctl,
2323 #ifdef CONFIG_KVM_COMPAT
2324 .compat_ioctl = kvm_vcpu_compat_ioctl,
2325 #endif
2326 .mmap = kvm_vcpu_mmap,
2327 .llseek = noop_llseek,
2328 };
其中 .unlocked_ioctl 指向的 kvm_vcpu_ioctl 就是用户态 ioctl 进入内核后的入口，实现如下:
static long kvm_vcpu_ioctl(struct file *filp,
2415 unsigned int ioctl, unsigned long arg)
2416 {
2417 struct kvm_vcpu *vcpu = filp->private_data;
2418 void __user *argp = (void __user *)arg;
2419 int r;
2420 struct kvm_fpu *fpu = NULL;
2421 struct kvm_sregs *kvm_sregs = NULL;
2422
2423 if (vcpu->kvm->mm != current->mm)
2424 return -EIO;
2425
2426 if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
2427 return -EINVAL;
2428
2429 #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
2430 /*
2431 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
2432 * so vcpu_load() would break it.
2433 */
2434 if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
2435 return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
2436 #endif
2437
2438
2439 r = vcpu_load(vcpu);
2440 if (r)
2441 return r;
2442 switch (ioctl) {
2443 case KVM_RUN:
2444 r = -EINVAL;
2445 if (arg)
2446 goto out;
2447 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
2448 /* The thread running this VCPU changed. */
2449 struct pid *oldpid = vcpu->pid;
2450 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
2451
2452 rcu_assign_pointer(vcpu->pid, newpid);
2453 if (oldpid)
2454 synchronize_rcu();
2455 put_pid(oldpid);
2456 }
2457 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
2458 trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
2459 break;
2439 行的 vcpu_load 将该 vcpu 的寄存器状态装载到物理 cpu 中；随后 switch 走到 2443 行的 KVM_RUN case，最终在 2457 行调用 kvm_arch_vcpu_ioctl_run 来运行虚拟 cpu:
549 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
550 {
551 int ret;
552 sigset_t sigsaved;
553
554 if (unlikely(!kvm_vcpu_initialized(vcpu)))
555 return -ENOEXEC;
556
557 ret = kvm_vcpu_first_run_init(vcpu);
558 if (ret)
559 return ret;
560
561 if (run->exit_reason == KVM_EXIT_MMIO) {
562 ret = kvm_handle_mmio_return(vcpu, vcpu->run);
563 if (ret)
564 return ret;
565 }
566
567 if (vcpu->sigset_active)
568 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
569
570 ret = 1;
571 run->exit_reason = KVM_EXIT_UNKNOWN;
572 while (ret > 0) {
573 /*
574 * Check conditions before entering the guest
575 */
576 cond_resched();
577
578 update_vttbr(vcpu->kvm);
579
580 if (vcpu->arch.power_off || vcpu->arch.pause)
581 vcpu_sleep(vcpu);
582
583 /*
584 * Preparing the interrupts to be injected also
585 * involves poking the GIC, which must be done in a
586 * non-preemptible context.
587 */
588 preempt_disable();
589 kvm_pmu_flush_hwstate(vcpu);
590 kvm_timer_flush_hwstate(vcpu);
591 kvm_vgic_flush_hwstate(vcpu);
592
593 local_irq_disable();
594
595 /*
596 * Re-check atomic conditions
597 */
598 if (signal_pending(current)) {
599 ret = -EINTR;
600 run->exit_reason = KVM_EXIT_INTR;
601 }
602
603 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
604 vcpu->arch.power_off || vcpu->arch.pause) {
605 local_irq_enable();
606 kvm_pmu_sync_hwstate(vcpu);
607 kvm_timer_sync_hwstate(vcpu);
608 kvm_vgic_sync_hwstate(vcpu);
609 preempt_enable();
610 continue;
611 }
612
613 kvm_arm_setup_debug(vcpu);
614
615 /**************************************************************
616 * 进入客户模式开始运行
617 */
618 trace_kvm_entry(*vcpu_pc(vcpu));
619 __kvm_guest_enter();
620 vcpu->mode = IN_GUEST_MODE;
621
622 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
623
624 vcpu->mode = OUTSIDE_GUEST_MODE;
625 vcpu->stat.exits++;
626 /*
627 * 退出客户模式
628 *************************************************************/
629
630 kvm_arm_clear_debug(vcpu);
631
632 /*
633 * We may have taken a host interrupt in HYP mode (ie
634 * while executing the guest). This interrupt is still
635 * pending, as we haven't serviced it yet!
636 *
637 * We're now back in SVC mode, with interrupts
638 * disabled. Enabling the interrupts now will have
639 * the effect of taking the interrupt again, in SVC
640 * mode this time.
641 */
642 local_irq_enable();
643
644 /*
645 * We do local_irq_enable() before calling kvm_guest_exit() so
646 * that if a timer interrupt hits while running the guest we
647 * account that tick as being spent in the guest. We enable
648 * preemption after calling kvm_guest_exit() so that if we get
649 * preempted we make sure ticks after that is not counted as
650 * guest time.
651 */
652 kvm_guest_exit();
653 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
654
655 /*
656 * We must sync the PMU and timer state before the vgic state so
657 * that the vgic can properly sample the updated state of the
658 * interrupt line.
659 */
660 kvm_pmu_sync_hwstate(vcpu);
661 kvm_timer_sync_hwstate(vcpu);
662
663 kvm_vgic_sync_hwstate(vcpu);
664
665 preempt_enable();
666
667 ret = handle_exit(vcpu, run, ret);
668 }
669
670 if (vcpu->sigset_active)
671 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
672 return ret;
673 }
2320 static struct file_operations kvm_vcpu_fops = {
2321 .release = kvm_vcpu_release,
2322 .unlocked_ioctl = kvm_vcpu_ioctl,
2323 #ifdef CONFIG_KVM_COMPAT
2324 .compat_ioctl = kvm_vcpu_compat_ioctl,
2325 #endif
2326 .mmap = kvm_vcpu_mmap,
2327 .llseek = noop_llseek,
2328 };
其中 .unlocked_ioctl 指向的 kvm_vcpu_ioctl 就是用户态 ioctl 进入内核后的入口，实现如下:
static long kvm_vcpu_ioctl(struct file *filp,
2415 unsigned int ioctl, unsigned long arg)
2416 {
2417 struct kvm_vcpu *vcpu = filp->private_data;
2418 void __user *argp = (void __user *)arg;
2419 int r;
2420 struct kvm_fpu *fpu = NULL;
2421 struct kvm_sregs *kvm_sregs = NULL;
2422
2423 if (vcpu->kvm->mm != current->mm)
2424 return -EIO;
2425
2426 if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
2427 return -EINVAL;
2428
2429 #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
2430 /*
2431 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
2432 * so vcpu_load() would break it.
2433 */
2434 if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
2435 return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
2436 #endif
2437
2438
2439 r = vcpu_load(vcpu);
2440 if (r)
2441 return r;
2442 switch (ioctl) {
2443 case KVM_RUN:
2444 r = -EINVAL;
2445 if (arg)
2446 goto out;
2447 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
2448 /* The thread running this VCPU changed. */
2449 struct pid *oldpid = vcpu->pid;
2450 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
2451
2452 rcu_assign_pointer(vcpu->pid, newpid);
2453 if (oldpid)
2454 synchronize_rcu();
2455 put_pid(oldpid);
2456 }
2457 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
2458 trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
2459 break;
2439 行的 vcpu_load 将该 vcpu 的寄存器状态装载到物理 cpu 中；随后 switch 走到 2443 行的 KVM_RUN case，最终在 2457 行调用 kvm_arch_vcpu_ioctl_run 来运行虚拟 cpu:
549 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
550 {
551 int ret;
552 sigset_t sigsaved;
553
554 if (unlikely(!kvm_vcpu_initialized(vcpu)))
555 return -ENOEXEC;
556
557 ret = kvm_vcpu_first_run_init(vcpu);
558 if (ret)
559 return ret;
560
561 if (run->exit_reason == KVM_EXIT_MMIO) {
562 ret = kvm_handle_mmio_return(vcpu, vcpu->run);
563 if (ret)
564 return ret;
565 }
566
567 if (vcpu->sigset_active)
568 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
569
570 ret = 1;
571 run->exit_reason = KVM_EXIT_UNKNOWN;
572 while (ret > 0) {
573 /*
574 * Check conditions before entering the guest
575 */
576 cond_resched();
577
578 update_vttbr(vcpu->kvm);
579
580 if (vcpu->arch.power_off || vcpu->arch.pause)
581 vcpu_sleep(vcpu);
582
583 /*
584 * Preparing the interrupts to be injected also
585 * involves poking the GIC, which must be done in a
586 * non-preemptible context.
587 */
588 preempt_disable();
589 kvm_pmu_flush_hwstate(vcpu);
590 kvm_timer_flush_hwstate(vcpu);
591 kvm_vgic_flush_hwstate(vcpu);
592
593 local_irq_disable();
594
595 /*
596 * Re-check atomic conditions
597 */
598 if (signal_pending(current)) {
599 ret = -EINTR;
600 run->exit_reason = KVM_EXIT_INTR;
601 }
602
603 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
604 vcpu->arch.power_off || vcpu->arch.pause) {
605 local_irq_enable();
606 kvm_pmu_sync_hwstate(vcpu);
607 kvm_timer_sync_hwstate(vcpu);
608 kvm_vgic_sync_hwstate(vcpu);
609 preempt_enable();
610 continue;
611 }
612
613 kvm_arm_setup_debug(vcpu);
614
615 /**************************************************************
616 * 进入客户模式开始运行
617 */
618 trace_kvm_entry(*vcpu_pc(vcpu));
619 __kvm_guest_enter();
620 vcpu->mode = IN_GUEST_MODE;
621
622 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
623
624 vcpu->mode = OUTSIDE_GUEST_MODE;
625 vcpu->stat.exits++;
626 /*
627 * 退出客户模式
628 *************************************************************/
629
630 kvm_arm_clear_debug(vcpu);
631
632 /*
633 * We may have taken a host interrupt in HYP mode (ie
634 * while executing the guest). This interrupt is still
635 * pending, as we haven't serviced it yet!
636 *
637 * We're now back in SVC mode, with interrupts
638 * disabled. Enabling the interrupts now will have
639 * the effect of taking the interrupt again, in SVC
640 * mode this time.
641 */
642 local_irq_enable();
643
644 /*
645 * We do local_irq_enable() before calling kvm_guest_exit() so
646 * that if a timer interrupt hits while running the guest we
647 * account that tick as being spent in the guest. We enable
648 * preemption after calling kvm_guest_exit() so that if we get
649 * preempted we make sure ticks after that is not counted as
650 * guest time.
651 */
652 kvm_guest_exit();
653 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
654
655 /*
656 * We must sync the PMU and timer state before the vgic state so
657 * that the vgic can properly sample the updated state of the
658 * interrupt line.
659 */
660 kvm_pmu_sync_hwstate(vcpu);
661 kvm_timer_sync_hwstate(vcpu);
662
663 kvm_vgic_sync_hwstate(vcpu);
664
665 preempt_enable();
666
667 ret = handle_exit(vcpu, run, ret);
668 }
669
670 if (vcpu->sigset_active)
671 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
672 return ret;
673 }