kvm的基本结构分为KVM driver + QEMU,其中KVM是kernel的一个模块,主要负责虚拟机的创建、虚拟内存的分配、虚拟cpu寄存器的读写等。QEMU和kvm之间通过ioctl来通信。我们以下主要讨论arm的kvm在kernel中的实现部分。
kvm在kernel中的code 主要分布在下面两个路径中.
linux/arch/arm/kvm
linux/virt/kvm
具体执行的code入口如下:
arch/arm/kvm/arm.c
1417 void kvm_arch_exit(void)
1418 {
1419 kvm_perf_teardown();
1420 }
1421
1422 static int arm_init(void)
1423 {
1424 int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1425 return rc;
1426 }
1427
1428 module_init(arm_init);
可以看到arm_init是入口函数,这个函数直接调用kvm_init
3708 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
3709 struct module *module)
3710 {
3714 r = kvm_arch_init(opaque);
3725 r = kvm_irqfd_init();
3746 r = register_cpu_notifier(&kvm_cpu_notifier);
3749 register_reboot_notifier(&kvm_reboot_notifier);
3765 kvm_chardev_ops.owner = module;
3766 kvm_vm_fops.owner = module;
3767 kvm_vcpu_fops.owner = module;
3769 r = misc_register(&kvm_dev);
3775 register_syscore_ops(&kvm_syscore_ops);
3777 kvm_preempt_ops.sched_in = kvm_sched_in;
3778 kvm_preempt_ops.sched_out = kvm_sched_out;
3780 r = kvm_init_debug();
3786 r = kvm_vfio_ops_init();
3812 }
3813 EXPORT_SYMBOL_GPL(kvm_init);
这个函数有很多重要的调用,我们一个一个来看。
3714行kvm_arch_init主要初始化Hyp-mode(也就是说kvm 虚拟化需要cpu支持)和做memory map
1374 int kvm_arch_init(void *opaque)
1375 {
1376 int err;
1377 int ret, cpu;
1378
1379 if (!is_hyp_mode_available()) {
1380 kvm_err("HYP mode not available\n");
1381 return -ENODEV;
1382 }
1383
1384 for_each_online_cpu(cpu) {
1385 smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
1386 if (ret < 0) {
1387 kvm_err("Error, CPU %d not supported!\n", cpu);
1388 return -ENODEV;
1389 }
1390 }
1391 //1379~1390:先检查Hyp-mode是否可用,再在每个online cpu上调用check_kvm_target_cpu,检查该cpu是否被kvm支持
1392 err = init_common_resources();//申请percpu kvm_host_cpu_state
1393 if (err)
1394 return err;
1395
1396 if (is_kernel_in_hyp_mode())
1397 err = init_vhe_mode();
1398 else
1399 err = init_hyp_mode();
// init hyp mode
1400 if (err)
1401 goto out_err;
1402
1403 err = init_subsystems();//做cpu_pm 初始化,virtual gic初始化,timer初始化,perf初始化
1404 if (err)
1405 goto out_hyp;
1406
1407 return 0;
1408
1409 out_hyp:
1410 teardown_hyp_mode();
1411 out_err:
1412 teardown_common_resources();
1413 return err;
1414 }
第二个函数(3725行)kvm_irqfd_init:创建一个名为kvm-irqfd-cleanup的单线程workqueue
630 int kvm_irqfd_init(void)
631 {
632 irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
633 if (!irqfd_cleanup_wq)
634 return -ENOMEM;
635
636 return 0;
637 }
第三个函数:注册一个cpu通知链,cpu dying时调用hardware_disable,cpu starting时调用hardware_enable
3746 r = register_cpu_notifier(&kvm_cpu_notifier);
3492 static struct notifier_block kvm_cpu_notifier = {
3493 .notifier_call = kvm_cpu_hotplug,
3494 };
3211 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
3212 void *v)
3213 {
3214 val &= ~CPU_TASKS_FROZEN;
3215 switch (val) {
3216 case CPU_DYING:
3217 hardware_disable();
3218 break;
3219 case CPU_STARTING:
3220 hardware_enable();
3221 break;
3222 }
3223 return NOTIFY_OK;
3224 }
第四个函数:注册一个reboot的通知链
3749 register_reboot_notifier(&kvm_reboot_notifier);
3241 static struct notifier_block kvm_reboot_notifier = {
3242 .notifier_call = kvm_reboot,
3243 .priority = 0,
3244 };
3226 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
3227 void *v)
3228 {
3229 /*
3230 * Some (well, at least mine) BIOSes hang on reboot if
3231 * in vmx root mode.
3232 *
3233 * And Intel TXT required VMX off for all cpu when system shutdown.
3234 */
3235 pr_info("kvm: exiting hardware virtualization\n");
3236 kvm_rebooting = true;
3237 on_each_cpu(hardware_disable_nolock, NULL, 1);
3238 return NOTIFY_OK;
3239 }
第五个函数:misc_register(&kvm_dev),这个函数最重要,后面会重点分析。
它注册一个字符设备,QEMU通过这个字符设备提供的ioctl与kvm通信。
第六个函数:给kvm_device_ops_table赋值
r = kvm_vfio_ops_init();
287 int kvm_vfio_ops_init(void)
288 {
289 return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
290 }
2734 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
2735 {
2736 if (type >= ARRAY_SIZE(kvm_device_ops_table))
2737 return -ENOSPC;
2738
2739 if (kvm_device_ops_table[type] != NULL)
2740 return -EEXIST;
2741
2742 kvm_device_ops_table[type] = ops;
2743 return 0;
2744 }
上面这些步骤完成后,kvm模块的初始化就完成了,接下来就等QEMU通过ioctl来调用第五步中注册的字符设备的ioctl函数了。
kvm在kernel中的code 主要分布在下面两个路径中.
linux/arch/arm/kvm
linux/virt/kvm
具体执行的code入口如下:
arch/arm/kvm/arm.c
1417 void kvm_arch_exit(void)
1418 {
1419 kvm_perf_teardown();
1420 }
1421
1422 static int arm_init(void)
1423 {
1424 int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1425 return rc;
1426 }
1427
1428 module_init(arm_init);
可以看到arm_init是入口函数,这个函数直接调用kvm_init
3708 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
3709 struct module *module)
3710 {
3714 r = kvm_arch_init(opaque);
3725 r = kvm_irqfd_init();
3746 r = register_cpu_notifier(&kvm_cpu_notifier);
3749 register_reboot_notifier(&kvm_reboot_notifier);
3765 kvm_chardev_ops.owner = module;
3766 kvm_vm_fops.owner = module;
3767 kvm_vcpu_fops.owner = module;
3769 r = misc_register(&kvm_dev);
3775 register_syscore_ops(&kvm_syscore_ops);
3777 kvm_preempt_ops.sched_in = kvm_sched_in;
3778 kvm_preempt_ops.sched_out = kvm_sched_out;
3780 r = kvm_init_debug();
3786 r = kvm_vfio_ops_init();
3812 }
3813 EXPORT_SYMBOL_GPL(kvm_init);
这个函数有很多重要的调用,我们一个一个来看。
3714行kvm_arch_init主要初始化Hyp-mode(也就是说kvm 虚拟化需要cpu支持)和做memory map
1374 int kvm_arch_init(void *opaque)
1375 {
1376 int err;
1377 int ret, cpu;
1378
1379 if (!is_hyp_mode_available()) {
1380 kvm_err("HYP mode not available\n");
1381 return -ENODEV;
1382 }
1383
1384 for_each_online_cpu(cpu) {
1385 smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
1386 if (ret < 0) {
1387 kvm_err("Error, CPU %d not supported!\n", cpu);
1388 return -ENODEV;
1389 }
1390 }
1391 //1379~1390:先检查Hyp-mode是否可用,再在每个online cpu上调用check_kvm_target_cpu,检查该cpu是否被kvm支持
1392 err = init_common_resources();//申请percpu kvm_host_cpu_state
1393 if (err)
1394 return err;
1395
1396 if (is_kernel_in_hyp_mode())
1397 err = init_vhe_mode();
1398 else
1399 err = init_hyp_mode();
// init hyp mode
1400 if (err)
1401 goto out_err;
1402
1403 err = init_subsystems();//做cpu_pm 初始化,virtual gic初始化,timer初始化,perf初始化
1404 if (err)
1405 goto out_hyp;
1406
1407 return 0;
1408
1409 out_hyp:
1410 teardown_hyp_mode();
1411 out_err:
1412 teardown_common_resources();
1413 return err;
1414 }
第二个函数(3725行)kvm_irqfd_init:创建一个名为kvm-irqfd-cleanup的单线程workqueue
630 int kvm_irqfd_init(void)
631 {
632 irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
633 if (!irqfd_cleanup_wq)
634 return -ENOMEM;
635
636 return 0;
637 }
第三个函数:注册一个cpu通知链,cpu dying时调用hardware_disable,cpu starting时调用hardware_enable
3746 r = register_cpu_notifier(&kvm_cpu_notifier);
3492 static struct notifier_block kvm_cpu_notifier = {
3493 .notifier_call = kvm_cpu_hotplug,
3494 };
3211 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
3212 void *v)
3213 {
3214 val &= ~CPU_TASKS_FROZEN;
3215 switch (val) {
3216 case CPU_DYING:
3217 hardware_disable();
3218 break;
3219 case CPU_STARTING:
3220 hardware_enable();
3221 break;
3222 }
3223 return NOTIFY_OK;
3224 }
第四个函数:注册一个reboot的通知链
3749 register_reboot_notifier(&kvm_reboot_notifier);
3241 static struct notifier_block kvm_reboot_notifier = {
3242 .notifier_call = kvm_reboot,
3243 .priority = 0,
3244 };
3226 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
3227 void *v)
3228 {
3229 /*
3230 * Some (well, at least mine) BIOSes hang on reboot if
3231 * in vmx root mode.
3232 *
3233 * And Intel TXT required VMX off for all cpu when system shutdown.
3234 */
3235 pr_info("kvm: exiting hardware virtualization\n");
3236 kvm_rebooting = true;
3237 on_each_cpu(hardware_disable_nolock, NULL, 1);
3238 return NOTIFY_OK;
3239 }
第五个函数:misc_register(&kvm_dev),这个函数最重要,后面会重点分析。
它注册一个字符设备,QEMU通过这个字符设备提供的ioctl与kvm通信。
第六个函数:给kvm_device_ops_table赋值
r = kvm_vfio_ops_init();
287 int kvm_vfio_ops_init(void)
288 {
289 return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
290 }
2734 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
2735 {
2736 if (type >= ARRAY_SIZE(kvm_device_ops_table))
2737 return -ENOSPC;
2738
2739 if (kvm_device_ops_table[type] != NULL)
2740 return -EEXIST;
2741
2742 kvm_device_ops_table[type] = ops;
2743 return 0;
2744 }
上面这些步骤完成后,kvm模块的初始化就完成了,接下来就等QEMU通过ioctl来调用第五步中注册的字符设备的ioctl函数了。