本节首先分析Qemu的初始化top level流程;从而引出Qemu各大功能模块的描述。最后分析Qemu与内核态KVM的通讯接口。
1.1.1 Main的主流程
main– (vl.c function main)
a) module_call_init(MODULE_INIT_QOM);--设备驱动初始化和注册 type_init(x86_cpu_register_types)(target-i386/cpu.c)
b) module_call_init(MODULE_INIT_MACHINE); -- 机器类型注册初始化
machine_init(pc_machine_init)
c) socket_init
d) qemu_init_cpu_loop
e) configure_accelerator -- 从accel_list中选择加速器(tcg/xen/kvm/qtest); 对KVM而言采用kvm type, 并调用kvm_init
accel_list[i].init();accel_list[] = {
{ "tcg", "tcg",tcg_available, tcg_init, &tcg_allowed },
{ "xen", "Xen",xen_available, xen_init, &xen_allowed },
{ "kvm","KVM", kvm_available, kvm_init, &kvm_allowed }, //open /dev/kvm
{ "qtest", "QTest",qtest_available, qtest_init, &qtest_allowed }, }
f) qemu_init_main_loop; –
qemu_mutex_lock
qemu_event_init
qemu_signal_init
g) qemu_init_cpu_loop
.begin = kvm_begin,
.commit = kvm_commit,
.region_add = kvm_region_add,
.region_del = kvm_region_del,
.region_nop = kvm_region_nop,
.log_start = kvm_log_start,
.log_stop = kvm_log_stop,
.log_sync = kvm_log_sync,
.log_global_start = kvm_log_global_start,
.log_global_stop = kvm_log_global_stop,
.eventfd_add = kvm_eventfd_add,
.eventfd_del = kvm_eventfd_del,
.priority = 10,
};
memory_region_add_subregion ==> listener_add_address_space ==>region_add
kvm_region_add==> kvm_set_phys_mem==>kvm_set_user_memory_region ==>
kvm_vm_ioctl(s,KVM_SET_USER_MEMORY_REGION, &mem);
虚拟机内存初始化:
pc_init1==>pc_memory_init==>memory_region_add_subregion(memory.c) 添加内存区域到虚拟机的内存管理结构; 第3章将分析内存虚拟化。
1.1.4 Qemu IO管理
main==>cpu_exec_init_all()
void cpu_exec_init_all(void)
{
memory_map_init();
io_mem_init();
}
X86 有两种硬件访问方式PIO 与 MMIO, 下面分别讲解
(1) PIO
isa_cirrus_vga采用IO port方式访问
IO port的注册
vga_initfn (cirrus_vga.c)==> cirrus_init_common ==> register_ioport_read(ioport.c)
int register_ioport_read(pio_addr_t start,int length, int size,
IOPortReadFunc *func,void *opaque)
{
......
for(i = start; i < start + length; ++i) {
ioport_read_table[bsize][i] = func;
if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)
hw_error("register_ioport_read: invalid opaque for address 0x%x",
i);
ioport_opaque[i] = opaque;
}
return 0;
}
当虚拟机由IO port引起VM-Exit时
kvm_handle_io==> cpu_inl (ioport.c)==> ioport_read
static uint32_t ioport_read(int index,uint32_t address)
{
static IOPortReadFunc * const default_func[3] = {
default_ioport_readb,
default_ioport_readw,
default_ioport_readl
};
IOPortReadFunc *func = ioport_read_table[index][address];
if (!func)
func = default_func[index];
return func(ioport_opaque[address], address);
}
(2)MMIO
cirrus_init_common (CirrusVGAState * s, int device_id, int is_pci,
MemoryRegion *system_memory)
{
........
memory_region_init(&s->low_mem_container,
"cirrus-lowmem-container",0x20000);
memory_region_init_io(&s->low_mem, &cirrus_vga_mem_ops, s,
"cirrus-low-memory", 0x20000);
memory_region_add_subregion(&s->low_mem_container, 0,&s->low_mem);
.......
}
定义mmio的read,write
static const MemoryRegionOps cirrus_vga_mem_ops = {
.read = cirrus_vga_mem_read,
.write = cirrus_vga_mem_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.impl = {
.min_access_size = 1,
.max_access_size = 1,
},
};
当虚拟机由MMIO访问引起VM-Exit时
cpu_physical_memory_rw(exec.c)==>io_mem_read(memory.c)==>
memory_region_dispatch_read==> access_with_adjusted_size
static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
target_phys_addr_t addr,
unsigned size)
{
.......
access_with_adjusted_size(addr, &data, size,
mr->ops->impl.min_access_size,
mr->ops->impl.max_access_size,
memory_region_read_accessor, mr);
return data;
}
memory_region_read_accessor ==>mr->ops->read
第5.1节将详细介绍io的管理框架
1.1.5 Qemu IO thread
IO thread 用来管理虚拟机的IO 读写,如对block设备的访问。5.4节将做详细介绍
int main_loop_wait(int nonblocking)
{
int ret;
uint32_t timeout = UINT32_MAX;
if (nonblocking) {
timeout = 0;
}else {
qemu_bh_update_timeout(&timeout);
}
/* poll any events */
/* XXX: separate device handlers from system ones */
nfds = -1;
FD_ZERO(&rfds);
FD_ZERO(&wfds);
FD_ZERO(&xfds);
#ifdef CONFIG_SLIRP
slirp_update_timeout(&timeout);
slirp_select_fill(&nfds, &rfds, &wfds, &xfds);
#endif
qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds);
ret = os_host_main_loop_wait(timeout);
qemu_iohandler_poll(&rfds, &wfds, &xfds, ret);
#ifdef CONFIG_SLIRP
slirp_select_poll(&rfds, &wfds, &xfds, (ret < 0));
#endif
qemu_run_all_timers();
/* Check bottom-halves last in case any of the earlier events triggered
them. */
qemu_bh_poll();
return ret;
}
Qemu中常用的IO描述符有下面几类:
· block io:虚拟磁盘相关的io,为了保证高性能,主要使用aio;
· qemu_notify_event
例子:qemu的时钟模拟利用了linux kernel的signalfd, 定期产生SIGALRM信号(qemu-timer.c);
· eventfd:主要用于qemu和kvm之间的notifier, 比如qemu的模拟设备可以通过notifier向kvm发送一个模拟中断,kvm也可以通过notifier向qemu报告guest的各种状态;
address_space_update_topology==>address_space_update_ioeventfds==>address_space_add_del_ioeventfds==>MEMORY_LISTENER_CALL==>eventfd_add(kvm_mem_ioeventfd_add)==>kvm_vm_ioctl(kvm_state,KVM_IOEVENTFD, &iofd);
· socket:用于虚拟机迁移,qmp管理等
main_loop_wait函数同时还负责轮询系统中所有的定时器(qemu_run_all_timers),并调用定时器的回调函数;
IO Handler
用来表示一个IO描述符,其结构定义如下;iohandler.c中定义了一个全局的链表io_handlers,并提供qemu_set_fd_handler()和qemu_set_fd_handler2()函数,通过QLIST_INSERT_HEAD将一个fd加入到这个链表; 在IO thread主循环中qemu_iohandler_fill()函数负责将io_handlers链表中的所有描述符,加入select测试集合。
IO thread同步
Qemu IO thread和vcpu thread使用一个全局共享线程锁来保证同步,函数qemu_mutex_lock_iothread()和qemu_mutex_unlock_iothread()分别用来获取和释放该锁。
1.1.6 Qemu的模块
下面的表格是本系列文章将会分析到的代码和其对应的模块:
模块名与描述 | 文件 | 章节 |
参数管理与main函数 | Vl.c Qemu-config.c Arch_init.c Qemu-opt.c | 1.1 8.3 |
Kvm访问接口层 | Target-i386\Kvm.c Kvm-all.c | 1.2 2章 |
设备对象模型 | Qdev.c; qdev-properties.c module.c | 2.1 |
Machine与cpu管理 | Hw\pc_piix.c Hw\pc.c Target-i386\Machine.c cpu_exec.c | 第2章 |
中断与时间管理 | Hw\kvm\(ioapic.c, i8259.c,i8254.c, apic.c clock.c) | 第4章 |
内存管理 | Memory.c Memory-mapping.c Exec.c | 第3章 |
硬件辅助虚拟化 | Hw\(pci.c, pcie.c,pci_bridge.c,piix_pci.c) Hw\ide\(core.c,pci.c,piix.c, piix.c) | 5.1 5.2 5.3 |
半虚拟化 | Hw\(virtio.c, virtio-pci.c, virtio-balloon.c) | 6章 |
直接io | Hw\kvm\pci-assign.c | 7章 |
块设备 | Block.c Blockdev.c Block\raw-posix.c | 5.4 |
异步io | Aio.c posix-aio-compat.c iohandler.c main-loop.c | 5.4 |
字符设备 | Qemu-char.c | 8.1 |
管理模块 | Qmp.c; hmp.c qdev-monitor.c Monitor.c Savevm.c | 8.1 8.2 |