Devices are accessed through two kinds of interfaces: I/O ports and I/O memory (MMIO). The following analyzes how the virtual machine intercepts and emulates each of the two.
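As a quick reminder of the difference as seen from guest code, here is a minimal illustration; the port number and the MMIO pointer are arbitrary examples, and inb() is the glibc x86 wrapper from <sys/io.h> (it requires ioperm/iopl privileges):

#include <stdint.h>
#include <sys/io.h>   /* inb(): x86 port I/O wrapper */

/* I/O ports: accessed with dedicated in/out instructions. */
uint8_t pio_read(void) { return inb(0x3f8); }   /* example port number */

/* I/O memory: an ordinary load/store through a pointer that maps a
 * device-backed physical address (dev is assumed already mapped). */
uint8_t mmio_read(volatile uint8_t *dev) { return dev[0]; }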
1. User-space access to kernel data structures
Memory mapping can be used to give a user program direct access to device memory. The KVM kernel module exposes the per-VCPU struct kvm_run data structure to user space via mmap, so user space can read the information in struct kvm_run; for an MMIO access this includes its address and size. Other information can be read the same way, for example struct kvm_coalesced_mmio_ring.
/* qemu-kvm user space: ask the kernel how large the shared region is,
 * then map it from offset 0 of the vcpu fd; the mapping starts with the
 * kvm_run structure. */
mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    env->kvm_fd, 0);
/* KVM kernel side: the fault handler that backs the vcpu fd mmap.  Page
 * offset 0 maps the kvm_run structure itself; further page offsets map
 * the x86 PIO data page and the coalesced-MMIO ring. */
static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
    struct kvm_vcpu *vcpu = vma->vm_file->private_data;
    struct page *page;

    if (vmf->pgoff == 0)
        page = virt_to_page(vcpu->run);
#ifdef CONFIG_X86
    else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
        page = virt_to_page(vcpu->arch.pio_data);
#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
    else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
        page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
#endif
    else
        return VM_FAULT_SIGBUS;
    get_page(page);
    vmf->page = page;
    return 0;
}

static const struct vm_operations_struct kvm_vcpu_vm_ops = {
    .fault = kvm_vcpu_fault,
};

static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
    vma->vm_ops = &kvm_vcpu_vm_ops;
    return 0;
}
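To make the mapping concrete, below is a minimal, self-contained sketch of the user-space side: it creates a VM and a VCPU with the standard KVM ioctls, mmaps the shared region, and after KVM_RUN returns inspects the MMIO fields. All ioctls and fields used here are the documented KVM API, but no guest memory or registers are set up, so an actual run will fail or exit immediately; the point is only where the MMIO information lives.

/* Sketch: minimal user-space use of the vcpu mmap (error handling and
 * guest setup omitted for brevity). */
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int main(void)
{
    int kvm  = open("/dev/kvm", O_RDWR);
    int vm   = ioctl(kvm, KVM_CREATE_VM, 0);    /* VM fd */
    int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);   /* VCPU fd */

    /* The shared region can be larger than sizeof(struct kvm_run); ask
     * the kernel for its size, then map it.  Offset 0 hits the
     * "pgoff == 0" case of kvm_vcpu_fault(), i.e. the kvm_run page. */
    int mmap_size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
    struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
                               MAP_SHARED, vcpu, 0);

    ioctl(vcpu, KVM_RUN, 0);   /* returns on the next VM exit */

    if (run->exit_reason == KVM_EXIT_MMIO)
        printf("MMIO %s at %llx, len %u\n",
               run->mmio.is_write ? "write" : "read",
               (unsigned long long)run->mmio.phys_addr, run->mmio.len);
    return 0;
}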
2. I/O memory: how MMIO is intercepted and emulated
First, user space registers read/write handler functions for the MMIO address range.
Then, an MMIO access by the guest is intercepted by the KVM kernel module (a sketch of this step follows below).
Finally, control returns from the KVM kernel to user space, where qemu-kvm performs the actual emulation of the MMIO read or write.
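What "intercepted by the KVM kernel" means in practice: when the guest touches a guest-physical address that no memory slot and no in-kernel device backs, KVM's instruction emulator decodes the access and records it in the shared kvm_run before returning to user space. A simplified sketch, not verbatim kernel code; gpa, bytes, is_write and val stand for values produced by the emulator:

/* Simplified sketch of how KVM hands an MMIO access to user space. */
vcpu->run->exit_reason    = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = gpa;        /* guest-physical address */
vcpu->run->mmio.len       = bytes;      /* access size in bytes   */
vcpu->run->mmio.is_write  = is_write;
if (is_write)
    memcpy(vcpu->run->mmio.data, val, bytes);  /* value being stored */
/* ... return to user space: KVM_RUN comes back with this exit reason. */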
qemu-kvm user space
=================
Each device registers read/write handler functions for its own MMIO address range; the HPET, for example:
iomemtype = cpu_register_io_memory(hpet_ram_read, hpet_ram_write, s);
cpu_register_physical_memory(HPET_BASE, 0x400, iomemtype);
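Note that hpet_ram_read and hpet_ram_write are not single functions but arrays of three handlers, one per access width (1, 2 and 4 bytes); that is why cpu_register_io_memory_fixed() below loops over three entries. A sketch of the pattern follows; the exact HPET handler bodies are omitted, so treat this as illustrative of the old-QEMU convention rather than the verbatim hpet.c source:

/* Illustrative: per-width handler arrays as passed to
 * cpu_register_io_memory().  Index 0/1/2 serve 1/2/4-byte accesses;
 * a NULL entry makes the region "subwidth" (see below). */
static uint32_t hpet_ram_readl(void *opaque, target_phys_addr_t addr)
{
    HPETState *s = opaque;   /* the opaque pointer passed at registration */
    /* ... decode addr and return the 32-bit register value ... */
    return 0;
}

static CPUReadMemoryFunc * const hpet_ram_read[] = {
    NULL,            /* no 1-byte handler */
    NULL,            /* no 2-byte handler */
    hpet_ram_readl,  /* 4-byte reads */
};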
int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
                           CPUWriteMemoryFunc * const *mem_write,
                           void *opaque)
{
    return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque);
}

static int cpu_register_io_memory_fixed(int io_index,
                                        CPUReadMemoryFunc * const *mem_read,
                                        CPUWriteMemoryFunc * const *mem_write,
                                        void *opaque)
{
    int i, subwidth = 0;

    if (io_index <= 0) {
        io_index = get_free_io_mem_idx();   /* allocate a free table slot */
        if (io_index == -1)
            return io_index;
    } else {
        io_index >>= IO_MEM_SHIFT;
        if (io_index >= IO_MEM_NB_ENTRIES)
            return -1;
    }

    /* One handler per access width: index 0/1/2 for 1/2/4-byte accesses.
     * A missing handler marks the region as "subwidth". */
    for (i = 0; i < 3; i++) {
        if (!mem_read[i] || !mem_write[i])
            subwidth = IO_MEM_SUBWIDTH;
        io_mem_read[io_index][i] = mem_read[i];
        io_mem_write[io_index][i] = mem_write[i];
    }
    io_mem_opaque[io_index] = opaque;

    /* The return value encodes the table slot; cpu_physical_memory_rw()
     * later recovers it with (pd >> IO_MEM_SHIFT). */
    return (io_index << IO_MEM_SHIFT) | subwidth;
}
/* qemu-kvm vcpu loop (abridged): when the KVM_RUN ioctl returns, dispatch
 * on the exit reason the kernel recorded in the mmap'ed kvm_run. */
int kvm_run(CPUState *env)
{
    /* ... issue the KVM_RUN ioctl, then: */
    switch (env->kvm_run->exit_reason) {
    case KVM_EXIT_MMIO:
        r = handle_mmio(env);
        break;
    /* ... other exit reasons ... */
    }
}
static int handle_mmio(CPUState *env)
{
    unsigned long addr = env->kvm_run->mmio.phys_addr;
    struct kvm_run *kvm_run = env->kvm_run;
    void *data = kvm_run->mmio.data;

    /* hack: Red Hat 7.1 generates these weird accesses. */
    if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
        return 0;

    /* Emulate the access: route the physical address to the read/write
     * handlers the device registered above. */
    cpu_physical_memory_rw(addr, data, kvm_run->mmio.len,
                           kvm_run->mmio.is_write);
    return 0;
}
void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                            int len, int is_write)
{
    int l, io_index;
    uint8_t *ptr;
    uint32_t val;
    target_phys_addr_t page;
    unsigned long pd;
    PhysPageDesc *p;

    while (len > 0) {
        /* Clamp each chunk to the guest-physical page that contains it. */
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        p = phys_page_find(page >> TARGET_PAGE_BITS);
        if (!p) {
            pd = IO_MEM_UNASSIGNED;
        } else {
            pd = p->phys_offset;
        }
        /* The original excerpt ends here.  The remainder of the function
         * checks whether pd refers to RAM or to registered I/O memory; for
         * I/O memory it recovers the handler slot with
         *     io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
         * and invokes io_mem_read[io_index][...] / io_mem_write[io_index][...]
         * with io_mem_opaque[io_index] -- that is, the handlers the device
         * registered through cpu_register_io_memory() above. */
    }
}