先界定几个术语
GPA:Guest 物理地址
HVA:HOST 虚拟地址
MR:Memory Region,QEMU虚拟机中地址空间的管理结构
例如,QEMU通过下面的函数,把一个slot(GPA起始地址、对应的HVA以及长度)通过ioctl注册到内核KVM模块:
/*
 * Hand one KVMSlot to the kernel KVM module (excerpt; "......." marks code
 * elided by the article).
 *
 * Copies the slot's guest physical start address, host virtual address and
 * size into a struct kvm_userspace_memory_region and issues the
 * KVM_SET_USER_MEMORY_REGION ioctl on the VM.
 *
 * Returns whatever kvm_vm_ioctl() returns.
 */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;
    .......
    /* Enter the kernel through KVM_SET_USER_MEMORY_REGION. */
    /* GPA at which the slot starts */
    mem.guest_phys_addr = slot->start_addr;
    /* HVA of the host memory backing the slot */
    mem.userspace_addr = (unsigned long)slot->ram;
    /* a memory_size of 0 is how callers ask for the slot to be removed */
    mem.memory_size = slot->memory_size;
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}
|
我们看一下,内核是如何处理这个ioctl的,调用链如下:
kvm_vm_compat_ioctl
-> kvm_vm_ioctl (case KVM_SET_USER_MEMORY_REGION)
-> kvm_vm_ioctl_set_memory_region
-> kvm_set_memory_region
-> __kvm_set_memory_region
/*
 * Kernel-side worker for KVM_SET_USER_MEMORY_REGION (excerpt; "......" marks
 * code elided by the article, including the out/out_free labels).
 *
 * Visible steps:
 *  1. derive base_gfn / npages from the userspace request;
 *  2. classify the request as KVM_MR_CREATE, KVM_MR_MOVE, KVM_MR_FLAGS_ONLY
 *     or KVM_MR_DELETE by comparing it with the existing slot (old);
 *  3. for CREATE/MOVE, reject ranges that intersect another user memslot;
 *  4. for CREATE, record the HVA and let the arch code set up the memslot;
 *  5. for DELETE/MOVE, install a copy of the memslots in which the slot is
 *     flagged KVM_MEMSLOT_INVALID, then unmap IOMMU pages and flush shadow
 *     mappings for it.
 */
int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
{
    struct kvm_memory_slot old, new/* new: contents of the slot being set */;
    ......
    /* Compute the memslot's base_gfn and npages from the request. */
    base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
    /* memory_size == 0 yields npages == 0; below that is classified as
     * KVM_MR_DELETE (when the old slot exists), i.e. a memory removal. */
    npages = mem->memory_size >> PAGE_SHIFT;
    /* Work out which kind of change is being requested. */
    r = -EINVAL;
    if (npages) { /* not a delete */
        if (!old.npages) /* old slot has no pages: this is a new slot */
            change = KVM_MR_CREATE;
        else { /* Modify an existing slot. */
            if (base_gfn != old.base_gfn) /* base gfn differs from the old slot: MOVE */
                change = KVM_MR_MOVE;
            else if (new.flags != old.flags)
                change = KVM_MR_FLAGS_ONLY;
            else { /* Nothing to change. */
                r = 0;
                goto out;
            }
        }
    } else if (old.npages) { /* requested page count is 0 and the slot exists: delete */
        /* memory_size = 0 */
        change = KVM_MR_DELETE;
    } else /* Modify a non-existent slot: disallowed. */
        goto out;
    /*
     * Check for overlap with memslots that already exist; on overlap r is
     * left at -EEXIST when jumping to out.
     */
    if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
        /* Check for overlaps */
        r = -EEXIST;
        kvm_for_each_memslot(slot, kvm->memslots) {
            if ((slot->id >= KVM_USER_MEM_SLOTS) ||
                (slot->id == mem->slot)/* the slot being set itself: skip it */)
                continue;
            /* new_end > slot_base && new_base < slot_end: the range
             * intersects a slot that is already registered */
            if (!((base_gfn + npages <= slot->base_gfn) ||
                  (base_gfn >= slot->base_gfn + slot->npages)))
                goto out;
        }
    }
    if (change == KVM_MR_CREATE) {
        /* record the HVA backing the new slot */
        new.userspace_addr = mem->userspace_addr;
        /* set up the arch-specific part of the memslot */
        if (kvm_arch_create_memslot(&new, npages))
            goto out_free;
    }
    if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
        r = -ENOMEM;
        /* work on a private copy of the memslots array */
        slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),GFP_KERNEL);
        if (!slots)
            goto out_free;
        /* flag the affected slot invalid in the copy, then install the copy */
        slot = id_to_memslot(slots, mem->slot);
        slot->flags |= KVM_MEMSLOT_INVALID;
        old_memslots = install_new_memslots(kvm, slots, NULL);
        /* slot was deleted or moved, clear iommu mapping */
        kvm_iommu_unmap_pages(kvm, &old);
        /* From this point no new shadow pages pointing to a deleted,
         * or moved, memslot will be created.
         *
         * validation of sp->gfn happens in:
         * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
         * - kvm_is_visible_gfn (mmu_check_roots)
         */
        kvm_arch_flush_shadow_memslot(kvm, slot);
        /* keep the previously installed array in slots for the elided code below */
        slots = old_memslots;
    }
    ......
}
从上面的代码看来:
如果memory_size大于0,那么这次内存操作的动作为“向虚拟机中添加内存或者更新内存布局”
如果memory_size等于0,那么这次内存操作的动作为“将以guest_phys_addr为起始地址的那块内存从虚拟机中删除”
OK,知道了上面的内容,让我们来理解一个QEMU里面的关键函数,这个函数会在后面讲解QEMU内存初始化流程的博文中再次提到。它就是kvm_set_phys_mem,通过该函数,QEMU可以修改虚拟机注册在内核中的内存,达到为虚拟机添加/删除/移动内存的目的
在讲解前,我们先了解一下什么是overlap。
overlap就是重叠的意思,见图1

图1: overlap内存
所谓slot可以理解为物理机上的一个内存插槽上插入的内存,已有slot,就是虚拟机中已经有的内存
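当新加入的section管理的地址范围,与已有slot的地址范围存在交集时,就叫做overlap(图1画的是section完全落入已有slot内部的典型情况)。注意:从下面的代码可以看到,判断条件是 end_addr > slot起始地址 && start_addr < slot结束地址,也就是说只要两段范围有交集(哪怕只重叠一部分)就算overlap,并不要求完全落入,参见函数kvm_lookup_overlapping_slot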
/*
 * Search the KVMSlot array for a slot overlapping the range
 * [start_addr, end_addr) (excerpt; "....." marks code elided by the article,
 * including the declarations of i and found).
 *
 * Note: the condition below is true for ANY intersection between the
 * requested range and a slot's range; the slot does not have to contain the
 * requested range completely.
 *
 * Returns found; the visible loop does not break on a match, so a later
 * matching slot overwrites an earlier one (the elided code may filter slots
 * before the test -- verify against the full source).
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s, hwaddr start_addr, hwaddr end_addr)
{
    for (i = 0; i < s->nr_slots; i++) {
        KVMSlot *mem = &s->slots[i];
        .....
        /* the slot's address range intersects the requested one */
        if (end_addr > mem->start_addr && start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }
    return found;
}
|
那么QEMU会怎么处理这种overlap的情况呢,是这样的:
1. 将已有slot分为三个部分,prefix slot + overlap + suffix slot
2. 将已有的slot内存,通过KVM_SET_USER_MEMORY_REGION,从内核KVM模块中删除
3. 通过KVM_SET_USER_MEMORY_REGION,向内核kvm模块注册添加prefix slot内存
4. 通过KVM_SET_USER_MEMORY_REGION,向内核kvm模块注册添加suffix slot内存
5. 如果本次操作是添加内存(add为true),通过KVM_SET_USER_MEMORY_REGION,向内核kvm模块注册添加overlap内存,其实overlap内存就是本次要添加的section的内存;如果是删除(add为false),则到第4步就结束了
OK,这样就将内存加入进去了。
一定有人会问,为什么不直接利用原来的内存,为什么要新分配呢?
我的理解是这样的,kvm_set_phys_mem是一个操作内存的统一接口,添加内存和删除内存都通过该接口,如果是删除overlap部分,kvm_set_phys_mem的分段工作就不言而喻了。而且如果添加的section的属性变了,如从RAM变成了ROM,那么重新进行添加也是必要的。
当然,prefix slot 和 suffix slot 的长度可以为0,不用多解释了,就是正好覆盖到起始或结束位置
好了,有了上面的理解,我们就可以来分析一下kvm_set_phys_mem函数了
主要流程如图2所示,就不多废话了

图2
下面是代码分析
/*
 * Set guest physical memory: register (add == true) or unregister
 * (add == false) the memory described by section with the kernel KVM module.
 *
 * Every existing KVMSlot that intersects the section is unregistered first;
 * the parts of that slot lying before (prefix) and after (suffix) the
 * section are then registered again as separate slots. Finally, when adding,
 * the section itself is registered in a freshly allocated slot.
 *
 * Any failure of kvm_set_user_memory_region() is reported on stderr and
 * followed by abort().
 */
static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
{
    KVMState *s = kvm_state;
    KVMSlot *mem, old;
    int err;
    MemoryRegion *mr = section->mr;
    bool log_dirty = memory_region_is_logging(mr);
    bool writeable = !mr->readonly && !mr->rom_device;
    bool readonly_flag = mr->readonly || memory_region_is_romd(mr);
    /* start_addr: GPA at which this section starts */
    hwaddr start_addr = section->offset_within_address_space;
    /* size: length of this section */
    ram_addr_t size = int128_get64(section->size);
    void *ram = NULL;
    unsigned delta;

    /* kvm works in page size chunks, but the function may be called
       with sub-page size and unaligned start address. */
    /* Page alignment: nudge the start address and trim the length.
     * NOTE(review): delta is derived from size, not from start_addr, so a
     * start_addr that is still unaligned afterwards is only caught by the
     * check below and the section is then skipped -- compare with current
     * upstream QEMU. */
    delta = TARGET_PAGE_ALIGN(size) - size;
    if (delta > size) {
        return;
    }
    start_addr += delta;
    size -= delta;
    size &= TARGET_PAGE_MASK;
    if (!size || (start_addr & ~TARGET_PAGE_MASK)) {
        return;
    }

    /* Non-RAM (ROM-like) regions: check that they may be mapped at all. */
    if (!memory_region_is_ram(mr)) {
        if (writeable || !kvm_readonly_mem_allowed) {
            return;
        } else if (!mr->romd_mode) {
            /* If the memory device is not in romd_mode, then we actually want
             * to remove the kvm memory slot so all accesses will trap. */
            add = false;
        }
    }

    /*
     * memory_region_get_ram_ptr(mr) is the HVA of the memory managed by the MR;
     * adding section->offset_within_region and delta gives the HVA of the
     * (aligned) section, which is what ram holds from here on.
     */
    ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;

    while (1) {
        /* Look for a KVMSlot overlapping [start_addr, start_addr + size). */
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        /* No overlap: leave the loop and register the memory directly. */
        if (!mem) {
            break;
        }

        /* From here on, mem overlaps the requested range. */
        if (add && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (ram - start_addr == mem->ram - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - update flags and done. */
            kvm_slot_dirty_pages_log_change(mem, log_dirty);
            return;
        }

        /* Keep a copy of the existing slot; *mem is modified below. */
        old = *mem;

        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
            kvm_physical_sync_dirty_bitmap(section);
        }

        /* Setting memory_size to 0 and issuing KVM_SET_USER_MEMORY_REGION
         * removes the already registered memory from the kernel KVM module. */
        /* unregister the overlapping slot */
        mem->memory_size = 0;
        /* push the now zero-sized slot to the kernel */
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n", __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        /*
         * Per the article's author this branch is normally not taken because
         * broken_set_mem_region is 0; it is not analysed further here.
         */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size && add) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->ram = old.ram;
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__, strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            ram += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /*
         * Register the prefix part with the kernel through
         * KVM_SET_USER_MEMORY_REGION; its start and size are computed from
         * the saved copy old.
         */
        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->ram = old.ram;
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n", __func__, strerror(-err));
#ifdef TARGET_PPC
                fprintf(stderr, "%s: This is probably because your kernel's " \
                        "PAGE_SIZE is too big. Please try to use 4k " \
                        "PAGE_SIZE!\n", __func__);
#endif
                abort();
            }
        }

        /*
         * Register the suffix part with the kernel through
         * KVM_SET_USER_MEMORY_REGION; its start and size are computed from
         * the saved copy old.
         */
        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            /* distance from the old slot's start to the suffix's start */
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->ram = old.ram + size_delta;
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n", __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size) {
        return;
    }
    /* removal only: the overlapping slots were already unregistered above */
    if (!add) {
        return;
    }
    /* Allocate a fresh slot, fill it from the section and register it with
     * the kernel through KVM_SET_USER_MEMORY_REGION (only reached when add
     * is true and size is non-zero). */
    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->ram = ram;
    mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__, strerror(-err));
        abort();
    }
}
|
这里补充说明一个关键数据结构MemoryRegionSection,这个结构指定了一段内存,这段内存将被注册到内核KVM模块,进行加入或删除操作。
不多说了,上代码
/*
 * Describes one piece of memory that is handed to kvm_set_phys_mem() to be
 * registered with, or removed from, the kernel KVM module.
 */
struct MemoryRegionSection {
    /* the MemoryRegion this section belongs to */
    MemoryRegion *mr;
    AddressSpace *address_space;
    /*
     * Offset inside the region. The region has a block of host-allocated
     * memory attached, so this offset is what lets the section's HVA be
     * computed from the region's RAM pointer.
     */
    hwaddr offset_within_region;
    /* size of this piece of memory */
    Int128 size;
    /*
     * Offset inside the address space. This is a GPA, i.e. the offset from
     * guest physical address 0, and therefore the GPA at which this section
     * starts (when the address space represents system memory, an offset
     * inside it is naturally a physical address).
     */
    hwaddr offset_within_address_space;
    /* tells ROM (read-only) apart from RAM */
    bool readonly;
};
|