3.2 Qemu Memory管理

                 

3.2.1 Qemu内存管理结构

(1) KVM内存管理初始化

main(vl.c)==>configure_accelerator==>kvm_init(kvm_all.c)==> memory_listener_register(&kvm_memory_listener,NULL);

 

Qemu中可以注册多个listener, 用memory_listeners链表来维护

void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)

{

    MemoryListener *other = NULL;

 

    listener->address_space_filter = filter;

    if (QTAILQ_EMPTY(&memory_listeners)

        || listener->priority >=QTAILQ_LAST(&memory_listeners,

                                            memory_listeners)->priority) {

       QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);

    } else {

        QTAILQ_FOREACH(other,&memory_listeners, link) {

            if (listener->priority <other->priority) {

                break;

            }

        }

        QTAILQ_INSERT_BEFORE(other, listener,link);

    }

    listener_add_address_space(listener,&address_space_memory);

    listener_add_address_space(listener,&address_space_io);

}

static MemoryListener kvm_memory_listener = {

    .begin = kvm_begin,

    .commit = kvm_commit,

    .region_add = kvm_region_add,

    .region_del = kvm_region_del,

    .region_nop = kvm_region_nop,

    .log_start = kvm_log_start,

    .log_stop = kvm_log_stop,

    .log_sync = kvm_log_sync,

    .log_global_start = kvm_log_global_start,

    .log_global_stop = kvm_log_global_stop,

    .eventfd_add = kvm_eventfd_add,

    .eventfd_del = kvm_eventfd_del,

   .priority = 10,

};

kvm_region_add==>kvm_set_phys_mem(section, true);

kvm_region_del==>kvm_set_phys_mem(section, false);

log_global_xxx用于动态迁移,本章暂不讨论。

kvm_eventfd_add,kvm_eventfd_del用于eventfd的管理

 

(2) System Memory初始化

在Qemu初始化时会 main(vl.c)==>cpu_exec_init_all(exec.c)==>memory_map_init(exec.c)

static void memory_map_init(void)

{

    system_memory =g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory,"system", INT64_MAX);

    set_system_memory_map(system_memory);

 

    system_io = g_malloc(sizeof(*system_io));

    memory_region_init(system_io,"io", 65536);

    set_system_io_map(system_io);

 

   memory_listener_register(&core_memory_listener, system_memory);

   memory_listener_register(&io_memory_listener, system_io);

}

qemu中系统内存用system_memory来管理,io内存用system_io来管理,io内存管理将在第5章分析。system_memory的定义为 static MemoryRegion *system_memory; MemoryRegion可以有子区域。而memory_listener负责处理添加和移除内存区域的管理。

 

set_system_memory_map(system_memory);用system_memory来初始化address_space_memory.

 

void set_system_memory_map(MemoryRegion*mr)

{

    memory_region_transaction_begin();

    address_space_memory.root = mr;

    memory_region_transaction_commit();

}

AddressSpace的定义如下:

struct AddressSpace {

    MemoryRegion *root;

    FlatView current_map;

    int ioeventfd_nb;

    MemoryRegionIoeventfd *ioeventfds;

};

 

(3) Memory Listener 管理

void memory_region_transaction_begin(void)

{

    qemu_flush_coalesced_mmio_buffer();

    ++memory_region_transaction_depth;

}

qemu_flush_coalesced_mmio_buffer==>kvm_flush_coalesced_mmio_buffer(kvm_all.c)

 

void memory_region_transaction_commit(void)

{

    --memory_region_transaction_depth;

    if (!memory_region_transaction_depth) {

        MEMORY_LISTENER_CALL_GLOBAL(begin,Forward);

 

        if (address_space_memory.root) {

            address_space_update_topology(&address_space_memory);

        }

        if (address_space_io.root) {

           address_space_update_topology(&address_space_io);

        }

        MEMORY_LISTENER_CALL_GLOBAL(commit,Forward);

    }

}

 

static void address_space_update_topology(AddressSpace*as)

{

    FlatView old_view = as->current_map;

    FlatView new_view =generate_memory_topology(as->root);

 

    address_space_update_topology_pass(as,old_view, new_view, false);

    address_space_update_topology_pass(as, old_view,new_view, true);

 

    as->current_map = new_view;

    flatview_destroy(&old_view);

    address_space_update_ioeventfds(as);

}

 

address_space_update_topology_pass==》 MEMORY_LISTENER_UPDATE_REGION

#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback)           \

    MEMORY_LISTENER_CALL(callback, dir,(&(MemoryRegionSection) {       \

        .mr = (fr)->mr,                                                \

        .address_space = (as)->root,                                    \

        .offset_within_region =(fr)->offset_in_region,                \

        .size =int128_get64((fr)->addr.size),                          \

        .offset_within_address_space =int128_get64((fr)->addr.start),  \

        .readonly = (fr)->readonly,                                     \

              }))

MEMORY_LISTENER_CALL会从前到后或从后到前遍历memory_listeners,并调用相应方法如region_add, region_del等。调用region_add的示例如下:

MEMORY_LISTENER_UPDATE_REGION(frnew,as, Forward, region_add);

 

3.2.2 PC内存管理流程分析

(1)RAM初始化

pc_init1(hw\pc_piix.c)==》pc_memory_init(hw\pc.c)。内存被分为两段:0 ~ 0xE000_0000,以及0xE000_0000以上的部分;后者通过ram-above-4g别名映射到0x100000000(4G)起始的地址。pc_memory_init(hw\pc.c)的相关代码如下:

{   。。。。。。

   MemoryRegion * ram = g_malloc(sizeof(*ram));

    //分配整个内存区域

    memory_region_init_ram(ram,"pc.ram",

                           below_4g_mem_size +above_4g_mem_size);

    vmstate_register_ram_global(ram);

    *ram_memory = ram;

    ram_below_4g =g_malloc(sizeof(*ram_below_4g));

    memory_region_init_alias(ram_below_4g,"ram-below-4g", ram,

                             0, below_4g_mem_size);

    memory_region_add_subregion(system_memory,0, ram_below_4g);

    if (above_4g_mem_size > 0) {

        ram_above_4g =g_malloc(sizeof(*ram_above_4g));

        memory_region_init_alias(ram_above_4g,"ram-above-4g", ram,

                                below_4g_mem_size, above_4g_mem_size);

       memory_region_add_subregion(system_memory, 0x100000000ULL,

                                   ram_above_4g);

    }

。。。。。。

}

 

void memory_region_init_ram(MemoryRegion *mr,

                            const char *name,

                            uint64_t size)

{

    memory_region_init(mr, name, size);

    mr->ram = true;

    mr->terminates = true;

    mr->destructor =memory_region_destructor_ram;

    mr->ram_addr = qemu_ram_alloc(size, mr);

}

mr->ram_addr =qemu_ram_alloc(size, mr); 分配HVA

qemu_ram_alloc==》qemu_ram_alloc_from_ptr

a)  向ram_list 加入一个RAMBlock 结构;同时扩大ram_list.phys_dirty用于记录脏页

b)  ==》kvm_vmalloc==》qemu_vmalloc

qemu_vmalloc调用操作系统虚拟内存分配接口函数。

 

 

void memory_region_init_alias(MemoryRegion*mr,

                              const char *name,

                              MemoryRegion*orig,

                             target_phys_addr_t offset,

                              uint64_t size)

{

    memory_region_init(mr, name, size);

    mr->alias = orig;

    mr->alias_offset = offset;

}

 

memory_region_init_alias(ram_below_4g,"ram-below-4g", ram,0, below_4g_mem_size);

ram_below_4g->alias = ram; ram_below_4g->alias_offset = 0;

 

memory_region_add_subregion(system_memory,0, ram_below_4g);

//将ram_below_4g加入到system_memory的subregion中去

memory_region_add_subregion==>memory_region_add_subregion_common

static void memory_region_add_subregion_common(MemoryRegion *mr,

                                              target_phys_addr_t offset,

                                              MemoryRegion *subregion)

{

    MemoryRegion *other;

 

    memory_region_transaction_begin();

 

    assert(!subregion->parent);

    subregion->parent = mr;

    subregion->addr = offset;

    QTAILQ_FOREACH(other,&mr->subregions, subregions_link) {

        if (subregion->may_overlap ||other->may_overlap) { //overlap

            continue;

        }

        。。。。。。。

    }

    QTAILQ_FOREACH(other,&mr->subregions, subregions_link) {

        if (subregion->priority >=other->priority) {

            QTAILQ_INSERT_BEFORE(other,subregion, subregions_link);

            goto done;

        }

    }

    QTAILQ_INSERT_TAIL(&mr->subregions,subregion, subregions_link);

done:

    memory_region_transaction_commit();

}

由于此时core_memory_listener,kvm_memory_listener都已注册,memory_region_transaction_commit();将触发它们的region_add被调用。

 

(2) rom区域

pc_init1:

        pci_memory = g_new(MemoryRegion, 1);

        memory_region_init(pci_memory,"pci", INT64_MAX);

        rom_memory = pci_memory;

 

pc_memory_init==> pc_system_firmware_init==》old_pc_system_rom_init

bios rom区域的建立:

    memory_region_init_ram(bios,"pc.bios", bios_size);

    vmstate_register_ram_global(bios);

    memory_region_set_readonly(bios, true);

    isa_bios = g_malloc(sizeof(*isa_bios));

    memory_region_init_alias(isa_bios,"isa-bios", bios,

                             bios_size -isa_bios_size, isa_bios_size);

   memory_region_add_subregion_overlap(rom_memory,

                                       0x100000 - isa_bios_size,

                                        isa_bios,

                                        1);

    memory_region_set_readonly(isa_bios, true);

 

    /* map all the bios at the top of memory */

    memory_region_add_subregion(rom_memory,

                                (uint32_t)(-bios_size),

                                bios);

(3) Ram RW VM-Exit处理

kvm_cpu_exec==》case KVM_EXIT_MMIO ==>cpu_physical_memory_rw 下面的示例为ram区域的写:

       ram_addr_t addr1;

                addr1 =memory_region_get_ram_addr(section->mr)

                    +memory_region_section_addr(section, addr);

                /* RAM case */

                ptr = qemu_get_ram_ptr(addr1);

                memcpy(ptr, buf, l);

                invalidate_and_set_dirty(addr1,l);

                qemu_put_ram_ptr(ptr);

static void invalidate_and_set_dirty(target_phys_addr_t addr,

                                    target_phys_addr_t length)

{

    if (!cpu_physical_memory_is_dirty(addr)) {

        /* invalidate code */

        tb_invalidate_phys_page_range(addr,addr + length, 0);

        /* set dirty bit */

       cpu_physical_memory_set_dirty_flags(addr, (0xff &~CODE_DIRTY_FLAG));

    }

}

将页标记为脏

static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,

                                                     int dirty_flags)

{

    if ((dirty_flags &MIGRATION_DIRTY_FLAG) &&

        !cpu_physical_memory_get_dirty(addr,TARGET_PAGE_SIZE,

                                      MIGRATION_DIRTY_FLAG)) {

        ram_list.dirty_pages++;

    }

    return ram_list.phys_dirty[addr >>TARGET_PAGE_BITS] |= dirty_flags;

}

 

(4) 其他

memory_region_transaction_commit==》address_space_update_topology==》address_space_update_topology_pass ==》     

      if (adding) {

                MEMORY_LISTENER_UPDATE_REGION(frnew, as,Forward, region_nop);

                if (frold->dirty_log_mask&& !frnew->dirty_log_mask) {

                   MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop);

                } else if (frnew->dirty_log_mask&& !frold->dirty_log_mask) {

                   MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start);

                }

            }

当添加/移除或更新内存时会调用memory_region_transaction_commit。此时如果更新前后区域相同,则先调用region_nop;若该区域的dirty_log_mask由开启变为关闭,则调用log_stop;若由关闭变为开启,则调用log_start。log目前用于vga虚拟驱动。

 

3.2.3 Qemu到KVM内存管理接口分析

kvm_set_phys_mem用于设置内存, 该函数流程如下:

(1) start_addr = section->offset_within_address_space

  ram_addr_t size = section->size;

  根据物理起始地址和长度,在kvm_state中搜索已建立的KVMSlot *mem区域

  typedef struct KVMSlot

{

    target_phys_addr_t start_addr;

    ram_addr_t memory_size;

    void *ram;

    int slot;

    int flags;

} KVMSlot;

 

(2) 如果没找到,则退出循环并建立一个slot; add 为false时直接退出

    mem = kvm_alloc_slot(s);

    mem->memory_size = size;

    mem->start_addr = start_addr;

    mem->ram = ram;

    mem->flags = kvm_mem_flags(s,log_dirty);

    然后调用 err = kvm_set_user_memory_region(s, mem); 通知内核态建立内存区域

 

static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)

{

    struct kvm_userspace_memory_region mem;

 

    mem.slot = slot->slot;

    mem.guest_phys_addr = slot->start_addr;

    mem.memory_size = slot->memory_size;

    mem.userspace_addr = (unsigned long)slot->ram;

    mem.flags = slot->flags;

    if (s->migration_log) {

        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;

    }

    return kvm_vm_ioctl(s,KVM_SET_USER_MEMORY_REGION, &mem);

}

 

(3) 如果找到,区域完全重合,并且add==true,则调用

kvm_slot_dirty_pages_log_change(mem,log_dirty);并返回 其中

log_dirty =memory_region_is_logging(mr); //return mr->dirty_log_mask;

 

  if (mem->flags &KVM_MEM_LOG_DIRTY_PAGES) {

           kvm_physical_sync_dirty_bitmap(section);

        }

当kvm_log_global_start时KVM_MEM_LOG_DIRTY_PAGES flag会被设置

 

(4) 如果找到,但不完全重合

   a.   取消slot区域

        old = *mem;

        /* unregister the overlapping slot */

        mem->memory_size = 0;

        err = kvm_set_user_memory_region(s,mem);

 b.将新建两个区域

  slot->StartAddr  to  mr->startaddr

  mr->start_addr to  (slot->startadd + slot->memory_size)

 

3.2.4 KVM内存虚拟化框架

(1) memslots

kvm_vm_ioctl ==> kvm_vm_ioctl_set_memory_region ==> __kvm_set_memory_region

内核态也维护了一个slots, struct kvm->memslots,其定义如下:

struct kvm_memslots {

    u64 generation;

    struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];

    /* The mapping table from slot id to the index in memslots[]. */

    short id_to_index[KVM_MEM_SLOTS_NUM];

};

 

struct kvm_memory_slot {

    gfn_t base_gfn;  //guest physical page number

    unsigned long npages; // page numbers

    unsigned long *dirty_bitmap;

    struct kvm_arch_memory_slot arch;

    unsigned long userspace_addr; //host virtual (userspace) start address

    u32 flags;

    short id;

};

 

内核态slot的管理策略是根据用户空间的slot_id一一对应的

slot =id_to_memslot(kvm->memslots, mem->slot); //根据用户态slot号得到内核slot结构

__kvm_set_memory_region流程如下:

a. 根据用户态slot号得到内核slot结构

b.根据slot中的值和要设置的值,决定要操作的类别:

  enum kvm_mr_change {

    KVM_MR_CREATE,

    KVM_MR_DELETE,

    KVM_MR_MOVE,

    KVM_MR_FLAGS_ONLY,

};

c. 根据b中的动作进行操作

  i . KVM_MR_CREATE: kvm_arch_create_memslot

    X86 arch layer memslot 该结构按大页分级页表来gpa

    struct kvm_arch_memory_slot {

    unsigned long *rmap[KVM_NR_PAGE_SIZES];

    struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; //记录页被写的次数

    };

 

  ii KVM_MR_DELETE OR KVM_MR_MOVE:  

    1. 将原slot标记为无效

           slot->flags|= KVM_MEMSLOT_INVALID;

       old_memslots = install_new_memslots(kvm, slots, NULL);

        kvm_iommu_unmap_pages(kvm, &old);

        kvm_arch_flush_shadow_memslot 刷新影子页表3.4节分析

 

    2. 安装新slot,对于delete而言会将新slot清零memset(&new.arch, 0, sizeof(new.arch));

 

  iii r = kvm_arch_prepare_memory_region(kvm,&new, mem, change);

    通过vm_mmap调用为hva分配空间

 

  iv     删除要取消映射的区域

        install_new_memslots(kvm, slots,&new);

        kvm_arch_commit_memory_region(kvm,mem, &old, change); //vm_unmap

        kvm_free_physmem_slot(kvm,&old, &new);

        kfree(old_memslots);

 

  v KVM_MR_CREATE OR KVM_MR_MOVE:

      kvm_iommu_map_pages(kvm, &new);// 在第7章分析

 

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值