linux 内存管理---bootmem(三)

为什么要使用bootmem分配器,内存管理不是有buddy系统和slab分配器吗?由于在系统初始化的时候需要执行一些内存管理,内存分配的任务,这个时候buddy系统,slab分配器等并没有被初始化好,此时就引入了一种内存管理器bootmem分配器在系统初始化的时候进行内存管理与分配,当buddy系统和slab分配器初始化好后,在mem_init()中对bootmem分配器进行释放,内存管理与分配由buddy系统,slab分配器等进行接管。
bootmem分配器使用一个bitmap来标记物理页是否被占用,分配的时候按照首次适应(first-fit)的原则,从bitmap中进行查找:如果某一位为1,表示对应的页已经被占用,否则表示未被占用。为什么系统运行的时候不使用bootmem分配器呢?bootmem分配器每次都在bitmap中进行线性搜索,效率非常低,而且会在内存的起始端留下许多小的空闲碎片,在需要非常大的内存块的时候,检查位图这一过程就显得代价很高。bootmem分配器是用于在启动阶段分配内存的,对该分配器的需求集中于简单性方面,而不是性能和通用性。

memblock算法是linux内核初始化阶段的一个内存分配器,本质上是取代了原来的bootmem算法. memblock实现比较简单,而它的作用就是在page allocator初始化之前来管理内存,完成分配和释放请求.
为了保证系统的兼容性, 内核为bootmem和memblock提供了相同的API接口.
这样在编译Kernel的时候可以选择nobootmem或者bootmem 来在buddy system起来之前管理memory. 这两种机制对提供的API是一致的,因此对用户是透明的
参见 mm/Makefile
# Exactly one of the two boot-time allocator implementations is built,
# selected by CONFIG_NO_BOOTMEM.
ifdef CONFIG_NO_BOOTMEM
obj-y += nobootmem.o
else
obj-y += bootmem.o
endif
由于接口是一致的, 那么它们共同使用一份头文件:

| 头文件 | bootmem接口 | nobootmem接口 |
| --- | --- | --- |
| include/linux/bootmem.h | mm/bootmem.c | mm/nobootmem.c |
Memblock是在早期引导过程中管理内存的方法之一,此时内核内存分配器还没运行. Memblock以前被定义为Logical Memory Block( 逻辑内存块), 但根据 Yinghai Lu的补丁 , 它被重命名为memblock.

    +-------------------------------------------------------+
    |                   外部模块申请内存                    |
    +-------------------------------------------------------+
            |                                   |
            |                                   |
            ↓                                   ↓
+------------------------+         +------------------------+
|       bootmem.c        |         |      nobootmem.c       |
|   __alloc_bootmem()    |         |   __alloc_bootmem()    |
+------------------------+         +------------------------+
                                                |
                                                |
                                                ↓
                              +-----------------------------------+
                              |            memblock.c             |
                              |   memblock_find_in_range_node()   |
                              +-----------------------------------+
这里仅仅介绍bootmem。


前面一节《 linux 内存管理---物理内存探测(二)》记录了物理内存的分布,那么之后就交由bootmem来管理了。

/*
 * bootmem_init - bring up the boot-time (bootmem) allocator from boot_mem_map.
 *
 * Steps, in order:
 *   1. compute reserved_end, the first page frame past the kernel image
 *      (or past whatever init_initrd() returns, if that is higher);
 *   2. scan boot_mem_map for BOOT_MEM_RAM entries to find min_low_pfn,
 *      max_low_pfn and mapstart (where the bootmem bitmap will live);
 *   3. clamp low memory to HIGHMEM_START and call init_bootmem_node();
 *   4. free every usable low-RAM range above reserved_end into bootmem;
 *   5. re-reserve the bitmap itself and the initrd;
 *   6. call memory_present() on every BOOT_MEM_RAM range.
 *
 * The "..." line below is an elision made by the article, not real code.
 */
static void __init bootmem_init(void)
{
    unsigned long reserved_end;
    unsigned long mapstart = ~0UL;
    unsigned long bootmap_size;
    int i;

    /*
     * Init any data related to initrd. It's a nop if INITRD is
     * not selected. Once that done we can determine the low bound
     * of usable memory.
     */
    reserved_end = max(init_initrd(),
               (unsigned long) PFN_UP(__pa_symbol(&_end)));    // first page frame past the kernel image or (presumably) the initrd, whichever is higher

    /*
     * max_low_pfn is not a number of pages. The number of pages
     * of the system is given by 'max_low_pfn - min_low_pfn'.
     */
    min_low_pfn = ~0UL;
    max_low_pfn = 0;

    /*
     * Find the highest page frame number we have available.
     */
    for (i = 0; i < boot_mem_map.nr_map; i++) {
        unsigned long start, end;

        if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
            continue;

        /* Only whole pages count: round the start up and the end down. */
        start = PFN_UP(boot_mem_map.map[i].addr);
        end = PFN_DOWN(boot_mem_map.map[i].addr
                + boot_mem_map.map[i].size);

        if (end > max_low_pfn)
            max_low_pfn = end;
        if (start < min_low_pfn)
            min_low_pfn = start;
        if (end <= reserved_end)
            continue;
        if (start >= mapstart)
            continue;
        mapstart = max(reserved_end, start);      // mapstart: page frame where the bootmem bitmap will be stored; the lowest usable RAM page at or above reserved_end, i.e. right behind the kernel image
    }

    if (min_low_pfn >= max_low_pfn)
        panic("Incorrect memory mapping !!!");
    if (min_low_pfn > ARCH_PFN_OFFSET) {
        pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
            (min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),
            min_low_pfn - ARCH_PFN_OFFSET);
    } else if (min_low_pfn < ARCH_PFN_OFFSET) {
        pr_info("%lu free pages won't be used\n",
            ARCH_PFN_OFFSET - min_low_pfn);
    }
    min_low_pfn = ARCH_PFN_OFFSET;    //#define ARCH_PFN_OFFSET        PFN_UP(PHYS_OFFSET)

    /*
     * Determine low and high memory ranges
     */
    max_pfn = max_low_pfn;
    if (max_low_pfn > PFN_DOWN(HIGHMEM_START)) {          // low memory is capped at HIGHMEM_START (0x20000000 + 768M in the spaces.h quoted by this article)
#ifdef CONFIG_HIGHMEM
        highstart_pfn = PFN_DOWN(HIGHMEM_START);
        highend_pfn = max_low_pfn;
#endif
        max_low_pfn = PFN_DOWN(HIGHMEM_START);
    }

    /*
     * Initialize the boot-time allocator with low memory only.
     */
    bootmap_size = init_bootmem_node(NODE_DATA(0), mapstart,
                     min_low_pfn, max_low_pfn);      // initialise bootmem over [min_low_pfn, max_low_pfn), holes in between included

    ...

    /*
     * Register fully available low RAM pages with the bootmem allocator.
     */
    for (i = 0; i < boot_mem_map.nr_map; i++) {
        unsigned long start, end, size;

        start = PFN_UP(boot_mem_map.map[i].addr);
        end   = PFN_DOWN(boot_mem_map.map[i].addr
                    + boot_mem_map.map[i].size);

        /*
         * Reserve usable memory.
         */
        switch (boot_mem_map.map[i].type) {
        case BOOT_MEM_RAM:
            break;
        case BOOT_MEM_INIT_RAM:
            /* Registered with memory_present() but not freed into bootmem. */
            memory_present(0, start, end);
            continue;
        default:
            /* Not usable memory */
            continue;
        }

        /*
         * We are rounding up the start address of usable memory
         * and at the end of the usable range downwards.
         */
        if (start >= max_low_pfn)
            continue;
        if (start < reserved_end)   // only pages from reserved_end (end of the kernel image) upwards are marked free
            start = reserved_end;
        if (end > max_low_pfn)
            end = max_low_pfn;

        /*
         * ... finally, is the area going away?
         */
        if (end <= start)
            continue;
        size = end - start;

        /* Register lowmem ranges */
#ifdef CONFIG_BRCMSTB
        /* carve out space for bmem */
        brcm_free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);    // frees the range minus bmem; per the article, bmem is memory set aside for device DMA
#else
        free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);    // mark the page frames from the end of the kernel image to the end of this RAM range as free
#endif
    }

    /*
     * Reserve the bootmap memory.
     */
    reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);  // the pages holding the bootmem bitmap itself must stay reserved

    /*
     * Reserve initrd memory if needed.
     */
    finalize_initrd();    // marks the pages occupied by the initrd as reserved

    /*
     * Call memory_present() on all valid ranges, for SPARSEMEM.
     * This must be done after setting up bootmem, since memory_present()
     * may allocate bootmem.
     */
    for (i = 0; i < boot_mem_map.nr_map; i++) {
        unsigned long start, end;

        if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
            continue;

        start = PFN_UP(boot_mem_map.map[i].addr);
        end   = PFN_DOWN(boot_mem_map.map[i].addr
                    + boot_mem_map.map[i].size);
        memory_present(0, start, end);      // matters mainly because of physical memory holes: per the article, on this MIPS platform the low 256M is DRAM, the next 256M is registers, then up to 768M of DRAM follows, so any system with more than 256M of RAM has a hole
    }
}

/*
 * init_bootmem_node - register a node with the boot-time allocator.
 * @pgdat:    node whose ->bdata bootmem descriptor is initialised
 * @freepfn:  page frame where the allocation bitmap is placed
 * @startpfn: first page frame managed by the node
 * @endpfn:   page frame passed as the upper bound of the managed range
 *
 * Thin wrapper around init_bootmem_core(); returns whatever that returns
 * (the bitmap size in bytes).
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
                unsigned long startpfn, unsigned long endpfn)
{
    return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
}

在include/asm-mips/mach-generic/spaces.h中:
/* Physical address of the first byte of RAM; defaults to 0 unless overridden. */
#ifndef PHYS_OFFSET
#define PHYS_OFFSET  _AC(0, UL)
#endif
#ifdef CONFIG_32BIT
#define CAC_BASE  _AC(0x80000000, UL)
#endif

/* Upper-memory size limit, in megabytes. */
#define BRCM_MAX_UPPER_MB    _AC(768, UL)

/*
 * Upper memory starts at 0x20000000; HIGHMEM_START lies 768 MB above it,
 * i.e. 0x20000000 + 0x30000000 = 0x50000000.
 */
#define UPPERMEM_START        _AC(0x20000000, UL)
#define HIGHMEM_START        (UPPERMEM_START + (BRCM_MAX_UPPER_MB << 20))

/* With the defaults above this evaluates to 0x80000000 on CONFIG_32BIT. */
#ifndef PAGE_OFFSET
#define PAGE_OFFSET  (CAC_BASE + PHYS_OFFSET)
#endif

/*
 * init_bootmem_core - set up one bootmem descriptor and its bitmap.
 * @bdata:    descriptor to initialise
 * @mapstart: page frame at which the allocation bitmap is stored
 * @start:    first page frame covered by the bitmap
 * @end:      end page frame of the covered range
 *
 * Every bit of the bitmap is set to 1, so all pages start out reserved;
 * the caller has to free the usable RAM ranges explicitly afterwards.
 *
 * Returns the size of the bitmap in bytes.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
    unsigned long mapstart, unsigned long start, unsigned long end)
{
    unsigned long nbytes;

    mminit_validate_memmodel_limits(&start, &end);

    /* Describe the managed PFN range and where its bitmap lives. */
    bdata->node_min_pfn = start;
    bdata->node_low_pfn = end;
    bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
    link_bootmem(bdata);

    /* One bit per page, all set: nothing is allocatable yet. */
    nbytes = bootmap_bytes(end - start);
    memset(bdata->node_bootmem_map, 0xff, nbytes);

    bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
        bdata - bootmem_node_data, start, mapstart, end, nbytes);

    return nbytes;
}

/*
 * bootmap_bytes - size in bytes of a bitmap tracking @pages pages.
 *
 * One bit per page, rounded up to whole bytes and then up to a multiple
 * of sizeof(long).
 */
static unsigned long __init bootmap_bytes(unsigned long pages)
{
    return ALIGN(DIV_ROUND_UP(pages, 8), sizeof(long));
}
一个byte有8bit,每个bit用来记录一个页是否已被分配:bit为1表示该页已被占用(保留),bit为0表示该页空闲可用,因此一个byte可以记录8个页。

/*
 * free_bootmem - mark a physical address range as free in bootmem.
 * @addr: physical start address
 * @size: length of the range in bytes
 *
 * Only pages lying entirely inside the range are freed: the start is
 * rounded up and the end rounded down to page boundaries.
 */
void __init free_bootmem(unsigned long addr, unsigned long size)
{
    unsigned long first_pfn = PFN_UP(addr);
    unsigned long end_pfn = PFN_DOWN(addr + size);

    kmemleak_free_part(__va(addr), size);
    mark_bootmem(first_pfn, end_pfn, 0, 0);
}

/*
 * mark_bootmem - reserve or free the page frame range [start, end).
 * @start:   first page frame of the range
 * @end:     end page frame of the range (exclusive)
 * @reserve: non-zero to reserve the pages, zero to free them
 * @flags:   passed through to mark_bootmem_node() when reserving
 *
 * Walks bdata_list and marks the part of the range that falls into each
 * node. Returns 0 once the whole range has been handled, or the error
 * from a failed reservation. Hits BUG() if the list is exhausted before
 * the range is fully covered.
 */
static int __init mark_bootmem(unsigned long start, unsigned long end,
                int reserve, int flags)
{
    unsigned long pos;
    bootmem_data_t *bdata;

    pos = start;
    list_for_each_entry(bdata, &bdata_list, list) {
        int err;
        unsigned long max;

        if (pos < bdata->node_min_pfn ||
            pos >= bdata->node_low_pfn) {
            /* Skipping a node is only legal before marking has begun. */
            BUG_ON(pos != start);
            continue;
        }

        /* Mark no further than the end of this node. */
        max = min(bdata->node_low_pfn, end);

        err = mark_bootmem_node(bdata, pos, max, reserve, flags);
        if (reserve && err) {
            /* Roll back what earlier nodes already reserved. */
            mark_bootmem(start, pos, 0, 0);
            return err;
        }

        if (max == end)
            return 0;
        /* The range continues into the next node. */
        pos = bdata->node_low_pfn;
    }
    BUG();
}

/*
 * mark_bootmem_node - reserve or free a page frame range within one node.
 * @bdata:   descriptor of the node the range belongs to
 * @start:   first page frame of the range
 * @end:     end page frame of the range (exclusive)
 * @reserve: non-zero to reserve, zero to free
 * @flags:   forwarded to __reserve()
 *
 * Converts the page frame numbers to bitmap indices relative to the
 * node's first page frame. Returns the result of __reserve() when
 * reserving, and 0 when freeing.
 */
static int __init mark_bootmem_node(bootmem_data_t *bdata,
                unsigned long start, unsigned long end,
                int reserve, int flags)
{
    unsigned long first = start - bdata->node_min_pfn;
    unsigned long last = end - bdata->node_min_pfn;

    if (!reserve) {
        __free(bdata, first, last);
        return 0;
    }

    return __reserve(bdata, first, last, flags);
}

/*
 * __free - clear the bitmap bits [sidx, eidx), marking those pages free.
 * @bdata: node descriptor owning the bitmap
 * @sidx:  first bitmap index to clear
 * @eidx:  end bitmap index (exclusive)
 *
 * Hits BUG() if a bit in the range is already clear, i.e. on a double
 * free. The "..." line below is an elision made by the article.
 */
static void __init __free(bootmem_data_t *bdata,
            unsigned long sidx, unsigned long eidx)
{
    unsigned long idx;
    ...
    /* Pull the search hint back so the freed pages can be found again. */
    if (bdata->hint_idx > sidx)
        bdata->hint_idx = sidx;

    for (idx = sidx; idx < eidx; idx++)
        if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
            BUG();
}

/*
 * __reserve - set the bitmap bits [sidx, eidx), marking those pages reserved.
 * @bdata: node descriptor owning the bitmap
 * @sidx:  first bitmap index to set
 * @eidx:  end bitmap index (exclusive)
 * @flags: BOOTMEM_EXCLUSIVE makes an already-reserved page an error
 *
 * Returns 0 on success. With BOOTMEM_EXCLUSIVE, the first page found
 * already reserved causes the bits set so far to be released again and
 * -EBUSY to be returned; without it the double reservation is only logged.
 */
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
            unsigned long eidx, int flags)
{
    const int exclusive = flags & BOOTMEM_EXCLUSIVE;
    unsigned long bit;

    for (bit = sidx; bit < eidx; bit++) {
        /* Bit was clear: the page is now ours, move on. */
        if (!test_and_set_bit(bit, bdata->node_bootmem_map))
            continue;

        if (exclusive) {
            /* Page was already reserved: undo [sidx, bit) and fail. */
            __free(bdata, sidx, bit);
            return -EBUSY;
        }
        bdebug("silent double reserve of PFN %lx\n",
            bit + bdata->node_min_pfn);
    }

    return 0;
}

调用bootmem_init()函数之后bootmem就初始化完成了。当然可能有人会问:在bootmem初始化之前内核要分配内存怎么办?而且bootmem初始化过程本身要用到的内存从哪里来?这是一个先有鸡还是先有蛋的问题。内核采取的办法是:在bootmem可用之前(包括bootmem自身的初始化过程),内核的一切内存需求都使用静态内存,即全局变量的形式。比如bootmem的初始化过程中:
NODE_DATA(0) 宏展开为:
/* The nid argument is ignored: every node id maps to contig_page_data. */
#define NODE_DATA(nid)        (&contig_page_data)
/* Statically allocated node descriptor; its bdata points at slot 0 of bootmem_node_data. */
struct pglist_data __refdata contig_page_data = {
    .bdata = &bootmem_node_data[0]
};
contig_page_data就是定义为一个全局结构体变量,其中bdata为它的成员变量指针,直接指向另外一个全局变量:
/* Statically allocated bootmem descriptors, one slot per possible node. */
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;

bootmem初始化完成后就可以通过下列函数分配内存了:
/*
 * Convenience wrappers around the __alloc_bootmem*() family; x is the
 * requested size.
 *
 *   - plain variants pass SMP_CACHE_BYTES as the alignment;
 *   - *_align takes the alignment from the caller;
 *   - *_pages* variants pass PAGE_SIZE as the alignment;
 *   - *_node* variants take a pgdat and call __alloc_bootmem_node*();
 *   - *_nopanic variants call the corresponding *_nopanic function.
 *
 * All of them pass BOOTMEM_LOW_LIMIT as the last argument
 * (presumably the allocation goal; confirm against __alloc_bootmem()).
 */
#define alloc_bootmem(x) \
    __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_align(x, align) \
    __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_nopanic(x) \
    __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages(x) \
    __alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_nopanic(x) \
    __alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_node(pgdat, x) \
    __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_node_nopanic(pgdat, x) \
    __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_node(pgdat, x) \
    __alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_node_nopanic(pgdat, x) \
    __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

下面简单进行说明:
/*
 * alloc_bootmem_core - allocate @size bytes from one node's bootmem bitmap.
 * @bdata: node descriptor to allocate from
 * @size:  number of bytes requested
 * @align: required alignment in bytes
 * @goal:  preferred lowest physical address (0 for none)
 * @limit: physical address the allocation must stay below (0 for none)
 *
 * Searches the bitmap for a run of free pages, starting from the node's
 * hint index when that lies beyond the computed start, and retrying once
 * from the computed start if the search from the hint fails. The pages
 * found are reserved with BOOTMEM_EXCLUSIVE and the region is zeroed.
 *
 * Returns the virtual address of the region, or NULL if nothing fits.
 * The "..." line below is an elision made by the article.
 */
static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
                    unsigned long size, unsigned long align,
                    unsigned long goal, unsigned long limit)
{
    unsigned long fallback = 0;
    unsigned long min, max, start, sidx, midx, step;
    ...
    min = bdata->node_min_pfn;
    max = bdata->node_low_pfn;

    /* From here on goal and limit are page frame numbers, not addresses. */
    goal >>= PAGE_SHIFT;
    limit >>= PAGE_SHIFT;

    if (limit && max > limit)
        max = limit;
    if (max <= min)
        return NULL;

    /* Search stride in pages: the alignment, but at least one page. */
    step = max(align >> PAGE_SHIFT, 1UL);

    if (goal && min < goal && goal < max)
        start = ALIGN(goal, step);
    else
        start = ALIGN(min, step);

    /* Convert to bitmap indices relative to the node's first page. */
    sidx = start - bdata->node_min_pfn;
    midx = max - bdata->node_min_pfn;

    if (bdata->hint_idx > sidx) {
        /*
         * Handle the valid case of sidx being zero and still
         * catch the fallback below.
         */
        fallback = sidx + 1;
        sidx = align_idx(bdata, bdata->hint_idx, step);
    }

    while (1) {
        int merge;
        void *region;
        unsigned long eidx, i, start_off, end_off;
find_block:
        sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);      // find the first free page frame at or after sidx
        sidx = align_idx(bdata, sidx, step);
        eidx = sidx + PFN_UP(size);

        if (sidx >= midx || eidx > midx)
            break;

        /* Every page in [sidx, eidx) must be free; otherwise restart past the busy one. */
        for (i = sidx; i < eidx; i++)
            if (test_bit(i, bdata->node_bootmem_map)) {
                sidx = align_idx(bdata, i, step);
                if (sidx == i)
                    sidx += step;
                goto find_block;
            }

        /*
         * If the previous allocation ended in the middle of the page
         * right before sidx, start inside that partially used page.
         */
        if (bdata->last_end_off & (PAGE_SIZE - 1) &&
                PFN_DOWN(bdata->last_end_off) + 1 == sidx)
            start_off = align_off(bdata, bdata->last_end_off, align);
        else
            start_off = PFN_PHYS(sidx);

        /* merge is 1 when the region begins in the previous page. */
        merge = PFN_DOWN(start_off) < sidx;
        end_off = start_off + size;

        bdata->last_end_off = end_off;
        bdata->hint_idx = PFN_UP(end_off);

        /*
         * Reserve the area now:
         */
        if (__reserve(bdata, PFN_DOWN(start_off) + merge,
                PFN_UP(end_off), BOOTMEM_EXCLUSIVE))         // mark the allocated page frames as reserved; a merged first page is skipped
            BUG();

        region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
                start_off);
        memset(region, 0, size);
        /*
         * The min_count is set to 0 so that bootmem allocated blocks
         * are never reported as leaks.
         */
        kmemleak_alloc(region, size, 0, 0);
        return region;
    }

    /* The search from the hint failed: retry once from the original start index. */
    if (fallback) {
        sidx = align_idx(bdata, fallback - 1, step);
        fallback = 0;
        goto find_block;
    }

    return NULL;
}


参考文档:

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值