Physical Memory Detection and memblock Allocator Initialization and Allocation

Broadly speaking, Linux memory management initialization proceeds in three stages:

  • memblock/bootmem allocator initialization
  • buddy system initialization
  • slab/slub allocator initialization

 

Why three stages? In the earliest phase of system initialization, neither the buddy system nor the slab allocator is ready, yet the kernel still needs some way to allocate physical memory. Linux originally provided the bootmem allocator for this purpose; later the memblock allocator replaced bootmem, while keeping a compatible API.

 

How does Linux allocate one or more pages? For allocations of 2^order pages, Linux provides the buddy allocator. Its core idea: the kernel divides memory into zones, and each zone maintains an array of free lists, free_area[MAX_ORDER], on which free blocks of 1, 2, 4, 8, ..., 2^(MAX_ORDER-1) pages are linked. On allocation, the list of the matching size is tried first; if it is empty, a block is taken from the next larger list and split into two halves. The two halves are called buddies: one is handed to the caller, the other is linked into the corresponding free list, and so on recursively.
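To make the order-based interface concrete, here is a minimal sketch (not from the original text; the function name buddy_demo is illustrative) of a kernel-side caller using the standard buddy-allocator entry points alloc_pages()/__free_pages() together with get_order():

#include <linux/gfp.h>
#include <linux/mm.h>

static int __init buddy_demo(void)
{
        /* Room for 5 pages: get_order(5 * PAGE_SIZE) == 3, so the buddy
         * allocator hands back a 2^3 = 8-page block. The 3 surplus pages
         * are exactly the waste that slab/slub (below) exists to avoid. */
        unsigned int order = get_order(5 * PAGE_SIZE);
        struct page *pg = alloc_pages(GFP_KERNEL, order);

        if (!pg)
                return -ENOMEM;

        __free_pages(pg, order);        /* return the whole block to its free list */
        return 0;
}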

 

The buddy system can only allocate 2^order pages. To allocate arbitrary byte counts without wasting memory, Linux provides the slab and slub allocators. The slab allocator carves pages into pools of fixed-size objects: when an object is needed it is taken from the pool, and when it is no longer needed its memory is returned to the pool.
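As a hedged sketch of that object-pool idea, using the standard kmem_cache API (the cache name and object type below are illustrative only, not from the original text):

#include <linux/slab.h>

struct foo {
        int a;
        long b;
};

static struct kmem_cache *foo_cache;

static int __init foo_demo(void)
{
        struct foo *obj;

        /* one pool of sizeof(struct foo)-sized objects, carved out of pages */
        foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
                                      0, SLAB_HWCACHE_ALIGN, NULL);
        if (!foo_cache)
                return -ENOMEM;

        obj = kmem_cache_alloc(foo_cache, GFP_KERNEL);  /* take one object from the pool */
        if (obj)
                kmem_cache_free(foo_cache, obj);        /* back to the pool, not to the buddy system */

        kmem_cache_destroy(foo_cache);
        return 0;
}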

 

Below we walk step by step from probing the memory size through initializing the memory allocators.

 

Physical memory detection

On x86, physical memory is detected through BIOS interrupt INT 15h, which returns the base address, length, type, and other attributes of each usable memory range.

The detection entry point lives in arch/x86/boot/memory.c:

int detect_memory(void)
{
        int err = -1;

        if (detect_memory_e820() > 0)
                err = 0;
        if (!detect_memory_e801())
                err = 0;
        if (!detect_memory_88())
                err = 0;

        return err;
}

 

The main work happens in detect_memory_e820(), which loops issuing INT 15h calls and stores each returned memory segment in boot_params.e820_map.

static int detect_memory_e820(void)
{
        int count = 0;
        struct biosregs ireg, oreg;
        struct e820entry *desc = boot_params.e820_map;
        static struct e820entry buf; /* static so it is zeroed */

        initregs(&ireg);
        ireg.ax  = 0xe820;
        ireg.cx  = sizeof buf;
        ireg.edx = SMAP;
        ireg.di  = (size_t)&buf;

        /*
         * Note: at least one BIOS is known which assumes that the
         * buffer pointed to by one e820 call is the same one as
         * the previous call, and only changes modified fields.  Therefore,
         * we use a temporary buffer and copy the results entry by entry.
         *
         * This routine deliberately does not try to account for
         * ACPI 3+ extended attributes.  This is because there are
         * BIOSes in the field which report zero for the valid bit for
         * all ranges, and we don't currently make any use of the
         * other attribute bits.  Revisit this if we see the extended
         * attribute bits deployed in a meaningful way in the future.
         */

        do {
                intcall(0x15, &ireg, &oreg);
                ireg.ebx = oreg.ebx; /* for next iteration... */

                /* BIOSes which terminate the chain with CF = 1 as opposed
                   to %ebx = 0 don't always report the SMAP signature on
                   the final, failing, probe. */
                if (oreg.eflags & X86_EFLAGS_CF)
                        break;

                /* Some BIOSes stop returning SMAP in the middle of
                   the search loop.  We don't know exactly how the BIOS
                   screwed up the map at that point, we might have a
                   partial map, the full map, or complete garbage, so
                   just return failure. */
                if (oreg.eax != SMAP) {
                        count = 0;
                        break;
                }

                *desc++ = buf;
                count++;
        } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));

        return boot_params.e820_entries = count;
}

 

A few data structures used here:

#define E820MAX        128        /* number of entries in E820MAP */

struct e820entry {
        __u64 addr;        /* start of memory segment */
        __u64 size;        /* size of memory segment */
        __u32 type;        /* type of memory segment */
} __attribute__((packed));

struct boot_params {
        /* ... */
        struct e820entry e820_map[E820MAX];        /* 0x2d0 */
        /* ... */
};
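As a hedged illustration of how these structures fit together (this loop is not kernel source, just a sketch in the spirit of e820_print_map(); the function name dump_e820 is made up):

static void __init dump_e820(void)
{
        int i;

        /* walk the segments detect_memory_e820() stored for us */
        for (i = 0; i < boot_params.e820_entries; i++) {
                struct e820entry *e = &boot_params.e820_map[i];

                printk(KERN_INFO "e820: [mem %#018llx-%#018llx] type %u\n",
                       (unsigned long long)e->addr,
                       (unsigned long long)(e->addr + e->size - 1),
                       e->type);
        }
}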

 

Assume now that the usable memory ranges have been obtained and stored in the boot_params.e820_map[] array. How exactly the address ranges are obtained from the BIOS is discussed in a separate article.

 

The call stack from detect_memory through the subsequent memblock initialization is:

[arch/x86/boot/header.S: call main] -> main() -> detect_memory() -> go_to_protected_mode() -> [arch/x86/boot/pmjump.S: protected_mode_jump -> code32_start] -> [arch/x86/boot/header.S: code32_start: 0x100000] -> ... ->
[arch/x86/kernel/head_64.S: startup_64 -> x86_64_start_kernel()] -> x86_64_start_kernel() -> x86_64_start_reservations() -> start_kernel() -> setup_arch()

The full assembly-to-C hand-off during initialization, including the x86 transition from real mode to protected mode, will be covered separately.

How the bootloader loads the kernel into memory will also be introduced separately.

 

Back to the main topic: here are the key memory-management-related functions. In start_kernel()->setup_arch():

void __init setup_arch(char **cmdline_p)
{
        setup_memory_map();
        max_pfn = e820_end_of_ram_pfn();        /* find the highest page frame number */
        find_low_pfn_range();                   /* set the low/high memory boundary */
}

 

Before looking at setup_memory_map(), note the following initialization:

struct x86_init_ops x86_init __initdata = {
        /* ... */
        .resources = {
                .probe_roms             = probe_roms,
                .reserve_resources      = reserve_standard_io_resources,
                .memory_setup           = default_machine_specific_memory_setup,
        },
        /* ... */
};

 

void __init setup_memory_map(void)
{
        char *who;

        who = x86_init.resources.memory_setup();        /* actually calls default_machine_specific_memory_setup() */
        memcpy(&e820_saved, &e820, sizeof(struct e820map));        /* keep a copy of the e820 map */
        printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n");
        e820_print_map(who);
}

 

char *__init default_machine_specific_memory_setup(void)
{
        char *who = "BIOS-e820";
        u32 new_nr;
        /*
         * Try to copy the BIOS-supplied E820-map.
         *
         * Otherwise fake a memory map; one section from 0k->640k,
         * the next section from 1mb->appropriate_mem_k
         */
        new_nr = boot_params.e820_entries;
        /* remove overlaps between memory ranges */
        sanitize_e820_map(boot_params.e820_map,
                          ARRAY_SIZE(boot_params.e820_map),
                          &new_nr);
        boot_params.e820_entries = new_nr;
        /* copy the memory layout from boot_params.e820_map into the global e820 */
        if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
            < 0) {
                u64 mem_size;

                /* compare results from other methods and take the greater */
                if (boot_params.alt_mem_k
                    < boot_params.screen_info.ext_mem_k) {
                        mem_size = boot_params.screen_info.ext_mem_k;
                        who = "BIOS-88";
                } else {
                        mem_size = boot_params.alt_mem_k;
                        who = "BIOS-e801";
                }

                e820.nr_map = 0;
                e820_add_region(0, LOWMEMSIZE(), E820_RAM);
                e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
        }

        /* In case someone cares... */
        return who;
}

As shown, this function does two things: it removes overlaps between memory ranges, and it copies the memory layout from boot_params.e820_map into e820, ultimately calling e820_add_region() to store each range in e820's map array.

static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
                                     int type)
{
        int x = e820x->nr_map;

        if (x >= ARRAY_SIZE(e820x->map)) {
                printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n",
                       (unsigned long long) start,
                       (unsigned long long) (start + size - 1));
                return;
        }

        e820x->map[x].addr = start;
        e820x->map[x].size = size;
        e820x->map[x].type = type;
        e820x->nr_map++;
}

 

Now look at e820_end_of_ram_pfn(), which iterates over the page frames of all RAM ranges to find the highest RAM page frame number:

unsigned long __init e820_end_of_ram_pfn(void)
{
        return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
}

 

e820_end_pfn() finds the highest physical page frame number of the given type below limit_pfn:

static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
{
        int i;
        unsigned long last_pfn = 0;
        unsigned long max_arch_pfn = MAX_ARCH_PFN;

        for (i = 0; i < e820.nr_map; i++) {
                struct e820entry *ei = &e820.map[i];
                unsigned long start_pfn;
                unsigned long end_pfn;

                if (ei->type != type)
                        continue;

                start_pfn = ei->addr >> PAGE_SHIFT;
                end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;

                if (start_pfn >= limit_pfn)
                        continue;
                if (end_pfn > limit_pfn) {
                        last_pfn = limit_pfn;
                        break;
                }
                if (end_pfn > last_pfn)
                        last_pfn = end_pfn;
        }

        if (last_pfn > max_arch_pfn)
                last_pfn = max_arch_pfn;

        printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
               last_pfn, max_arch_pfn);
        return last_pfn;
}

 

Next, back in setup_arch():

void __init setup_arch(char **cmdline_p)
{
        /* ... */
        max_pfn = e820_end_of_ram_pfn();

#ifdef CONFIG_X86_32
        /* max_low_pfn get updated here */
        find_low_pfn_range();
#else
        check_x2apic();

        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
        if (max_pfn > (1UL << (32 - PAGE_SHIFT)))
                /* more than 4G of RAM: low memory ends at 4G */
                max_low_pfn = e820_end_of_low_ram_pfn();
        else
                /* at most 4G of RAM: low memory ends at max_pfn */
                max_low_pfn = max_pfn;

        high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;        /* start of high memory */
#endif
        /* ... */
}

So on 64-bit systems the boundary between low and high memory is 4G; when physical memory is smaller than 4G, the two coincide, both ending at max_pfn.
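A quick sanity check of the 4G test above, assuming 4 KiB pages (PAGE_SHIFT = 12):

        1UL << (32 - PAGE_SHIFT) = 1 << 20 = 0x100000 page frames
        0x100000 frames x 4 KiB/frame = 4 GiB

so max_pfn > 0x100000 means the machine has RAM above the 4 GiB mark.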

 

memblock initialization

With physical memory detection complete, the next step is initializing the memblock allocator. The main calls are:

void __init setup_arch(char **cmdline_p)
{
        /* ... */
        memblock_set_current_limit(ISA_END_ADDRESS);        /* set memblock.current_limit = 1M */
        memblock_x86_fill();                                /* the main memblock initialization */
        /* ... */
        init_mem_mapping();
        memblock_set_current_limit(get_max_mapped());
        /* ... */
        initmem_init();
        /* ... */
}

 

memblock debugging: add memblock=debug to the kernel parameters in grub, and memblock initialization debug information will be printed to dmesg.
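For example, on a grub2-based distribution this usually means editing the kernel command line (a hedged sketch; paths and tooling vary by distribution):

# /etc/default/grub
GRUB_CMDLINE_LINUX="... memblock=debug"
# then regenerate the config, e.g.:
#   grub2-mkconfig -o /boot/grub2/grub.cfg
# and after reboot inspect the output with: dmesg | grep memblock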

 

memblock_x86_fill() iterates over the memory layout stored in e820 and calls memblock_add() to add each range into the memblock structure.

void __init memblock_x86_fill(void)
{
        int i;
        u64 end;

        /*
         * EFI may have more than 128 entries
         * We are safe to enable resizing, beause memblock_x86_fill()
         * is rather later for x86
         */
        memblock_allow_resize();

        for (i = 0; i < e820.nr_map; i++) {
                struct e820entry *ei = &e820.map[i];

                end = ei->addr + ei->size;
                if (end != (resource_size_t)end)
                        continue;

                if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
                        continue;

                memblock_add(ei->addr, ei->size);
        }

        /* throw away partial pages */
        memblock_trim_memory(PAGE_SIZE);

        memblock_dump_all();
}

 

Now to the heart of memblock; the main code is in mm/memblock.c. The kernel keeps a single global instance, struct memblock memblock, which holds all memblock state.

struct memblock {
        bool bottom_up;                 /* is bottom up direction? */
        phys_addr_t current_limit;      /* allocation limit */
        struct memblock_type memory;
        struct memblock_type reserved;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
        struct memblock_type physmem;
#endif
};

 

bottom_up: whether memblock allocates bottom-up or top-down; defaults to false, i.e. top-down allocation
current_limit: upper bound for memblock allocations
memory: the number of memory regions known to memblock, their total size, and each individual region
reserved: the number, total size, and each individual region of memory already handed out; ranges recorded here can no longer be allocated by memblock
In short, memory describes all regions, allocated or not, while reserved describes the regions memblock has already allocated.
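A hedged sketch of that relationship (the addresses and function name memblock_usage_sketch are made up): allocatable memory is whatever lies in memory but not in reserved.

#include <linux/memblock.h>

static void __init memblock_usage_sketch(void)
{
        memblock_add(0x00100000, 0x40000000);        /* register 1 GiB of RAM starting at 1 MiB */
        memblock_reserve(0x01000000, 0x00200000);    /* mark 2 MiB of it as already in use */

        /* a subsequent allocation searches memory minus reserved, and the
         * range it picks is in turn appended to memblock.reserved */
}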

 

struct memblock_type {
        unsigned long cnt;        /* number of regions */
        unsigned long max;        /* size of the allocated array */
        phys_addr_t total_size;   /* size of all regions */
        struct memblock_region *regions;
};

cnt: number of regions
max: maximum number of regions the array can hold
total_size: total size of all regions
regions: the region array, recording each region's information

 

struct memblock_region {
        phys_addr_t base;
        phys_addr_t size;
        unsigned long flags;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        int nid;
#endif
};

base: region base address
size: region size
flags: region flags

 

 

memblock static initialization

static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif

struct memblock memblock __initdata_memblock = {
        .memory.regions         = memblock_memory_init_regions,
        .memory.cnt             = 1,    /* empty dummy entry */
        .memory.max             = INIT_MEMBLOCK_REGIONS,

        .reserved.regions       = memblock_reserved_init_regions,
        .reserved.cnt           = 1,    /* empty dummy entry */
        .reserved.max           = INIT_MEMBLOCK_REGIONS,

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
        .physmem.regions        = memblock_physmem_init_regions,
        .physmem.cnt            = 1,    /* empty dummy entry */
        .physmem.max            = INIT_PHYSMEM_REGIONS,
#endif

        .bottom_up              = false,
        .current_limit          = MEMBLOCK_ALLOC_ANYWHERE,
};

.memory.regions and .reserved.regions point to fixed static arrays holding at most INIT_MEMBLOCK_REGIONS (128) regions; .current_limit starts out as MEMBLOCK_ALLOC_ANYWHERE, i.e. the highest possible physical address.

 

memblock_add() is called to add each usable memory range into memblock:

int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
        return memblock_add_range(&memblock.memory, base, size,
                                  MAX_NUMNODES, 0);
}

 

/*
 * Add a new region into a memblock type.
 * @type:  memblock type to add the new region into
 * @base:  base address of the new region
 * @size:  size of the new region
 * @nid:   nid of the new region
 * @flags: flags of the new region
 */

int __init_memblock memblock_add_range(struct memblock_type *type,
                                       phys_addr_t base, phys_addr_t size,
                                       int nid, unsigned long flags)
{
        bool insert = false;
        phys_addr_t obase = base;
        phys_addr_t end = base + memblock_cap_size(base, &size);        /* cap size so base + size cannot overflow */
        int i, nr_new;

        if (!size)
                return 0;

        /* special case for empty array */
        if (type->regions[0].size == 0) {        /* the very first add takes this path */
                WARN_ON(type->cnt != 1 || type->total_size);
                type->regions[0].base = base;
                type->regions[0].size = size;
                type->regions[0].flags = flags;
                memblock_set_region_node(&type->regions[0], nid);
                type->total_size = size;
                return 0;
        }
repeat:
        /*
         * The following is executed twice.  Once with %false @insert and
         * then with %true.  The first counts the number of regions needed
         * to accomodate the new area.  The second actually inserts them.
         */
        /* Later adds run this twice: the first pass (insert == false) only
           counts how many regions the new area needs; the second pass
           actually inserts them into the regions array. */
        base = obase;
        nr_new = 0;

        for (i = 0; i < type->cnt; i++) {
                struct memblock_region *rgn = &type->regions[i];
                phys_addr_t rbase = rgn->base;
                phys_addr_t rend = rbase + rgn->size;

                if (rbase >= end)
                        break;
                if (rend <= base)
                        continue;
                /*
                 * @rgn overlaps.  If it separates the lower part of new
                 * area, insert that portion.
                 */
                if (rbase > base) {
                        nr_new++;
                        if (insert)
                                memblock_insert_region(type, i++, base,
                                                       rbase - base, nid,
                                                       flags);
                }
                /* area below @rend is dealt with, forget about it */
                base = min(rend, end);
        }

        /* insert the remaining portion */
        if (base < end) {
                nr_new++;
                if (insert)
                        memblock_insert_region(type, i, base, end - base,
                                               nid, flags);
        }

        /*
         * If this was the first round, resize array and repeat for actual
         * insertions; otherwise, merge and return.
         */
        /* first pass ends here: resize the array if needed, then repeat */
        if (!insert) {
                while (type->cnt + nr_new > type->max)
                        if (memblock_double_array(type, obase, size) < 0)
                                return -ENOMEM;
                insert = true;
                goto repeat;
        } else {
                memblock_merge_regions(type);        /* second pass: merge adjacent/overlapping regions */
                return 0;
        }
}

At this point, all the memory ranges recorded in e820 have been added into memblock.
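A hedged worked example of the two-pass logic above (addresses made up): suppose memblock.memory already holds [0x1000, 0x3000) and memblock_add() is called with base = 0x2000, size = 0x3000. The loop finds that [0x2000, 0x3000) overlaps the existing region, so only the portion above 0x3000 counts as new (nr_new = 1); the second pass inserts [0x3000, 0x5000), and memblock_merge_regions() then fuses the adjacent entries into a single region [0x1000, 0x5000).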

 

In 64-bit mode the kernel direct-maps all physical memory: kernel virtual address = physical address + PAGE_OFFSET (0xffff880000000000 on kernels of this vintage), so __va()/phys_to_virt() reduce to a simple addition.
 

 

memblock allocation API

void * __init memblock_virt_alloc_try_nid_nopanic(
                                phys_addr_t size, phys_addr_t align,
                                phys_addr_t min_addr, phys_addr_t max_addr,
                                int nid)
{
        memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n",
                     __func__, (u64)size, (u64)align, nid, (u64)min_addr,
                     (u64)max_addr, (void *)_RET_IP_);
        return memblock_virt_alloc_internal(size, align, min_addr,
                                            max_addr, nid);
}

 

Let's dig into memblock_virt_alloc_try_nid_nopanic() to see how the OS allocates memory from memblock this early in boot. First, the parameters:

size: number of bytes to allocate
align: required alignment of the allocation, in bytes
min_addr: lower bound; the allocation must not lie below min_addr
max_addr: upper bound; the allocation must not extend above max_addr
nid: NUMA node the allocation should come from
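For instance, an early-boot caller that wants one zeroed, cache-line-aligned page from any node might look like this (a hedged sketch; the function name early_buf_init is made up, while MEMBLOCK_ALLOC_ACCESSIBLE and NUMA_NO_NODE are the usual "no constraint" values for max_addr and nid):

#include <linux/bootmem.h>        /* declares the memblock_virt_alloc_* helpers in this era */
#include <linux/memblock.h>

static void __init early_buf_init(void)
{
        void *buf;

        buf = memblock_virt_alloc_try_nid_nopanic(
                        PAGE_SIZE, SMP_CACHE_BYTES,
                        0,                            /* min_addr: no lower bound */
                        MEMBLOCK_ALLOC_ACCESSIBLE,    /* max_addr: capped by current_limit */
                        NUMA_NO_NODE);                /* nid: any node will do */
        if (!buf)
                panic("early allocation failed");     /* the _nopanic variant leaves failure handling to the caller */
}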

 

The allocation flow: first check whether the slab allocator is already available; if so, use it directly. Otherwise allocate from memblock, trying the requested node's memory first and falling back to other nodes if that fails. The per-node search is done by memblock_find_in_range_node().

static void * __init memblock_virt_alloc_internal(
                                phys_addr_t size, phys_addr_t align,
                                phys_addr_t min_addr, phys_addr_t max_addr,
                                int nid)
{
        phys_addr_t alloc;
        void *ptr;

        if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
                nid = NUMA_NO_NODE;

        /*
         * Detect any accidental use of these APIs after slab is ready, as at
         * this moment memblock may be deinitialized already and its
         * internal data may be destroyed (after execution of free_all_bootmem)
         */
        if (WARN_ON_ONCE(slab_is_available()))        /* if slab is already up, allocate from it directly */
                return kzalloc_node(size, GFP_NOWAIT, nid);

        if (!align)
                align = SMP_CACHE_BYTES;        /* default alignment: L1 cache line */

        if (max_addr > memblock.current_limit)
                max_addr = memblock.current_limit;        /* never exceed the configured upper limit */

again:
        alloc = memblock_find_in_range_node(size, align, min_addr, max_addr,
                                            nid);        /* find an unallocated region in memblock */
        if (alloc)        /* found a usable address: done; otherwise try other nodes */
                goto done;

        if (nid != NUMA_NO_NODE) {        /* fall back to any node */
                alloc = memblock_find_in_range_node(size, align, min_addr,
                                                    max_addr, NUMA_NO_NODE);
                if (alloc)
                        goto done;
        }

        if (min_addr) {        /* last resort: drop the lower bound and retry */
                min_addr = 0;
                goto again;
        } else {
                goto error;
        }

done:
        memblock_reserve(alloc, size);        /* record the allocated range in the reserved regions */
        ptr = phys_to_virt(alloc);            /* 64-bit direct mapping: virt = phys + PAGE_OFFSET */
        memset(ptr, 0, size);                 /* zero the newly allocated memory */

        /*
         * The min_count is set to 0 so that bootmem allocated blocks
         * are never reported as leaks. This is because many of these blocks
         * are only referred via the physical address which is not
         * looked up by kmemleak.
         */
        kmemleak_alloc(ptr, size, 0, 0);

        return ptr;

error:
        return NULL;
}

 

Now, how is memory found on a node?

phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
                                        phys_addr_t align, phys_addr_t start,
                                        phys_addr_t end, int nid)
{
        phys_addr_t kernel_end, ret;

        /* pump up @end */
        if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
                end = memblock.current_limit;

        /* avoid allocating the first page */
        start = max_t(phys_addr_t, start, PAGE_SIZE);
        end = max(start, end);
        kernel_end = __pa_symbol(_end);        /* _end marks the end of the kernel image */

        /*
         * try bottom-up allocation only when bottom-up mode
         * is set and @end is above the kernel image.
         */
        if (memblock_bottom_up() && end > kernel_end) {        /* x86 allocates top-down, so this branch is skipped */
                phys_addr_t bottom_up_start;

                /* make sure we will allocate above the kernel */
                bottom_up_start = max(start, kernel_end);        /* keep allocations above the kernel image */

                /* ok, try bottom-up allocation first */
                ret = __memblock_find_range_bottom_up(bottom_up_start, end,
                                                      size, align, nid);
                if (ret)
                        return ret;

                /*
                 * we always limit bottom-up allocation above the kernel,
                 * but top-down allocation doesn't have the limit, so
                 * retrying top-down allocation may succeed when bottom-up
                 * allocation failed.
                 *
                 * bottom-up allocation is expected to be fail very rarely,
                 * so we use WARN_ONCE() here to see the stack trace if
                 * fail happens.
                 */
                WARN_ONCE(1, "memblock: bottom-up allocation failed, "
                             "memory hotunplug may be affected\n");
        }

        return __memblock_find_range_top_down(start, end, size, align, nid);
}

 

 

x86 uses top-down allocation; the search proceeds as follows:

static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
                               phys_addr_t size, phys_addr_t align, int nid)
{
        phys_addr_t this_start, this_end, cand;
        u64 i;

        /*
         * This is the core of the memblock allocator: "allocating" means
         * finding, among the recorded memblock.memory regions, a range that
         * is not covered by any memblock.reserved region. Each candidate
         * free range is returned in [this_start, this_end).
         */
        for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
                this_start = clamp(this_start, start, end);        /* confine the range to [start, end] */
                this_end = clamp(this_end, start, end);

                if (this_end < size)        /* range ends too low to fit @size bytes below it */
                        continue;

                cand = round_down(this_end - size, align);
                if (cand >= this_start)        /* the aligned candidate must still lie inside the free range */
                        return cand;
        }

        return 0;
}
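A hedged numeric walk-through (made-up values): suppose the highest free range is [0x1000, 0x9000) and we want size = 0x3000 with align = 0x2000. Then cand = round_down(0x9000 - 0x3000, 0x2000) = round_down(0x6000, 0x2000) = 0x6000, and since 0x6000 >= 0x1000, the block [0x6000, 0x9000) is returned: the highest aligned fit, which is exactly the top-down policy.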

 

Here is the key part: finding a free region is done by the for_each_free_mem_range_reverse macro, which walks every region in memblock.memory and yields the portions not recorded in memblock.reserved (recall that memblock.reserved holds the ranges that have already been handed out):

#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid)        \
        for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,        \
                               nid, p_start, p_end, p_nid)

 

for_each_mem_range_rev is defined as follows; p_start, p_end, and p_nid receive the start address, end address, and node id of each range found:

#define for_each_mem_range_rev(i, type_a, type_b, nid,                \
                               p_start, p_end, p_nid)                \
        for (i = (u64)ULLONG_MAX,                                    \
                     __next_mem_range_rev(&i, nid, type_a, type_b,   \
                                          p_start, p_end, p_nid);    \
             i != (u64)ULLONG_MAX;                                   \
             __next_mem_range_rev(&i, nid, type_a, type_b,           \
                                  p_start, p_end, p_nid))

 

The core function for iterating over memblock regions:

void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
                                          struct memblock_type *type_a,
                                          struct memblock_type *type_b,
                                          phys_addr_t *out_start,
                                          phys_addr_t *out_end, int *out_nid)
{
        int idx_a = *idx & 0xffffffff;        /* idx packs idx_a in the low 32 bits, idx_b in the high 32 bits */
        int idx_b = *idx >> 32;

        if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
                nid = NUMA_NO_NODE;

        if (*idx == (u64)ULLONG_MAX) {        /* first call: idx was initialized to ULLONG_MAX */
                idx_a = type_a->cnt - 1;      /* start from the last region of each type */
                idx_b = type_b->cnt;
        }

        for (; idx_a >= 0; idx_a--) {        /* walk type_a's regions, extracting the parts not in type_b */
                struct memblock_region *m = &type_a->regions[idx_a];

                phys_addr_t m_start = m->base;
                phys_addr_t m_end = m->base + m->size;
                int m_nid = memblock_get_region_node(m);

                /* only memory regions are associated with nodes, check it */
                if (nid != NUMA_NO_NODE && nid != m_nid)
                        continue;

                /* skip hotpluggable memory regions if needed */
                if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
                        continue;

                if (!type_b) {        /* no type_b given: return the type_a range as-is */
                        if (out_start)
                                *out_start = m_start;
                        if (out_end)
                                *out_end = m_end;
                        if (out_nid)
                                *out_nid = m_nid;
                        idx_a++;
                        *idx = (u32)idx_a | (u64)idx_b << 32;
                        return;
                }

                /* scan areas before each reservation */
                for (; idx_b >= 0; idx_b--) {        /* look at the gaps before each reserved region */
                        struct memblock_region *r;
                        phys_addr_t r_start;
                        phys_addr_t r_end;

                        r = &type_b->regions[idx_b];
                        /* [r_start, r_end) is the gap not covered by type_b;
                           where it intersects the type_a region, the memory
                           is free to hand out */
                        r_start = idx_b ? r[-1].base + r[-1].size : 0;
                        r_end = idx_b < type_b->cnt ?
                                r->base : ULLONG_MAX;
                        /*
                         * if idx_b advanced past idx_a,
                         * break out to advance idx_a
                         */
                        if (r_end <= m_start)        /* the gap lies entirely below this type_a region: advance idx_a */
                                break;
                        /* if the two regions intersect, we're done */
                        if (m_end > r_start) {        /* overlap found: return the intersection */
                                if (out_start)
                                        *out_start = max(m_start, r_start);
                                if (out_end)
                                        *out_end = min(m_end, r_end);
                                if (out_nid)
                                        *out_nid = m_nid;
                                if (m_start >= r_start)
                                        idx_a--;
                                else
                                        idx_b--;
                                *idx = (u32)idx_a | (u64)idx_b << 32;
                                return;
                        }
                }
        }
        /* signal end of iteration */
        *idx = ULLONG_MAX;
}

 

 

Once for_each_free_mem_range_reverse has produced a free memblock region [p_start, p_end) on node nid, the region's start address is returned as the allocation result, which propagates back up to memblock_virt_alloc_try_nid_nopanic() as the address of the newly allocated memory.

 

 

 
