日期 | 内核版本 | 架构 | 作者 | GitHub | CSDN |
---|---|---|---|---|---|
2017-07-17 | Linux-4.12 | X86 | lwhuq | LinuxMemoryStudy | Linux内存管理 |
1 Introduction
在Linux内核早期启动阶段,Linux的内存管理模块还没有初始化完成,内核需要提供一个简化的内存管理模块来满足这一阶段的内存分配请求。早期的内核中负责初始化阶段的内存分配器称为引导内存分配器(bootmem分配器)。bootmem分配器基于最先适配(first-fit)分配器的原理(这也是很多系统的内存分配所使用的原理),使用一个位图来管理页。最新的内核过渡到使用memblock,详见patch。
memblock和bootmem这两种机制对外提供的API是一致的,因此对用户是透明的。内核通过编译选项CONFIG_NO_BOOTMEM来选择使用哪一种机制(定义该选项时编译nobootmem.o,否则编译bootmem.o),定义在mm/Makefile#L46
# Build nobootmem.o when CONFIG_NO_BOOTMEM is defined, bootmem.o otherwise.
ifdef CONFIG_NO_BOOTMEM
obj-y += nobootmem.o
else
obj-y += bootmem.o
endif
2 Data structure
memblock的所有数据结构定义在include/linux/memblock.h。
第一个数据结构的名字是memblock,定义在include/linux/memblock.h#L48
/*
 * Top-level memblock descriptor: allocation direction, allocation limit,
 * and one memblock_type per class of region.
 */
struct memblock {
bool bottom_up; /* is bottom up direction? if true, memory is allocated bottom-up */
/*
 * Limit applied to allocations.
 * NOTE(review): this looks like an upper physical-address bound rather than
 * a size (it is initialised to MEMBLOCK_ALLOC_ANYWHERE) - confirm.
 */
phys_addr_t current_limit;
/* three region collections: memory, reserved, and (optionally) physmem */
struct memblock_type memory;
struct memblock_type reserved;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
/* only present when CONFIG_HAVE_MEMBLOCK_PHYS_MAP is defined */
struct memblock_type physmem;
#endif
};
第二个数据结构是memblock_type,定义在include/linux/memblock.h#L40
struct memblock_type {
unsigned long cnt; /* number of regions 内存区域的数目*/
unsigned long max; /* size of the allocated array 已经分配的内存区域大小*/
phys_addr_t total_size; /* size of all regions 所有内存区域的大小*/
struct memblock_region *regions; /* 指针指向memblock_region结构体 */
char *name; /* 名字 */
};
memblock_region结构用于描述memory region,定义在include/linux/memblock.h#L31
/*
 * Describes a single memory region: its start address, its size, its flags
 * and, when CONFIG_HAVE_MEMBLOCK_NODE_MAP is defined, its node id.
 */
struct memblock_region {
phys_addr_t base; /* start address of the region */
phys_addr_t size; /* size of the region */
unsigned long flags; /* region flags; NOTE(review): presumably MEMBLOCK_* values - confirm */
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int nid; /* node id of the region */
#endif
};
memblock_region记录了当前memory region的起始地址,大小,标志和Node ID。
标志定义在include/linux/memblock.h#L24
/*
 * Definition of memblock flags.
 * The non-zero values are distinct single bits (0x1, 0x2, 0x4).
 * NOTE(review): presumably they may be OR-ed together in a region's flags
 * field - confirm against the users of these constants.
 */
enum {
MEMBLOCK_NONE = 0x0, /* No special request */
MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */
MEMBLOCK_MIRROR = 0x2, /* mirrored region */
MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
};
3 Memblock 初始化
struct memblock的实例是一个同名的全局变量memblock(定义时没有static修饰),定义在mm/memblock.c#L34
/*
 * Statically allocated backing arrays for the initial region tables.
 * The memory and reserved arrays hold INIT_MEMBLOCK_REGIONS entries each;
 * the physmem array (only with CONFIG_HAVE_MEMBLOCK_PHYS_MAP) holds
 * INIT_PHYSMEM_REGIONS entries.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif
/*
 * The global memblock instance. Every type starts out pointing at its
 * static array, with cnt = 1 (a single empty placeholder entry) and max
 * equal to the length of that array.
 */
struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
.memory.cnt = 1, /* empty dummy entry */
.memory.max = INIT_MEMBLOCK_REGIONS,
.memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
.reserved.cnt = 1, /* empty dummy entry */
.reserved.max = INIT_MEMBLOCK_REGIONS,
.reserved.name = "reserved",
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
.physmem.regions = memblock_physmem_init_regions,
.physmem.cnt = 1, /* empty dummy entry */
.physmem.max = INIT_PHYSMEM_REGIONS,
.physmem.name = "physmem",
#endif
.bottom_up = false, /* bottom-up allocation is off initially */
.current_limit = MEMBLOCK_ALLOC_ANYWHERE, /* no restriction on the allocation limit initially */
};
- 宏__initdata_memblock指定了结构储存位置,如果定义了CONFIG_ARCH_DISCARD_MEMBLOCK,则存放在__meminitdata
- 每种memory type的cnt字段都初始化为1(即一个空的占位region)
- 每种memory type的regions都指向全局静态数组。数组单元个数,memory和reserved初始化为INIT_MEMBLOCK_REGIONS,physical memory初始化为INIT_PHYSMEM_REGIONS。因此max字段也初始化同样的值
/* Number of entries in the static initial arrays for the memory and reserved types. */
#define INIT_MEMBLOCK_REGIONS 128
/* Number of entries in the static initial array for the physmem type. */
#define INIT_PHYSMEM_REGIONS 4
- bottom_up被初始化为false,说明默认从高地址向低地址(top-down)分配内存
- current_limit被初始化为MEMBLOCK_ALLOC_ANYWHERE,可访问到最高地址空间。
/* Bitwise complement of zero in phys_addr_t, i.e. a value with every bit set. */
#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
4 Memblock APIs
4.1 Add
在bootmem.h中的相关APIs
4.1.1 memblock_add_range
/**
* memblock_add_range - add new memblock region
* @type: memblock type to add new region into
* @base: base address of the new region
* @size: size of the new region
* @nid: nid of the new region
* @flags: flags of the new region
*
* Add new memblock region [@base,@base+@size) into @type. The new region
* is allowed to overlap with existing ones - overlaps don't affect already
* existing regions. @type is guaranteed to be minimal (all neighbouring
* compatible regions are merged) after the addition.
*
* If the first entry of @type has size 0, the table is treated as empty and
* the new region is written straight into that entry. Otherwise the region
* list is walked twice: a counting pass followed by an inserting pass.
*
* RETURNS:
* 0 on success, -errno on failure.
*/
int __init_memblock memblock_add_range(struct memblock_type *type,
phys_addr_t base, phys_addr_t size,
int nid, unsigned long flags)
{
bool insert = false;
/* keep the original base: @base is advanced while walking the regions */
phys_addr_t obase = base;
/*
 * memblock_cap_size() receives @size by pointer and may therefore adjust it.
 * NOTE(review): presumably it clamps @size so that base + size does not
 * overflow - confirm against its definition.
 */
phys_addr_t end = base + memblock_cap_size(base, &size);
/*
 * NOTE(review): idx is never assigned directly in this function; presumably
 * for_each_memblock_type() initialises and advances it - confirm against the
 * macro definition.
 */
int idx, nr_new;
struct memblock_region *rgn;
/* nothing to add */
if (!size)
return 0;
/* special case for empty array */
if (type->regions[0].size == 0) {
WARN_ON(type->cnt != 1 || type->total_size);
/* reuse entry 0 in place; cnt is left as it is (expected to be 1 already) */
type->regions[0].base = base;
type->regions[0].size = size;
type->regions[0].flags = flags;
memblock_set_region_node(&type->regions[0], nid);
type->total_size = size;
return 0;
}
repeat:
/*
* The following is executed twice. Once with %false @insert and
* then with %true. The first counts the number of regions needed
* to accommodate the new area. The second actually inserts them.
*/
/* both passes start again from the original base with a zeroed counter */
base = obase;
nr_new = 0;
for_each_memblock_type(type, rgn) {
phys_addr_t rbase = rgn->base;
phys_addr_t rend = rbase + rgn->size;
/*
 * This region starts at or after the end of the new area, so stop.
 * NOTE(review): stopping here assumes the regions are sorted by base - confirm.
 */
if (rbase >= end)
break;
/* this region ends at or before the part still to be handled; skip it */
if (rend <= base)
continue;
/*
* @rgn overlaps. If it separates the lower part of new
* area, insert that portion.
*/
if (rbase > base) {
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
WARN_ON(nid != memblock_get_region_node(rgn));
#endif
WARN_ON(flags != rgn->flags);
/* counted in both passes; only the second pass inserts */
nr_new++;
/*
 * NOTE(review): idx is bumped after the insertion, presumably because the
 * new entry shifts the current region up by one slot - confirm.
 */
if (insert)
memblock_insert_region(type, idx++, base,
rbase - base, nid,
flags);
}
/* area below @rend is dealt with, forget about it */
base = min(rend, end);
}
/* insert the remaining portion */
if (base < end) {
nr_new++;
if (insert)
memblock_insert_region(type, idx, base, end - base,
nid, flags);
}
/* no gap was found: the new area is already fully covered by existing regions */
if (!nr_new)
return 0;
/*
* If this was the first round, resize array and repeat for actual
* insertions; otherwise, merge and return.
*/
if (!insert) {
/* keep growing the array until cnt + nr_new entries fit; give up with -ENOMEM on failure */
while (type->cnt + nr_new > type->max)
if (memblock_double_array(type, obase, size) < 0)
return -ENOMEM;
insert = true;
goto repeat;
} else {
memblock_merge_regions(type);
return 0;
}
}
- 第一次循环检查是否有region的overlap。并且检查memory type存放的memory region实例个数type->max是否足够容纳新增的region。不够的话就调用memblock_double_array扩容。如果有需要添加的region就设置insert = true。最后goto到repeat执行第二次循环
- 第二次循环中,执行insert == true代码块,调用memblock_insert_region插入region,最后调用memblock_merge_regions合并相邻region。
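- idx在函数内看似没有初始化,实际上是由for_each_memblock_type宏负责的:在v4.12中该宏展开为for (idx = 0, rgn = &type->regions[0]; idx < type->cnt; idx++, rgn = &type->regions[idx]),直接使用调用者作用域内名为idx的变量,因此每次遍历idx都从0开始,而不是依赖"默认值"。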
4.2 Free and remove
4.3 Allocate
- memory allocate就是把内存范围添加到memory reserved region
5 memblock初始化
X86_64架构的内核从E820和EFI memmap得到boot内存信息,随后根据boot内存信息建立memblock结构。具体实现在setup_arch函数,定义在arch/x86/kernel/setup.c#L848
/*
 * setup_arch - the memblock-related steps of x86 architecture setup.
 *
 * NOTE(review): this listing appears to be abridged to the memblock-related
 * calls (@cmdline_p is not used in the lines shown) - confirm against the
 * full function.
 *
 * In the order shown: reserve the range from _text to __bss_stop, optionally
 * reserve EFI ranges, optionally switch memblock to bottom-up allocation,
 * reserve setup data and the brk area, set the current allocation limit to
 * ISA_END_ADDRESS, then call e820__memblock_setup().
 */
void __init setup_arch(char **cmdline_p)
{
/* reserve [_text, __bss_stop): base is the physical address of _text, size is __bss_stop - _text */
memblock_reserve(__pa_symbol(_text),
(unsigned long)__bss_stop - (unsigned long)_text);
#ifdef CONFIG_EFI
if (efi_enabled(EFI_BOOT))
efi_memblock_x86_reserve_range();
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory used by the kernel cannot be hot-removed because Linux
* cannot migrate the kernel pages. When memory hotplug is
* enabled, we should prevent memblock from allocating memory
* for the kernel.
*
* ACPI SRAT records all hotpluggable memory ranges. But before
* SRAT is parsed, we don't know about it.
*
* The kernel image is loaded into memory at very early time. We
* cannot prevent this anyway. So on NUMA system, we set any
* node the kernel resides in as un-hotpluggable.
*
* Since on modern servers, one node could have double-digit
* gigabytes memory, we can assume the memory around the kernel
* image is also un-hotpluggable. So before SRAT is parsed, just
* allocate memory near the kernel image to try the best to keep
* the kernel away from hotpluggable memory.
*/
if (movable_node_is_enabled())
memblock_set_bottom_up(true);
#endif
/* after early param, so could get panic from serial */
memblock_x86_reserve_range_setup_data();
/*
* Need to conclude brk, before e820__memblock_setup()
* it could use memblock_find_in_range, could overlap with
* brk area.
*/
reserve_brk();
cleanup_highmap();
/* the allocation limit is set to ISA_END_ADDRESS before e820__memblock_setup() runs */
memblock_set_current_limit(ISA_END_ADDRESS);
e820__memblock_setup();
}
- 最后的e820__memblock_setup()真正完成memory block的添加初始化工作。在此之前的函数都只是调用memblock_reserve,把需要保留的内存范围(内核映像、EFI、setup data、brk等)登记到reserved类型中
6 Reference