MM之物理内存的管理
在系统启动早期是没有buddy system可用的,因此开发者常用的kmalloc()、__get_free_pages()在启动早期是无效的;但系统启动早期阶段也必然要使用物理内存和虚拟内存,内核是怎样设计与实现的呢?
linux系统启动固件一般包含:内核镜像、根文件系统镜像;uboot将固件加载到对应内存地址并引导启动后,镜像一般会做重定位,但仍然会占用一段内存空间;系统启动过程或模块功能开发中不能使用此段内存地址,否则系统必然崩溃;内核又是如何对固件镜像内存地址进行保护的呢?
在设备树中往往会配置一些保留内存:reserved-memory节点,内核又是如何对其进行标记保护的呢?
让我们带着问题进入内核源码世界探索学习。
涉及到的数据结构:
/*
 * Attribute flags of a memblock region.
 * MEMBLOCK_NONE is zero; every other flag is a distinct single bit.
 */
enum memblock_flags {
	MEMBLOCK_NONE    = 0,      /* no special request */
	MEMBLOCK_HOTPLUG = 1 << 0, /* hotpluggable region */
	MEMBLOCK_MIRROR  = 1 << 1, /* mirrored region */
	MEMBLOCK_NOMAP   = 1 << 2, /* don't add to kernel direct mapping */
};
/* One contiguous physical address range tracked by memblock. */
struct memblock_region {
phys_addr_t base; // physical start address of the region
phys_addr_t size; // size of the region
enum memblock_flags flags; // attribute flags of the region (see enum memblock_flags)
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int nid; // NUMA node id; only present when CONFIG_HAVE_MEMBLOCK_NODE_MAP is defined
#endif
};
/* A named collection of memblock regions of one kind, held in an array. */
struct memblock_type {
unsigned long cnt; // number of regions currently in the array
unsigned long max; // size of the allocated regions array
phys_addr_t total_size; // combined size of all regions
struct memblock_region *regions; // array of regions
char *name; // symbolic name of this memory type
};
/* Top-level memblock state: allocation policy plus the per-type region collections. */
struct memblock {
bool bottom_up; // allocation direction: true = bottom-up (low to high addresses), false = top-down (high to low)
phys_addr_t current_limit; // physical address of the current allocation limit
struct memblock_type memory; // usable memory regions
struct memblock_type reserved; // reserved memory regions
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem; // all physical memory; only present when CONFIG_HAVE_MEMBLOCK_PHYS_MAP is defined
#endif
};
组织形式如下:
{1、何时解析设备树中memory信息并将其添加到memblock中的呢?}
流程入口:
start_kernel
|--->setup_arch
|--->|--->setup_machine_fdt(__fdt_pointer)
|--->|--->|--->early_init_dt_scan(dt_virt)
|--->|--->|--->|--->early_init_dt_scan_nodes()
|--->|--->|--->|--->|--->early_init_dt_scan_memory //通过memory节点获取其对应的reg参数
|--->|--->|--->|--->|--->|--->early_init_dt_add_memory_arch(base, size)
|--->|--->|--->|--->|--->|--->|--->memblock_add(base, size)
|--->|--->|--->|--->|--->|--->|--->|--->memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0)
{2、如何对内核镜像段、根文件系统段、设备树所用地址的保护预留}
[2.1、设备树镜像地址reserved]
流程入口:
start_kernel
|--->setup_arch
|--->|--->setup_machine_fdt(__fdt_pointer)
|--->|--->|--->fixmap_remap_fdt(dt_phys)
|--->|--->|--->|--->memblock_reserve(dt_phys, size)
|--->|--->|--->|--->|--->memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0)
[2.2、内核镜像段、根文件系统镜像段reserved]
流程入口:
start_kernel
|--->setup_arch
|--->|--->arm64_memblock_init
/*
 * arm64 early memblock setup (abridged excerpt): trims memblock.memory to the
 * range the linear mapping can cover, applies the optional memory limit,
 * reserves the initrd and kernel image ranges, and finally processes the
 * reserved-memory information from the FDT.
 */
void __init arm64_memblock_init(void)
{
/* Size of the linear region, obtained by negating PAGE_OFFSET. */
const s64 linear_region_size = -(s64)PAGE_OFFSET;
/* Remove memory above our supported physical address size */
memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); /* NOTE(review): the shift is presumably 48 when CONFIG_ARM64_PA_BITS=48 -- confirm */
/* Base of physical memory: start of DRAM rounded down to ARM64_MEMSTART_ALIGN. */
memstart_addr = round_down(memblock_start_of_DRAM(),ARM64_MEMSTART_ALIGN);
/*
* Remove the memory that we will not be able to cover with the
* linear mapping. Take care not to clip the kernel which may be
* high in memory.
*/
memblock_remove(max_t(u64, memstart_addr + linear_region_size,
__pa_symbol(_end)), ULLONG_MAX);
/*
* Apply the memory limit if it was set. Since the kernel may be loaded
* high up in memory, add back the kernel region that must be accessible
* via the linear mapping.
*/
if (memory_limit != PHYS_ADDR_MAX)
{
memblock_mem_limit_remove_map(memory_limit);
memblock_add(__pa_symbol(_text), (u64)(_end - _text));
}
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start)
{
/*
* Add back the memory we just removed if it results in the
* initrd to become inaccessible via the linear mapping.
* Otherwise, this is a no-op
*/
u64 base = initrd_start & PAGE_MASK;
u64 size = PAGE_ALIGN(initrd_end) - base;
memblock_remove(base, size); /* clear MEMBLOCK_ flags */
memblock_add(base, size);
memblock_reserve(base, size); /* reserve the page-aligned initrd (root filesystem image) range */
}
/*
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
memblock_reserve(__pa_symbol(_text), _end - _text); /* reserve the kernel image range, _text up to _end */
if (initrd_start) {
/* Reserves the exact (not page-aligned) initrd range; the page-aligned range may already have been reserved above. */
memblock_reserve(initrd_start, initrd_end - initrd_start);
/* the generic initrd code expects virtual addresses */
initrd_start = __phys_to_virt(initrd_start);
initrd_end = __phys_to_virt(initrd_end);
}
early_init_fdt_scan_reserved_mem(); /* handle the reserved-memory information described in the FDT */
}
memory_limit怎么来的呢?它来自内核启动参数中的"mem="选项,由下面通过early_param注册的early_mem()解析得到:
/*
 * Early handler for the "mem" boot parameter.
 *
 * Parses the size string in @p with memparse(), masks the result with
 * PAGE_MASK, stores it in memory_limit and logs the limit in MB.
 *
 * Returns 1 when no argument string was supplied, 0 otherwise.
 */
static int __init early_mem(char *p)
{
	if (p) {
		memory_limit = memparse(p, &p) & PAGE_MASK;
		pr_notice("Memory limited to %lldMB\n", memory_limit >> 20);
		return 0;
	}

	/* "mem" was given without a value: nothing to parse */
	return 1;
}
early_param("mem", early_mem);
[2.3、设备树dts中reserved-memory节点地址的reserve]
/*
 * Default (weak) arch hook that applies one reserved-memory range to memblock.
 *
 * @base:  physical start address of the range
 * @size:  size of the range
 * @nomap: true when the range carries the no-map marking
 *
 * Returns the result of the memblock helper that was called.
 */
int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
					phys_addr_t size, bool nomap)
{
	/* no-map: remove the range from memblock entirely; the OS will not see it */
	if (nomap)
		return memblock_remove(base, size);

	/* otherwise only mark the range reserved; it stays visible to the OS */
	return memblock_reserve(base, size);
}
remove和reserve的区别体现在哪里呢?
memblock_remove
|--->memblock_remove_range(&memblock.memory, base, size)
|--->|--->memblock_isolate_range(type, base, size, &start_rgn, &end_rgn)
//其是将memory类型上以base为起点、size为长度的连续地址段从所在region中切分(isolate)出来,随后对切出的region逐个调用memblock_remove_region(),将其从memblock.memory中彻底移除
memblock_reserve
|--->memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0)
//其是将地址为base长度为size的地址段,添加到reserved节点上
memblock_add
|--->memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0)
//其是将地址为base长度为size的地址段,添加到memory节点上
从哪里可以看到系统启动后物理内存的地址段情况呢?
mount -t debugfs debug /sys/kernel/debug //---挂载debug文件系统
cat /sys/kernel/debug/memblock/memory
---此指令获取的信息为OS内存空间被设备树reserved-memory中标记有no-map字段的节点分割后的地址分布
cat /sys/kernel/debug/memblock/reserved
---此指令获取的信息为已分配出去或提前预留的内存
内核启动早期通过memblock_alloc接口获取可用的物理内存,具体接口实现此处不再详述;
接口调用流程整理如下:
memblock_alloc(PAGE_SIZE, PAGE_SIZE)
|--->memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE)
|--->|--->__memblock_alloc_base(size, align, max_addr)
|--->|--->|--->memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE,MEMBLOCK_NONE)
|--->|--->|--->|--->memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags)
|--->|--->|--->|--->|--->memblock_find_in_range_node(size, align, start, end, nid,flags)
|--->|--->|--->|--->|--->|--->__memblock_find_range_top_down(start, end, size, align, nid,flags)
|--->|--->|--->|--->|--->找到后memblock_reserve(found, size)