linux内存管理(十五)解析设备树中的内存信息过程分析

解析设备树中的内存信息过程分析

有个疑问,引导内存分配器怎么知道内存的大小和物理地址范围?
回答:ARM64 架构使用扁平设备树(Flattened Device Tree,FDT)描述板卡的硬件信息,FDT是写在设备树中的。设备启动时,引导程序把设备树二进制文件从存储设备读到内存中,引导内核的时候把设备树二进制文件的起始地址传给内核,内核解析设备树二进制文件后得到硬件信息。举个例子:

	memory@00{
		device_type = "memory";
		reg = <0x0 0x80000000 0x2 0x00000000>;
	};

而预留内存在设备树中有两种写法:
第一种:

/memreserve/ 0x80000000 0x10000;

第二种:

	reserved-memory {
		#address-cells = <0x2>;
		#size-cells = <0x2>;
		ranges;

		reserved0: reserved@0 {
			no-map;
			reg = <0x0 0xb0100000 0x0 0x19900000>;
		};
	};

linux启动从init/main.c文件的start_kernel函数开始,然后从文件setup_arch(arch/arm64/kernel/setup.c文件中)函数检测处理器类型,初始化处理器和内存,其中的arm64_memblock_init(arch/arm64/mm/init.c文件中)函数就是arm64架构的memblock初始化流程。arm64_memblock_init的前面有个函数setup_machine_fdt,就是他解析设备树的内存信息的。

void __init setup_arch(char **cmdline_p)
{
	init_mm.start_code = (unsigned long) _text;
	init_mm.end_code   = (unsigned long) _etext;
	init_mm.end_data   = (unsigned long) _edata;
	init_mm.brk	   = (unsigned long) _end;

	*cmdline_p = boot_command_line;

	early_fixmap_init();
	early_ioremap_init();

	setup_machine_fdt(__fdt_pointer);//解析设备树的内存信息

	parse_early_param();

	/*
	 * Unmask asynchronous aborts and fiq after bringing up possible
	 * earlycon. (Report possible System Errors once we can report this
	 * occurred).
	 */
	local_daif_restore(DAIF_PROCCTX_NOIRQ);

	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to zero page to avoid speculatively fetching new entries.
	 */
	cpu_uninstall_idmap();

	xen_early_init();
	efi_init();
	arm64_memblock_init();//arm64的引导内存分配器的初始化

	paging_init();

	acpi_table_upgrade();

	/* Parse the ACPI tables for possible boot-time configuration */
	acpi_boot_table_init();

	if (acpi_disabled)
		unflatten_device_tree();

	bootmem_init();

	kasan_init();

	request_standard_resources();

	early_ioremap_reset();

	if (acpi_disabled)
		psci_dt_init();
	else
		psci_acpi_init();

	cpu_read_bootcpu_ops();
	smp_init_cpus();
	smp_build_mpidr_hash();

#ifdef CONFIG_ARM64_SW_TTBR0_PAN
	/*
	 * Make sure init_thread_info.ttbr0 always generates translation
	 * faults in case uaccess_enable() is inadvertently called by the init
	 * thread.
	 */
	init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page);
#endif

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
	conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
#endif
	if (boot_args[1] || boot_args[2] || boot_args[3]) {
		pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
			"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
			"This indicates a broken bootloader or old kernel\n",
			boot_args[1], boot_args[2], boot_args[3]);
	}
}

我们看看setup_machine_fdt是如何解析fdt的:

static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
	void *dt_virt = fixmap_remap_fdt(dt_phys);//根据物理地址获取虚拟地址
	const char *name;

	if (!dt_virt || !early_init_dt_scan(dt_virt)) {
		pr_crit("\n"
			"Error: invalid device tree blob at physical address %pa (virtual address 0x%p)\n"
			"The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
			"\nPlease check your bootloader.",
			&dt_phys, dt_virt);

		while (true)
			cpu_relax();
	}

	name = of_flat_dt_get_machine_name();//获取根节点的name
	if (!name)
		return;

	pr_info("Machine model: %s\n", name);
	dump_stack_set_arch_desc("%s (DT)", name);
}

setup_machine_fdt首先通过setup_machine_fdt函数根据设备树的物理地址获取到对应的虚拟地址,然后调用early_init_dt_scan进行早期的设备树扫描的初始化,最后获取根节点的name。只有early_init_dt_scan需要深入研究一下:

bool __init early_init_dt_scan(void *params)
{
	bool status;
	//校验设备树二进制文件
	status = early_init_dt_verify(params);
	if (!status)
		return false;

	early_init_dt_scan_nodes();
	return true;
}

bool __init early_init_dt_verify(void *params)
{
	if (!params)
		return false;

	/* check device tree validity */
	//检查设备树有效性
	if (fdt_check_header(params))
		return false;

	/* Setup flat device-tree pointer */
	//设置平面设备树指针
	initial_boot_params = params;
	of_fdt_crc32 = crc32_be(~0, initial_boot_params,
				fdt_totalsize(initial_boot_params));
	return true;
}

early_init_dt_scan主要是调用early_init_dt_verify函数校验设备树二进制文件,然后调用early_init_dt_scan_nodes,进行设备树二进制解析:

void __init early_init_dt_scan_nodes(void)
{
	/* Retrieve various information from the /chosen node */
	//读取 /chosen节点信息,写入到boot_command_line
	of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);

	/* Initialize {size,address}-cells info */
	//初始化size-cells和address-cells信息
	of_scan_flat_dt(early_init_dt_scan_root, NULL);

	/* Setup memory, calling early_init_dt_add_memory_arch */
	//调用函数early_init_dt_add_memory_arch设置内存
	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
}

early_init_dt_scan_nodes三次调用of_scan_flat_dt函数,我们看看of_scan_flat_dt到底干了啥:

int __init of_scan_flat_dt(int (*it)(unsigned long node,
				     const char *uname, int depth,
				     void *data),
			   void *data)
{
	const void *blob = initial_boot_params;//获取dtb的基地址
	const char *pathp;
	int offset, rc = 0, depth = -1;

	if (!blob)
		return 0;

	for (offset = fdt_next_node(blob, -1, &depth);
	     offset >= 0 && depth >= 0 && !rc;
	     //遍历dtb的所有节点
	     offset = fdt_next_node(blob, offset, &depth)) {

		pathp = fdt_get_name(blob, offset, NULL);// 获取给定节点的名字
		if (*pathp == '/')
			pathp = kbasename(pathp);
		rc = it(offset, pathp, depth, data);//调用传入的回调函数
	}
	return rc;
}

of_scan_flat_dt扫描FDT,获取dtb的基地址,遍历dtb的所有节点,获取给定节点的名字,调用传入的回调函数。

early_init_dt_scan_chosen、early_init_dt_scan_root、early_init_dt_scan_memory就是三个回调函数:

int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
				     int depth, void *data)
{
	int l;
	const char *p;

	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);

	//如果不是chosen节点,返回吧
	if (depth != 1 || !data ||
	    (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
		return 0;

	//从FDT上解析initrd位置
	early_init_dt_check_for_initrd(node);

	/* Retrieve command line */
	//解析bootargs,写入到COMMAND_LINE_SIZE中
	p = of_get_flat_dt_prop(node, "bootargs", &l);
	if (p != NULL && l > 0)
		strlcpy(data, p, min((int)l, COMMAND_LINE_SIZE));

	/*
	 * CONFIG_CMDLINE is meant to be a default in case nothing else
	 * managed to set the command line, unless CONFIG_CMDLINE_FORCE
	 * is set in which case we override whatever was found earlier.
	 */
#ifdef CONFIG_CMDLINE
#if defined(CONFIG_CMDLINE_EXTEND)
	strlcat(data, " ", COMMAND_LINE_SIZE);
	strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#elif defined(CONFIG_CMDLINE_FORCE)
	strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#else
	/* No arguments from boot loader, use kernel's  cmdl*/
	if (!((char *)data)[0])
		strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#endif
#endif /* CONFIG_CMDLINE */

	pr_debug("Command line is: %s\n", (char*)data);

	/* break now */
	return 1;
}

early_init_dt_scan_chosen主要是确定了是chosen节点,解析initrd位置和bootargs。我们看下一个回调函数early_init_dt_scan_root:

int __init early_init_dt_scan_root(unsigned long node, const char *uname,
				   int depth, void *data)
{
	const __be32 *prop;

	if (depth != 0)
		return 0;
	
	//给dt_root_size_cells 和dt_root_addr_cells 赋初值
	dt_root_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT;
	dt_root_addr_cells = OF_ROOT_NODE_ADDR_CELLS_DEFAULT;

	//获取#size-cells节点大小
	prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
	if (prop)
		dt_root_size_cells = be32_to_cpup(prop);
	pr_debug("dt_root_size_cells = %x\n", dt_root_size_cells);
	//获取#address-cells节点大小
	prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
	if (prop)
		dt_root_addr_cells = be32_to_cpup(prop);
	pr_debug("dt_root_addr_cells = %x\n", dt_root_addr_cells);

	/* break now */
	return 1;
}

early_init_dt_scan_root也很简单,就是获取size-cells和address-cells信息,并且赋值给dt_root_size_cells 和dt_root_addr_cells而已。最后看看那early_init_dt_scan_memory:

int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
				     int depth, void *data)
{
	//解析FDT的device_type节点,返回属性ptr
	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
	const __be32 *reg, *endp;
	int l;
	bool hotpluggable;

	/* We are scanning "memory" nodes only */
	//只解析memory节点,不是memory节点就返回吧
	if (type == NULL || strcmp(type, "memory") != 0)
		return 0;
	
	//获取memory节点的linux,usable-memory信息
	reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
	if (reg == NULL)
		//找不到就找reg也行
		reg = of_get_flat_dt_prop(node, "reg", &l);
	if (reg == NULL)//reg信息也找不到就返回吧
		return 0;

	endp = reg + (l / sizeof(__be32));
	// 获取内存节点的热插拔属性
	hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);

	pr_debug("memory scan node %s, reg size %d,\n", uname, l);
	//遍历reg属性中所有的内存region
	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
		u64 base, size;
		//获取每个region的base和size 
		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
		size = dt_mem_next_cell(dt_root_size_cells, &reg);

		if (size == 0)
			continue;
		pr_debug(" - %llx ,  %llx\n", (unsigned long long)base,
		    (unsigned long long)size);

		early_init_dt_add_memory_arch(base, size);//将region信息添加到memblock中

		if (!hotpluggable)
			continue;
		//若该region支持hotplug,则为其添加对应标志
		if (early_init_dt_mark_hotplug_memory_arch(base, size))
			pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
				base, base + size);
	}

	return 0;
}

到这里,我们的设备树的内存节点信息已经解析完毕,并且添加到memblock中了。我们在继续看看early_init_dt_add_memory_arch是怎么添加到memblock中的:

void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
{
	const u64 phys_offset = MIN_MEMBLOCK_ADDR;

	if (!PAGE_ALIGNED(base)) {
		if (size < PAGE_SIZE - (base & ~PAGE_MASK)) {
			pr_warn("Ignoring memory block 0x%llx - 0x%llx\n",
				base, base + size);
			return;
		}
		size -= PAGE_SIZE - (base & ~PAGE_MASK);
		base = PAGE_ALIGN(base);
	}
	size &= PAGE_MASK;

	if (base > MAX_MEMBLOCK_ADDR) {
		pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
				base, base + size);
		return;
	}

	if (base + size - 1 > MAX_MEMBLOCK_ADDR) {
		pr_warning("Ignoring memory range 0x%llx - 0x%llx\n",
				((u64)MAX_MEMBLOCK_ADDR) + 1, base + size);
		size = MAX_MEMBLOCK_ADDR - base + 1;
	}

	if (base + size < phys_offset) {
		pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
			   base, base + size);
		return;
	}
	if (base < phys_offset) {
		pr_warning("Ignoring memory range 0x%llx - 0x%llx\n",
			   base, phys_offset);
		size -= phys_offset - base;
		base = phys_offset;
	}
	memblock_add(base, size);
}

很简单,early_init_dt_add_memory_arch主要是对base和size进行各种校验,最后调用memblock_add加入到memblock中。memblock_add之前已经讲过了,这里就不在复述了。不过我对early_init_dt_mark_hotplug_memory_arch是怎么设置hotplug标志位有点感兴趣,可以一起看看:

int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)
{
	return memblock_mark_hotplug(base, size);
}

int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(base, size, 1, MEMBLOCK_HOTPLUG);
}

static int __init_memblock memblock_setclr_flag(phys_addr_t base,
				phys_addr_t size, int set, int flag)
{
	struct memblock_type *type = &memblock.memory;
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		if (set)
			memblock_set_region_flags(&type->regions[i], flag);
		else
			memblock_clear_region_flags(&type->regions[i], flag);

	memblock_merge_regions(type);//合并相邻的兼容区域
	return 0;
}

static inline void memblock_set_region_flags(struct memblock_region *r,
					     enum memblock_flags flags)
{
	r->flags |= flags;
}

原来early_init_dt_mark_hotplug_memory_arch是通过memblock_isolate_range分离出对应的start_rgn和end_rgn,然后对[start_rgn,end_rgn]进行memblock_set_region_flags设置标志位。

  • 1
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小坚学Linux

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值