Linux内存初始化(1)——memblock初始化

10 篇文章 0 订阅

说明

Kernel版本:4.14.111
ARM处理器,Cortex-A7,四核

  本文从内核汇编代码执行完毕,跳转到start_kernel,开始介绍内存初始化。跳转到start_kernel前,已知,内核已创建了kernel、dtb的线性映射,即内核可以访问自己代码段等区域,并能访问dtb所在内存区域的虚拟地址。

物理内存大小

  一般情况下,内存大小设置有两种方法,通过设备树设置,或者通过bootargs设置。
1)通过dtb设置

memory@0 {
	device_type = "memory";
	reg = <0x40000000 0x10000000>;
};

  上述配置意思为:物理内存地址从0x40000000开始,大小为0x10000000(256MB)。
  代码调用关系为
start_kernel->setup_arch->setup_machine_fdt->early_init_dt_scan_nodes-> of_scan_flat_dt(early_init_dt_scan_memory, NULL)->early_init_dt_scan_memory
2)通过bootargs设置
setup.c->early_mem

	static int __init early_mem(char *p)
	{
		static int usermem __initdata = 0;
		u64 size;
		u64 start;
		char *endp;
	
		/*
		 * If the user specifies memory size, we
		 * blow away any automatically generated
		 * size.
		 */
		if (usermem == 0) {
			usermem = 1;
			memblock_remove(memblock_start_of_DRAM(),
				memblock_end_of_DRAM() - memblock_start_of_DRAM());
		}
	
		start = PHYS_OFFSET;
		size  = memparse(p, &endp);
		if (*endp == '@')
			start = memparse(endp + 1, NULL);
	
		arm_add_memory(start, size);
	
		return 0;
	}
	early_param("mem", early_mem);

  设置格式为:mem=1024M,代码中会去bootargs(内核command line)中提取mem=关键字,获取系统内存大小。

early_fixmap_init

  简单来说,Fixed map指的是虚拟地址中的一段区域,在该区域中所有的线性地址是在编译阶段就确定好的。可以从内核启动打印,看出fixmap所在虚拟地址空间。如:

vector  : 0xffff0000 - 0xffff1000   (   4 kB)
fixmap  : 0xffc00000 - 0xfff00000   (3072 kB)
vmalloc : 0xf0800000 - 0xff800000   ( 240 MB)
lowmem  : 0xc0000000 - 0xf0000000   ( 768 MB)
pkmap   : 0xbfe00000 - 0xc0000000   (   2 MB)
modules : 0xbf000000 - 0xbfe00000   (  14 MB)
  .text : 0xc0008000 - 0xc0c00000   (12256 kB)
  .init : 0xc1000000 - 0xc1200000   (2048 kB)
  .data : 0xc1200000 - 0xc1261b40   ( 391 kB)
   .bss : 0xc126a8d8 - 0xc12b6890   ( 304 kB)

  early_fixmap_init代码实现在arch/arm/mm/mmu.c
start_kernel->setup_arch->early_fixmap_init

void __init early_fixmap_init(void)
{
	pmd_t *pmd;

	BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT)
		     != FIXADDR_TOP >> PMD_SHIFT);

	pmd = fixmap_pmd(FIXADDR_TOP);						(1)
	pmd_populate_kernel(&init_mm, pmd, bm_pte);			(2)

	pte_offset_fixmap = pte_offset_early_fixmap;
}

static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
	/*
	 * The pmd must be loaded with the physical address of the PTE table
	 */
	__pmd_populate(pmdp, __pa(ptep), _PAGE_KERNEL_TABLE);
}

static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
				  pmdval_t prot)
{
	pmdval_t pmdval = (pte + PTE_HWTABLE_OFF) | prot;
	pmdp[0] = __pmd(pmdval);
#ifndef CONFIG_ARM_LPAE
	pmdp[1] = __pmd(pmdval + 256 * sizeof(pte_t));
#endif
	flush_pmd_entry(pmdp);
}

1)获取FIXADDR_TOP这一虚拟地址在内核全局页表(init_mm)中对应的pmd表项;对于arm32的二级映射(未开启LPAE)来说,pud、pmd被折叠,pmd表项实际就是一级页表(pgd)中的表项。
2)将bm_pte的物理地址写到pmd页表目录表中。bm_pte是全局数组,是fixmap的pte页表。定义如下:

#define PTRS_PER_PTE		512
#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)

static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]
	__aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata;

  early_fixmap_init只是建立了一个映射的框架,并没有填充pte页表,即没有创建真正虚拟地址和物理地址的联系。
  需要使用者在使用时再去填充具体的pte页表。比如下文的early_ioremap_init()

  32位arm处理器对应内核的fixmap地址区间定义如下:

enum fixed_addresses {
	FIX_EARLYCON_MEM_BASE,
	__end_of_permanent_fixed_addresses,

	FIX_KMAP_BEGIN = __end_of_permanent_fixed_addresses,
	FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,

	/* Support writing RO kernel text via kprobes, jump labels, etc. */
	FIX_TEXT_POKE0,
	FIX_TEXT_POKE1,

	__end_of_fixmap_region,

	/*
	 * Share the kmap() region with early_ioremap(): this is guaranteed
	 * not to clash since early_ioremap() is only available before
	 * paging_init(), and kmap() only after.
	 */
#define NR_FIX_BTMAPS		32
#define FIX_BTMAPS_SLOTS	7
#define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)

	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
	__end_of_early_ioremap_region
};

early_ioremap_init

  在设备初始化早期,如果需要操作io内存,则需要依赖early ioremap模块初始化。
代码如下:
start_kernel->setup_arch->early_ioremap_init

void __init early_ioremap_init(void)
{
	early_ioremap_setup();
}

void __init early_ioremap_setup(void)
{
	int i;

	for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
		if (WARN_ON(prev_map[i]))
			break;

	for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
		slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
}

#define NR_FIX_BTMAPS		32
#define FIX_BTMAPS_SLOTS	7
#define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)

  early_ioremap_setup实际操作就是在fixmap区域内,划分出7个slot(FIX_BTMAPS_SLOTS),每个slot包含32个页(NR_FIX_BTMAPS),共7*32个页的虚拟地址区域,并把每个slot的起始虚拟地址保存到slot_virt[]数组中。当需要io操作时,最终会调用到__early_ioremap函数,该函数中会去填充对应的pte页表,从而完成物理地址和虚拟地址的映射。

memblock

  memblock内存管理机制用于在Linux启动后管理内存,一直到free_initmem()为止。
  在buddy系统初始化之前,内存由memblock管理,需要注意的是,memblock管理的内存为物理地址,非虚拟地址。
  不知道有没有人和我一样疑惑:在shell命令行下,通过cat /proc/meminfo或free命令查看时,为什么显示的总内存(MemTotal/total)和我们的物理内存大小不一致。
  这部分没有交给内核(伙伴系统)管理的内存,就是通过memblock机制预留(reserved)的。

memblock数据结构

  struct memblock是memblock的核心数据结构,下面分为几种类型的memblock,每种类型memblock包含若干regions。

/* Definition of memblock flags. */
enum {
	MEMBLOCK_NONE		= 0x0,	/* No special request */
	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
};

struct memblock_region {
	phys_addr_t base;					//region的开始物理地址
	phys_addr_t size;					//region的大小
	unsigned long flags;				//region的标志,上面枚举定义
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
	int nid;
#endif
};

struct memblock_type {
	unsigned long cnt;	/* number of regions */					//该类型内存的regions数量
	unsigned long max;	/* size of the allocated array */		//regions数组当前可容纳的region个数(数组容量)
	phys_addr_t total_size;	/* size of all regions */			//regions总大小
	struct memblock_region *regions;							//指向region数组
	char *name;
};

struct memblock {
	bool bottom_up;  /* is bottom up direction? */		//表明分配器的分配方式,true表示从低地址向高地址
	phys_addr_t current_limit;			//memblock分配内存时允许使用的物理地址上限
	struct memblock_type memory;		//可用内存类型
	struct memblock_type reserved;		//预留内存类型
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	struct memblock_type physmem;
#endif
};

在这里插入图片描述
  内核中定义了全局memblock数据结构,并设置了初始值,这个全局变量在memblock生命周期内,会被频繁访问。
mm/memblock.c

struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.cnt		= 1,	/* empty dummy entry */
	.memory.max		= INIT_MEMBLOCK_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.cnt		= 1,	/* empty dummy entry */
	.reserved.max		= INIT_MEMBLOCK_REGIONS,
	.reserved.name		= "reserved",

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	.physmem.regions	= memblock_physmem_init_regions,
	.physmem.cnt		= 1,	/* empty dummy entry */
	.physmem.max		= INIT_PHYSMEM_REGIONS,
	.physmem.name		= "physmem",
#endif

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
}

memblock API介绍

phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
                    phys_addr_t start, phys_addr_t end,
                    int nid, ulong flags);
phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
                   phys_addr_t size, phys_addr_t align);
void memblock_allow_resize(void);
int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
int memblock_add(phys_addr_t base, phys_addr_t size);
int memblock_remove(phys_addr_t base, phys_addr_t size);
int memblock_free(phys_addr_t base, phys_addr_t size);
int memblock_reserve(phys_addr_t base, phys_addr_t size);
void memblock_trim_memory(phys_addr_t align);
bool memblock_overlaps_region(struct memblock_type *type,
                  phys_addr_t base, phys_addr_t size);
int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
ulong choose_memblock_flags(void);

  其中对不同类型memblock的添加、移除操作主要有如下接口:memblock_add()和memblock_remove()是针对可用内存(memory)类型memblock的操作;memblock_reserve()和memblock_free()是针对reserved类型memblock的操作。

int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
                       int nid)
{
    return memblock_add_range(&memblock.memory, base, size, nid, 0);
}

int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
}
int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
    return memblock_remove_range(&memblock.memory, base, size);
}

int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
{
    kmemleak_free_part_phys(base, size);
    return memblock_remove_range(&memblock.reserved, base, size);
}

int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0);
}

memblock调试

  在内核启动bootargs,可以加入"memblock=debug",会打开memblock的dbg打印,通过打印可以看出memblock的预留、分配等操作。
  在内核编译时使能了内核debug功能后,还可以通过以下操作查看memblock信息。

cat /sys/kernel/debug/memblock/memory 
cat /sys/kernel/debug/memblock/reserved

arm_memblock_init

  arm_memblock_init是memblock的初始化函数,代码如下:
start_kernel->setup_arch->arm_memblock_init

void __init arm_memblock_init(const struct machine_desc *mdesc)
{
	/* Register the kernel text, kernel data and initrd with memblock. */
	memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START);	(1)

	arm_initrd_init();													(2)

	arm_mm_memblock_reserve();											(3)

	/* reserve any platform specific memblock areas */
	if (mdesc->reserve)
		mdesc->reserve();

	early_init_fdt_reserve_self();										(4)
	early_init_fdt_scan_reserved_mem();									(5)

	/* reserve memory for DMA contiguous allocations */
	dma_contiguous_reserve(arm_dma_limit);								(6)

	arm_memblock_steal_permitted = false;
	memblock_dump_all();												(7)
}

1)将内核镜像所在区域(KERNEL_START~KERNEL_END,包含代码段、数据段、bss段等)设置为reserved类型memblock,其中的init段会在free_initmem()中返还给内核
2)将内核initrd段设置为reserved类型memblock
3)将内核一级页表区域设置为reserved类型memblock
4)将dtb本身区域设置为reserved类型memblock
5)将dtb中reserved-memory区域设置为reserved类型memblock,其中CMA区域会返还给内核
6)预留cma连续内存区域
7)打印memblock(全局变量)信息,前提是设置了bootargs中"memblock=debug"
  memblock debug打印:

memblock_reserve: [0x0000000040200000-0x00000000412b688f] arm_memblock_init+0x34/0x1d8	//内核代码段
memblock_reserve: [0x0000000049dee000-0x0000000049ffffff] arm_memblock_init+0x160/0x1d8	//initrd
memblock_reserve: [0x0000000040003000-0x0000000040007fff] arm_memblock_init+0x17c/0x1d8	//内核一级页表
memblock_reserve: [0x0000000049de3000-0x0000000049deb07f] early_init_fdt_reserve_self+0x3c/0x44	//设备树预留自身
memblock_reserve: [0x0000000049de3000-0x0000000049deafff] early_init_fdt_scan_reserved_mem+0x58/0x78	//设备树扫描预留区域,这里是uboot设置的预留dtb,和设备树预留自身冲突
memblock_reserve: [0x0000000049dee000-0x0000000049fff82a] early_init_fdt_scan_reserved_mem+0x58/0x78	//设备树扫描预留区域,这里是uboot设置预留的ramdisk.gz区域内存。
memblock_reserve: [0x000000007c000000-0x000000007fffffff] memblock_alloc_range_nid+0x70/0x88		//cma连续内存,设置的为64M,从物理内存最后开始预留(设备物理内存0x40000000-0x7fffffff)
Reserved memory: created CMA memory pool at 0x000000007c000000, size 64 MiB
OF: reserved mem: initialized node linux,cma, compatible id shared-dma-pool
MEMBLOCK configuration:
 memory size = 0x0000000040000000 reserved size = 0x00000000052d5910
 memory.cnt  = 0x1	//可用内存块,数量1
 memory[0x0]     [0x0000000040000000-0x000000007fffffff], 0x0000000040000000 bytes flags: 0x0	//打印可用内存
 reserved.cnt  = 0x5	//预留内存块,数量5,以下依次打印预留段的起止物理地址
 reserved[0x0]   [0x0000000040003000-0x0000000040007fff], 0x0000000000005000 bytes flags: 0x0
 reserved[0x1]   [0x0000000040200000-0x00000000412b688f], 0x00000000010b6890 bytes flags: 0x0
 reserved[0x2]   [0x0000000049de3000-0x0000000049deb07f], 0x0000000000008080 bytes flags: 0x0
 reserved[0x3]   [0x0000000049dee000-0x0000000049ffffff], 0x0000000000212000 bytes flags: 0x0
 reserved[0x4]   [0x000000007c000000-0x000000007fffffff], 0x0000000004000000 bytes flags: 0x0

  在buddy系统初始化前,还会有很多用到memblock的地方,内核也还会预留一些其他的内存段。开启memblock debug,内核启动打印都可以看到。

小结

  内核启动后,start_kernel调用关系为:
在这里插入图片描述
  本文中介绍了early_fixmap_init()、early_ioremap_init()、arm_memblock_init()。setup_arch中还调用到了paging_init,该接口涉及到内核low mem页表初始化、zone初始化,后续的文章中再介绍。

相关文章

Linux内存初始化(1)——memblock初始化
Linux内存初始化(2)——paging_init初始化
Linux内存初始化(3)——pglist_data/zone初始化
Linux内存初始化(4)——伙伴系统(buddy)

  • 3
    点赞
  • 28
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值