memory block - Linux内存管理(5)


日期 内核版本 架构 作者 GitHub CSDN
2017-07-17 Linux-4.12 X86 lwhuq LinuxMemoryStudy Linux内存管理

1 Introduction

  在Linux内核早期启动阶段,Linux的内存管理模块尚未初始化完成,内核需要一个简化的内存管理模块来满足此时的内存分配请求。早期的内核中负责初始化阶段的内存分配器称为引导内存分配器(bootmem分配器)。bootmem分配器基于最先适配(first-fit)分配器的原理(这也是很多系统的内存分配所使用的原理),使用一个位图来管理页。最新的内核过渡到使用memblock,详见patch

  memblock和bootmem这两种机制对外提供的API是一致的,因此对用户是透明的。内核中可以通过编译选项CONFIG_NO_BOOTMEM来选择使用哪一种机制,定义在mm/Makefile#L46

ifdef CONFIG_NO_BOOTMEM
	obj-y		+= nobootmem.o
else
	obj-y		+= bootmem.o
endif

2 Data structure

  memblock的所有数据结构定义在include/linux/memblock.h。

  第一个数据结构的名字是memblock,定义在include/linux/memblock.h#L48

/*
 * Top-level memblock descriptor: allocation direction, allocation limit,
 * and one memblock_type collection per kind of region.
 *
 * NOTE(review): current_limit is assigned address-like values elsewhere in
 * this file (MEMBLOCK_ALLOC_ANYWHERE, ISA_END_ADDRESS), so it looks like an
 * upper address bound for allocations rather than a size -- confirm.
 */
struct memblock {
	bool bottom_up;  /* is bottom up direction? if true, allocate memory from the bottom up */
	phys_addr_t current_limit; /* limit applied to memblock (see note above) */
	/* three kinds of region collections: memory, reserved, physmem */
	struct memblock_type memory;
	struct memblock_type reserved;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	struct memblock_type physmem;
#endif
};
  第二个数据结构是memblock_type,定义在 include/linux/memblock.h#L40
/*
 * A collection of regions of one kind (e.g. "memory" or "reserved"),
 * stored in the regions[] array.
 */
struct memblock_type {
	unsigned long cnt;	/* number of regions */
	unsigned long max;	/* size of the allocated array, i.e. capacity of regions[] in entries */
	phys_addr_t total_size;	/* size of all regions */
	struct memblock_region *regions; /* pointer to the array of memblock_region entries */
	char *name; /* name of this type, e.g. "memory", "reserved" */
};

  memblock_region结构用于描述memory region,定义在include/linux/memblock.h#L31

/*
 * One memory region: the range [base, base + size) together with its flags
 * and, when CONFIG_HAVE_MEMBLOCK_NODE_MAP is set, a node id.
 */
struct memblock_region {
	phys_addr_t base;	/* start address of the region */
	phys_addr_t size;	/* size of the region */
	unsigned long flags;	/* region flags; presumably MEMBLOCK_* values -- confirm */
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
	int nid;		/* node id of the region */
#endif
};
  memblock_region记录了当前memory region的起始地址,大小,标志和Node ID(Node ID仅在定义了CONFIG_HAVE_MEMBLOCK_NODE_MAP时存在)。 标志的定义在 include/linux/memblock.h#L24

/*
 * Definition of memblock flags.
 * The non-zero flags are distinct single bits (0x1, 0x2, 0x4).
 */
enum {
	MEMBLOCK_NONE		= 0x0,	/* No special request */
	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
};


  总结来说,整个memory block的data structure布局如下

3 Memblock 初始化

memblock结构的实例是一个同名的全局变量,定义在mm/memblock.c#L34

/*
 * Statically allocated initial region arrays, one per region type.
 * The memory and reserved arrays hold INIT_MEMBLOCK_REGIONS entries; the
 * physmem array holds INIT_PHYSMEM_REGIONS entries and only exists when
 * CONFIG_HAVE_MEMBLOCK_PHYS_MAP is defined.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif

/*
 * The global memblock instance.
 *
 * Each region type starts out pointing at its static initial array, with
 * max set to that array's length and cnt set to 1 (a single empty dummy
 * entry). Allocation direction starts as top-down (bottom_up = false) and
 * current_limit starts at MEMBLOCK_ALLOC_ANYWHERE.
 */
struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.cnt		= 1,	/* empty dummy entry */
	.memory.max		= INIT_MEMBLOCK_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.cnt		= 1,	/* empty dummy entry */
	.reserved.max		= INIT_MEMBLOCK_REGIONS,
	.reserved.name		= "reserved",

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	.physmem.regions	= memblock_physmem_init_regions,
	.physmem.cnt		= 1,	/* empty dummy entry */
	.physmem.max		= INIT_PHYSMEM_REGIONS,
	.physmem.name		= "physmem",
#endif

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

  • __initdata_memblock指定了结构储存位置,如果定义了CONFIG_ARCH_DISCARD_MEMBLOCK,则存放在__meminitdata
  • 每种memory type的cnt字段都初始化为1(即一个空的占位region)
  • 每种memory type的regions都指向全局静态数组。数组单元个数,memory和reserved初始化为INIT_MEMBLOCK_REGIONS,physical memory初始化为INIT_PHYSMEM_REGIONS。因此max字段也初始化同样的值
/* Lengths of the static initial region arrays (memory/reserved and physmem). */
#define INIT_MEMBLOCK_REGIONS	128
#define INIT_PHYSMEM_REGIONS	4
  • bottom_up被初始化为false,说明内存分配是从高到低
  • current_limit被初始化为MEMBLOCK_ALLOC_ANYWHERE,可访问最高地址空间。
/* Bitwise NOT of zero in phys_addr_t; used as the initial current_limit. */
#define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)

4 Memblock APIs  

4.1 Add


  在bootmem.h中的相关APIs


4.1.1 memblock_add_range

定义在mm/memblock.c#L496

/**
 * memblock_add_range - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base,@base+@size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * The region walk runs twice: a counting pass that only determines how
 * many new entries are needed (growing the array if necessary), followed
 * by an inserting pass that adds only the parts of the new range that are
 * not already covered by existing regions.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, unsigned long flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	/*
	 * memblock_cap_size() is handed &size, so it may adjust size.
	 * NOTE(review): presumably it clamps the range so that base + size
	 * does not overflow -- confirm against its definition.
	 */
	phys_addr_t end = base + memblock_cap_size(base, &size);
	/*
	 * NOTE(review): idx is never assigned in this function; presumably
	 * for_each_memblock_type() initialises and advances it -- confirm
	 * against the macro definition.
	 */
	int idx, nr_new;
	struct memblock_region *rgn;

	/* nothing to add */
	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 1 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		return 0;
	}
repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
	 * to accommodate the new area.  The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		/*
		 * @rgn starts at or after the end of the new area: stop.
		 * NOTE(review): this relies on regions being ordered by
		 * base address -- confirm.
		 */
		if (rbase >= end)
			break;
		/* @rgn lies entirely below the part still to be handled */
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != rgn->flags);
			nr_new++;
			if (insert)
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert)
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
	}

	/* the new area is already fully covered by existing regions */
	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		/* grow until the array can hold the existing plus new entries */
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type);
		return 0;
	}
}

  • 第一次循环(insert == false)只统计需要新增的region个数nr_new,并不做实际插入。如果nr_new为0(新区域已被现有region完全覆盖)则直接返回;否则检查type->cnt + nr_new是否超过region数组的容量type->max,不够的话就调用memblock_double_array扩容,然后设置insert = true,最后goto到repeat执行第二次循环
  • 第二次循环中,执行insert == true代码块,调用memblock_insert_region插入region,最后调用memblock_merge_regions合并相邻region。
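  • idx在本函数中没有显式初始化:4.12中for_each_memblock_type宏隐式使用调用者作用域内名为idx的变量,在for循环的初始化语句中把它置为0,并在每次迭代时递增。因此循环退出时idx正好是剩余部分应当插入的位置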

4.2 Free and remove


4.3 Allocate


  • memblock的内存分配就是在memory类型中找到一段空闲的内存范围,并把它添加到reserved类型的region中

5 memblock初始化

  X86_64架构的内核从E820和EFI memmap得到boot内存信息,随后根据boot内存信息建立memblock结构。具体实现在setup_arch函数,定义在arch/x86/kernel/setup.c#L848

/*
 * setup_arch - memblock-related steps of x86 early setup.
 * @cmdline_p: not used by the statements shown here.
 *
 * NOTE(review): this appears to be an abridged excerpt that keeps only the
 * memblock-related calls -- confirm against the full function.
 *
 * Sequence shown: reserve the range from _text to __bss_stop, reserve EFI
 * ranges when booted via EFI, optionally switch memblock to bottom-up
 * allocation, reserve setup_data and the brk area, then set the memblock
 * limit and call e820__memblock_setup().
 */
void __init setup_arch(char **cmdline_p)
{
	/* reserve [_text, __bss_stop), starting at the physical address of _text */
	memblock_reserve(__pa_symbol(_text),
			 (unsigned long)__bss_stop - (unsigned long)_text);


#ifdef CONFIG_EFI
	if (efi_enabled(EFI_BOOT))
		efi_memblock_x86_reserve_range();
#endif


#ifdef CONFIG_MEMORY_HOTPLUG
	/*
	 * Memory used by the kernel cannot be hot-removed because Linux
	 * cannot migrate the kernel pages. When memory hotplug is
	 * enabled, we should prevent memblock from allocating memory
	 * for the kernel.
	 *
	 * ACPI SRAT records all hotpluggable memory ranges. But before
	 * SRAT is parsed, we don't know about it.
	 *
	 * The kernel image is loaded into memory at very early time. We
	 * cannot prevent this anyway. So on NUMA system, we set any
	 * node the kernel resides in as un-hotpluggable.
	 *
	 * Since on modern servers, one node could have double-digit
	 * gigabytes memory, we can assume the memory around the kernel
	 * image is also un-hotpluggable. So before SRAT is parsed, just
	 * allocate memory near the kernel image to try the best to keep
	 * the kernel away from hotpluggable memory.
	 */
	if (movable_node_is_enabled())
		memblock_set_bottom_up(true);
#endif

	/* after early param, so could get panic from serial */
	memblock_x86_reserve_range_setup_data();

	/*
	 * Need to conclude brk, before e820__memblock_setup()
	 *  it could use memblock_find_in_range, could overlap with
	 *  brk area.
	 */
	reserve_brk();

	cleanup_highmap();

	/* set the memblock limit to ISA_END_ADDRESS before the e820 setup call */
	memblock_set_current_limit(ISA_END_ADDRESS);
	e820__memblock_setup();

}

  • 最后的e820__memblock_setup()真正完成memblock中内存region的添加初始化工作。在此之前的函数都只是调用memblock_reserve,把已经被占用的内存范围登记到reserved类型中

6 Reference


  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值