上一篇文章记述了node和zone初始化的基本流程。本篇记录一下伙伴系统初始化。
跟伙伴系统相关的数据结构
/*
 * Excerpt of struct zone: only the member used by the buddy allocator is
 * shown. The "..." lines stand for fields omitted from this listing.
 */
struct zone {
...
/*
 * free areas of different sizes: an array of MAX_ORDER entries,
 * presumably indexed by block order (0 .. MAX_ORDER - 1)
 */
struct free_area free_area[MAX_ORDER];
...
}
/*
 * Free-page bookkeeping stored in each slot of zone->free_area[].
 */
struct free_area {
/* one list head per migrate type (MIGRATE_TYPES entries) */
struct list_head free_list[MIGRATE_TYPES];
/*
 * counter for this area; presumably the number of free blocks currently
 * linked on the lists above -- confirm against the allocator code
 */
unsigned long nr_free;
};
/*
 * Migrate types; struct free_area keeps one free_list per value.
 * MIGRATE_TYPES is the last enumerator, so its value equals the number of
 * types compiled in. MIGRATE_CMA and MIGRATE_ISOLATE only exist when
 * CONFIG_CMA / CONFIG_MEMORY_ISOLATION are defined.
 */
enum migratetype {
MIGRATE_UNMOVABLE,
MIGRATE_MOVABLE,
MIGRATE_RECLAIMABLE,
MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
/* deliberately shares the numeric value of MIGRATE_PCPTYPES */
MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
/*
* MIGRATE_CMA migration type is designed to mimic the way
* ZONE_MOVABLE works. Only movable pages can be allocated
* from MIGRATE_CMA pageblocks and page allocator never
* implicitly change migration type of MIGRATE_CMA pageblock.
*
* The way to use it is to change migratetype of a range of
* pageblocks to MIGRATE_CMA which can be done by
* __free_pageblock_cma() function. What is important though
* is that a range of pageblocks must be aligned to
* MAX_ORDER_NR_PAGES should biggest page be bigger than
* a single pageblock.
*/
MIGRATE_CMA,
#endif
#ifdef CONFIG_MEMORY_ISOLATION
MIGRATE_ISOLATE, /* can't allocate from here */
#endif
/* not a real type: count of the enumerators above, used as an array size */
MIGRATE_TYPES
};
migratetype(迁移类型)是为了内存反碎片化而引入的。以前最重要的类型是MIGRATE_MOVABLE和MIGRATE_RECLAIMABLE,MIGRATE_CMA则是近两年才引入的。
linux内存初始化:
/*
 * Excerpt of start_kernel() showing only the memory-management related
 * calls, in the order they are made. "..." marks omitted code.
 */
asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
{
...
// architecture-specific setup; zone and node initialization happens here
setup_arch(&command_line);
...
// zonelists initialization
build_all_zonelists(NULL);
page_alloc_init();
...
// buddy system initialization
mm_init();
...
}
在mm_init()之前,free_area已经初始化了,只不过各个order的nr_free都被初始化为0。mm_init()会调用mem_init(),由mem_init()把空闲内存释放给伙伴系统:
/*
* mem_init() marks the free areas in the mem_map and tells us how much memory
* is free. This is done after various parts of the system have claimed their
* memory after the kernel image.
*
* Excerpt: only the call that fills the free lists is shown; "..." marks
* omitted code.
*/
void __init mem_init(void)
{
...
/* this will put all unused low memory onto the freelists */
memblock_free_all();
...
}
/**
* memblock_free_all - release free pages to the buddy allocator
*
* Runs two preparation helpers, then frees memory via
* free_low_memory_core_early() and adds the number of pages it reports to
* the total RAM page counter through totalram_pages_add().
*
* Return: the number of pages actually released.
*/
unsigned long __init memblock_free_all(void)
{
unsigned long pages;
/* NOTE(review): presumably releases memmap parts that back no memory -- helper not shown here */
free_unused_memmap();
/* NOTE(review): presumably zeroes each zone's managed-page count before pages are freed -- helper not shown here */
reset_all_zones_managed_pages();
/* hand memory to the buddy allocator and remember how many pages went in */
pages = free_low_memory_core_early();
totalram_pages_add(pages);
return pages;
}
/*
 * Walk the memblock ranges and release the free ones.
 *
 * First every reserved range is passed to reserve_bootmem_region(); then
 * every free range is passed to __free_memory_core() and its return values
 * are summed.
 *
 * Return: the accumulated count reported by __free_memory_core().
 */
static unsigned long __init free_low_memory_core_early(void)
{
unsigned long count = 0;
phys_addr_t start, end;
u64 i;
/* NOTE(review): (0, -1) presumably covers the whole address range -- confirm against memblock_clear_hotplug() */
memblock_clear_hotplug(0, -1);
/* process reserved ranges before any free range is released */
for_each_reserved_mem_range(i, &start, &end)
reserve_bootmem_region(start, end);
/*
* We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
* because in some case like Node0 doesn't have RAM installed
* low ram will be on Node1
*/
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
NULL)
count += __free_memory_core(start, end);
return count;
}
通过for_each_free_mem_range遍历memblock中的空闲内存区间(即未被reserved的memory region),然后通过__free_memory_core把这些区间释放给buddy system。__free_memory_core会调用__free_pages_memory。
/*
 * Free the page frame range [start, end) in blocks instead of page by page.
 *
 * @start: first page frame number of the range (it is passed to pfn_to_page())
 * @end:   page frame number one past the end of the range
 *
 * Each iteration picks an order, frees one block of (1 << order) pages
 * starting at @start, and advances @start by that many pages.
 */
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
int order;
while (start < end) {
/*
 * Start from the order given by the lowest set bit of start (i.e. the
 * alignment of start, assuming __ffs() returns that bit index), capped
 * at MAX_ORDER - 1.
 */
order = min(MAX_ORDER - 1UL, __ffs(start));
/* shrink the block until it no longer runs past end */
while (start + (1UL << order) > end)
order--;
memblock_free_pages(pfn_to_page(start), start, order);
start += (1UL << order);
}
}
可以看出,页是按照order成块释放的,而不是一页一页地释放。这样,页最终会在各个free_area的free_list中按照order组织起来。
调用链:memblock_free_pages -> __free_pages_core -> __free_pages_ok
/*
 * Free a block of (1 << order) pages starting at @page.
 *
 * @page:      first page of the block
 * @order:     order of the block
 * @fpi_flags: passed through unchanged to free_one_page()
 *
 * Returns early, freeing nothing, when free_pages_prepare() reports failure.
 */
static void __free_pages_ok(struct page *page, unsigned int order,
fpi_t fpi_flags)
{
unsigned long flags;
int migratetype;
unsigned long pfn = page_to_pfn(page);
if (!free_pages_prepare(page, order, true))
return;
// look up the migrate type recorded for the pageblock containing this page
migratetype = get_pfnblock_migratetype(page, pfn);
// local interrupts stay disabled while the counter and the free lists are updated
local_irq_save(flags);
__count_vm_events(PGFREE, 1 << order);
// file the block into its zone according to order and migrate type
free_one_page(page_zone(page), page, pfn, order, migratetype,
fpi_flags);
local_irq_restore(flags);
}
除了order,page还需要按照迁移类型进行组织。最终将页释放给buddy system的是__free_one_page。free_one_page会调用__free_one_page。__free_one_page就是buddy system的释放页的函数。也就是在初始化buddy system时就是用的伙伴系统的释放页所用的函数,因为两者都是将页还给伙伴系统。具体组织页的细节比较复杂,最终形成的结构大约是这样。
-----图片转自《奔跑吧,linux kernel》