大页内存 初始化
hstates
所有的大页均放在 hstates 的全局变量中。
mm/hugetlb.c
struct hstate hstates[HUGE_MAX_HSTATE];
hstates
数组的每个元素,均表示一种大小的 page。其中 HUGE_MAX_HSTATE
的值为 2,这也就解释了为什么 hugepage 目前只支持两种 page size。
当前系统有多少个类型 的hugepage,可以通过 /sys/kernel/mm/hugepages/
查看,例如我当前系统只有20个大小为1G的Page:
$ls -l /sys/kernel/mm/hugepages/
total 0
drwxr-xr-x 2 root root 0 Nov 1 11:52 hugepages-1048576kB
$cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
20
初始化 hstates
系统启动后,会执行 parse_args
函数解析一些启动参数
sudo grubby --update-kernel=ALL --args="hugepagesz=1G default_hugepagesz=1G hugepages=20"
我们设置的启动参数均会被解析,当解析 hugepagesz=
时调用setup_hugepagesz
函数,该函数 通过 hugetlb_add_hstate
,初始化 hstates
。
/*
 * Parse a "hugepagesz=" boot argument and register the matching hstate.
 *
 * Returns 1 when the requested size is supported (PMD-sized always;
 * PUD-sized only when the CPU has gigantic-page support), 0 otherwise.
 */
static __init int setup_hugepagesz(char *opt)
{
	unsigned long size = memparse(opt, &opt);

	if (size == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
		return 1;
	}

	if (size == PUD_SIZE && cpu_has_gbpages) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
		return 1;
	}

	printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
		size >> 20);
	return 0;
}
可见,大页的 size 不能随意设置,其大小必须是 PMD_SIZE
或者 PUD_SIZE
之一。对于 x86-64 来说,就是 2M 或者 1G。
申请大页内存
解析到 hugepages=
参数的时候,会调用 hugetlb_nrpages_setup
记录需要申请的大页个数;实际的申请由 hugetlb_hstate_alloc_pages 完成:
/*
 * Allocate the boot-time huge pages requested for hstate @h
 * (e.g. "hugepages=10" asks for 10 pages of this size).
 *
 * Gigantic pages must come from bootmem; normal huge pages are taken
 * from the buddy allocator across all nodes with memory.  On exit,
 * h->max_huge_pages is clamped to the count actually obtained.
 */
static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
{
	unsigned long allocated = 0;

	while (allocated < h->max_huge_pages) {
		int ok;

		if (hstate_is_gigantic(h))
			ok = !!alloc_bootmem_huge_page(h);
		else
			ok = !!alloc_fresh_huge_page(h,
						     &node_states[N_MEMORY]);
		if (!ok)
			break;
		allocated++;
	}

	/* Record how many pages we really got. */
	h->max_huge_pages = allocated;
}
alloc_bootmem_huge_page
函数申请一块大内存,然后放在全局队列 huge_boot_pages
中。
组织大页
/* Put bootmem huge pages into the standard lists after mem_map is up */
static void __init gather_bootmem_prealloc(void)
{
struct huge_bootmem_page *m;
/*
 * Walk every page carved out of bootmem by alloc_bootmem_huge_page()
 * and queued on huge_boot_pages, handing each one over to the regular
 * hugetlb machinery.
 */
list_for_each_entry(m, &huge_boot_pages, list) {
struct hstate *h = m->hstate;
struct page *page;
#ifdef CONFIG_HIGHMEM
/*
 * With HIGHMEM the tracking struct may not be directly addressable:
 * translate its stored physical address to the struct page, then
 * release the bootmem that held the tracker itself.
 */
page = pfn_to_page(m->phys >> PAGE_SHIFT);
free_bootmem_late((unsigned long)m,
sizeof(struct huge_bootmem_page));
#else
/* Tracker lives inside the huge page; map its address back. */
page = virt_to_page(m);
#endif
WARN_ON(page_count(page) != 1);
/* Turn the raw range into a compound page of this hstate's order. */
prep_compound_huge_page(page, h->order);
WARN_ON(PageReserved(page));
/* Install the huge-page destructor and put the page on h's free list. */
prep_new_huge_page(h, page, page_to_nid(page));
/*
 * If we had gigantic hugepages allocated at boot time, we need
 * to restore the 'stolen' pages to totalram_pages in order to
 * fix confusing memory reports from free(1) and another
 * side-effects, like CommitLimit going negative.
 */
if (hstate_is_gigantic(h))
totalram_pages += 1 << h->order;
}
}
实际是在 prep_new_huge_page
函数中,设置 page 的析构函数为 free_huge_page
,并将大页 page 放入 h->hugepage_freelists 链表中。