http://blog.chinaunix.net/uid-1701789-id-284584.html
暂时放下对内存管理的探讨而对setup_arch进行深入分析,目的在于找出与内存管理初始化相关的细节。在遍历代码的过程中,我跳过了一些和特殊平台相关的代码,例如EFI等。
1. 首先kernel将全局页表目录page global directory切换到swapper_pg_dir。方法是将initial_page_table的内容复制到swapper_pg_dir里面,并且重置cr3。initial_page_table的初始化可以参见http://blog.chinaunix.net/space.php?uid=1701789&do=blog&id=154125。
- /*
- * copy kernel address range established so far and switch
- * to the proper swapper page table
- */
- clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- initial_page_table + KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
-
- load_cr3(swapper_pg_dir);
- __flush_tlb_all();
2. 初始化中断向量和CPU
- early_trap_init();//初始化中断向量1的服务例程为debug所指向的中断服务例程,向量3指向int3所指向的服务例程,向量14则指向page_fault指向的服务例程。
- early_cpu_init(); //初始化cpu_devs[X86_VENDOR_NUM] 并以此侦测boot_cpu_data
3. 根据实模式下设置的各个启动参数boot_params设置全局变量
- ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
- screen_info = boot_params.screen_info;
- edid_info = boot_params.edid_info;
- #ifdef CONFIG_X86_32
- apm_info.bios = boot_params.apm_bios_info;
- ist_info = boot_params.ist_info;
- if (boot_params.sys_desc_table.length != 0) {
- set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
- machine_id = boot_params.sys_desc_table.table[0];
- machine_submodel_id = boot_params.sys_desc_table.table[1];
- BIOS_revision = boot_params.sys_desc_table.table[2];
- }
- #endif
- saved_video_mode = boot_params.hdr.vid_mode;
- bootloader_type = boot_params.hdr.type_of_loader;
- if ((bootloader_type >> 4) == 0xe) {
- bootloader_type &= 0xf;
- bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
- }
- bootloader_version = bootloader_type & 0xf;
- bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
-
- #ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
- #endif
- #ifdef CONFIG_EFI
- if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
- #ifdef CONFIG_X86_32
- "EL32",
- #else
- "EL64",
- #endif
- 4)) {
- efi_enabled = 1;
- efi_memblock_x86_reserve_range();
- }
- #endif
copy_edd();
if (!boot_params.hdr.root_flags)
root_mountflags &= ~MS_RDONLY;
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
#else
if (builtin_cmdline[0]) {
/* append boot loader cmdline to builtin */
strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
}
#endif
#endif
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
parse_early_param();
memblock_x86_reserve_range_setup_data(); /* after early param, so could get panic from serial */
4. 初始化early_ioremap
- early_ioremap_init();对于early_ioremap的解释参考http://blog.chinaunix.net/space.php?uid=1701789&do=blog&id=263951
5. 调用x86_init.oem.arch_setup()。x86_init结构的初始化值在x86_init.c中定义,其具体内容如下。所以arch_setup实际执行的是x86_init_noop(). 这是一个空函数,什么都没有做。x86平台的其他非普通PC如果需要进行一些特定的初始化,可以重置x86_init的结构。
- /*
- * The platform setup functions are preset with the default functions
- * for standard PC hardware.
- */
- struct x86_init_ops x86_init __initdata = {
-
- .resources = {
- .probe_roms = x86_init_noop,
- .reserve_resources = reserve_standard_io_resources,
- .memory_setup = default_machine_specific_memory_setup,
- },
-
- .mpparse = {
- .mpc_record = x86_init_uint_noop,
- .setup_ioapic_ids = x86_init_noop,
- .mpc_apic_id = default_mpc_apic_id,
- .smp_read_mpc_oem = default_smp_read_mpc_oem,
- .mpc_oem_bus_info = default_mpc_oem_bus_info,
- .find_smp_config = default_find_smp_config,
- .get_smp_config = default_get_smp_config,
- },
-
- .irqs = {
- .pre_vector_init = init_ISA_irqs,
- .intr_init = native_init_IRQ,
- .trap_init = x86_init_noop,
- },
-
- .oem = {
- .arch_setup = x86_init_noop,
- .banner = default_banner,
- },
-
- .paging = {
- .pagetable_setup_start = native_pagetable_setup_start,
- .pagetable_setup_done = native_pagetable_setup_done,
- },
-
- .timers = {
- .setup_percpu_clockev = setup_boot_APIC_clock,
- .tsc_pre_init = x86_init_noop,
- .timer_init = hpet_time_init,
- },
-
- .iommu = {
- .iommu_init = iommu_init_noop,
- },
-
- .pci = {
- .init = x86_default_pci_init,
- .init_irq = x86_default_pci_init_irq,
- .fixup_irqs = x86_default_pci_fixup_irqs,
- },
- };
6. 初始化e820结构。对于e820的初始化介绍详见
http://blog.chinaunix.net/space.php?uid=1701789&do=blog&id=263951
- setup_memory_map();
- parse_setup_data();
- /* update the e820_saved too */
- e820_reserve_setup_data();
- finish_e820_parsing();
7. 初始化dmi ---
dmi_scan_machine();
- void __init dmi_scan_machine(void)
- {
- char __iomem *p, *q;
- int rc;
-
- if (efi_enabled) { //如果存在efi则在efi_smbios中寻找dmi
- if (efi.smbios == EFI_INVALID_TABLE_ADDR)
- goto error;
-
- /* This is called as a core_initcall() because it isn't
- * needed during early boot. This also means we can
- * iounmap the space when we're done with it.
- */
- p = dmi_ioremap(efi.smbios, 32);
- if (p == NULL)
- goto error;
-
- rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ // to decode DMI table to dmi_ident table
- dmi_iounmap(p, 32);
- if (!rc) {
- dmi_available = 1;
- goto out;
- }
- }
- else {
- /*
- * no iounmap() for that ioremap(); it would be a no-op, but
- * it's so early in setup that sucker gets confused into doing
- * what it shouldn't if we actually call it.
- */
- p = dmi_ioremap(0xF0000, 0x10000); // 在非efi的BIOS中,dmi应该从 memory range 0xF0000, size 0x10000处开始寻找
- if (p == NULL)
- goto error;
-
- for (q = p; q < p + 0x10000; q += 16) { //每次步进16个字节循环搜索dmi
- rc = dmi_present(q);//查找dmi的标记并提取 dmi_num, dmi_len, dmi_base三个变量
- if (!rc) { //发现了dmi,那么就可以跳出循环了
- dmi_available = 1;
- dmi_iounmap(p, 0x10000);
- goto out;
- }
- }
- dmi_iounmap(p, 0x10000);
- }
- error:
- printk(KERN_INFO "DMI not present or invalid.\n");
- out:
- dmi_initialized = 1;
- }
8. iomem_resource. iomem_resource的定义如下, 其定义了一个从0到ffffffff的内存区间。
- struct resource iomem_resource = {
- .name = "PCI mem",
- .start = 0,
- .end = -1,
- .flags = IORESOURCE_MEM,
- };
相关于iomem_resource在setup_arch里的操作如下:
- iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; //iomem_resource.end=0xffffffff
-
- code_resource.start = virt_to_phys(_text);
- code_resource.end = virt_to_phys(_etext)-1;
- data_resource.start = virt_to_phys(_etext);
- data_resource.end = virt_to_phys(_edata)-1;
- bss_resource.start = virt_to_phys(&__bss_start);
- bss_resource.end = virt_to_phys(&__bss_stop)-1;
-
-
- x86_init.resources.probe_roms();// call x86_init_noop which does nothing
-
- /* after parse_early_param, so could debug it */
- //将code_resource, data_resource和bss_resource加入iomem_resource。iomem_resource是一个超集,code_resource, data_resource和bss_resource是iomem_resource中互不重叠的子集,这三个子集之间是平级关系,通过sibling域相互连接。
- insert_resource(&iomem_resource, &code_resource);
- insert_resource(&iomem_resource, &data_resource);
- insert_resource(&iomem_resource, &bss_resource);
9. 计算max_pfn, max_low_pfn以确定HighMemory大小。
由于kernel的实现中0xC0000000到0xFFFFFFFF之间的区域都是使用页面线性映射的方式将3G-4G逻辑地址映射到0-1G物理地址,所以内核逻辑地址的最大上限是0xFFFFFFFF。而逻辑地址在0-3G的空间是分配给应用程序使用的。而前面说过从0xFFFFFFFF往下的128M的空间被ioremap所占据用以固定内存的映射。那么实际kernel使用的空间就是从0xC0000000的896M空间。当物理内存超过896M的空间的时候,超出的物理内存将无法直接从逻辑地址到物理地址进行线性映射。所以就出现了HighMemory:超出896M的物理内存的部分就被定义为HIGHMemory.当kernel需要去使用这部分HighMemory的时候需要进行映射。
max_pfn的含义是最大的物理内存的页面号码。
max_low_pfn的含义是Kernel能够直接使用的页面号码。
HighMemory就是max_low_pfn到max_pfn之间的物理内存。
highmem_pages的含义是属于HighMemory的页面数。
max_pfn和max_low_pfn的计算方式如下:
- max_pfn = e820_end_of_ram_pfn();//已经知道e820里面是物理内存分配的情况。e820_end_of_ram_pfn通过遍历e820结构找出最大物理地址,并将其所处的页面作为max_pfn
-
- /* update e820 for memory not covered by WB MTRRs */ /* MTRR(Memory Type Range Register)是x86架构中的一组寄存器,用来指明物理内存中各段区域的缓存类型(例如Write Back),即这些区域是否以及如何被CPU加载入Cache以加快访问速度。*/
- mtrr_bp_init();//initialize boot cpu mtrr. Actually it clean up the mtrr to make sure only Write Back cache memory range are included in mtrr memory range
- if (mtrr_trim_uncached_memory(max_pfn))//update e820 to reserve mtrr memory range
- max_pfn = e820_end_of_ram_pfn();//如果由于mtrr的原因导致e820被更新,那么重新计算max_pfn
-
- #ifdef CONFIG_X86_32
- /* max_low_pfn get updated here */
- find_low_pfn_range(); //计算max_low_pfn,只看x86_32部分。
- #else
- num_physpages = max_pfn;
-
- check_x2apic();
-
- /* How many end-of-memory variables you have, */
- /* need this before calling reserve_initrd */
- if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
- max_low_pfn = e820_end_of_low_ram_pfn();
- else
- max_low_pfn = max_pfn;
-
- high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
- #endif
find_low_pfn_range()的定义如下:
- void __init find_low_pfn_range(void)
- {
- /* it could update max_pfn */
-
- if (max_pfn <= MAXMEM_PFN)
- lowmem_pfn_init();//如果物理内存小于MAXMEM
- else
- highmem_pfn_init();//如果物理内存大于MAXMEM
- }
先来看MAXMEM_PFN的定义:
- #define MAXMEM_PFN PFN_DOWN(MAXMEM)
- #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
- #ifdef CONFIG_X86_PAE
- #define LAST_PKMAP 512
- #else
- #define LAST_PKMAP 1024
- #endif
-
- #define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
- & PMD_MASK)
-
- #ifdef CONFIG_HIGHMEM
- # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
- #else
- # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
- #endif
- unsigned int __VMALLOC_RESERVE = 128 << 20;
可以看到MAXMEM是由VMALLOC_END, PAGE_OFFSET和VMALLOC_RESERVE决定的。PAGE_OFFSET是0xC0000000。
从以上的定义可以看到,从0xFFFFFFFF的最高地址往下,有FIXADDR固定映射区,PKMAP区和VMALLOC区。在这三个区域以下是kernel可以使用自由访问的物理内存空间。
当max_pfn不大于MAXMEM_PFN的时候,即实际具备的物理内存数不超过Kernel所能自由访问的物理内存数时,HighMemory的存在与否实际取决于传给kernel的命令行highmem=x的参数。如果没有传入该参数,highmemory将不存在。如果存在该参数,highmemory将在lowmem_pfn_init里被设置。
- void __init lowmem_pfn_init(void)
- {
- /* max_low_pfn is 0, we already have early_res support */
- max_low_pfn = max_pfn;
-
- if (highmem_pages == -1) //highmem_pages在parse_highmem()设置
- highmem_pages = 0;
- #ifdef CONFIG_HIGHMEM
- if (highmem_pages >= max_pfn) { //如果命令行要求的highmem大于实际内存数,就放弃设置highmem
- printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
- pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
- highmem_pages = 0;
- }
- if (highmem_pages) {
- if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {//如果需要的highmem小于实际内存数,但是物理内存数减去所需的highmem之后,给kernel留下的空间不足64M,则放弃设置highmem.
- printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
- pages_to_mb(highmem_pages));
- highmem_pages = 0;
- }
- max_low_pfn -= highmem_pages;//根据highmem_pages所要求的数目调整max_low_pfn。
- }
- #else
- if (highmem_pages)
- printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
- #endif
- }
当max_pfn大于MAXMEM_PFN,即实际存在的物理内存大于kernel能够直接访问的物理内存时,为了能够访问其余的内存,就需要设置HighMemory。highmem_pfn_init()进行了设置。
- /*
- * We have more RAM than fits into lowmem - we try to put it into
- * highmem, also taking the highmem=x boot parameter into account:
- */
- void __init highmem_pfn_init(void)
- {
- max_low_pfn = MAXMEM_PFN;
-
- if (highmem_pages == -1)//如果命令行没有设置highmem_pages,那么默认highmem_pages就是实际物理内存数减去kernel可直接访问的最大物理内存。
- highmem_pages = max_pfn - MAXMEM_PFN;
-
- if (highmem_pages + MAXMEM_PFN < max_pfn)//如果命令行所要求的highmem+MAXMEM小于实际存在的物理内存数,那么实际可用的物理内存数目就要相应减少。
- max_pfn = MAXMEM_PFN + highmem_pages;
-
- if (highmem_pages + MAXMEM_PFN > max_pfn) {//如果所需的highmem+MAXMEM大于实际存在的物理内存数。则不配置highmem
- printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
- pages_to_mb(max_pfn - MAXMEM_PFN),
- pages_to_mb(highmem_pages));
- highmem_pages = 0;
- }
- #ifndef CONFIG_HIGHMEM
- /* Maximum memory usable is what is directly addressable */
- printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
- if (max_pfn > MAX_NONPAE_PFN)
- printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
- else
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
- max_pfn = MAXMEM_PFN;//如果编译kernel时没有定义CONFIG_HIGHMEM,就应该把max_pfn从实际物理内存数改变成为MAXMEM从而取消HIGHMEM
- #else /* !CONFIG_HIGHMEM */
- #ifndef CONFIG_HIGHMEM64G
- if (max_pfn > MAX_NONPAE_PFN) {//如果编译kernel时没有打开PAE以打开CPU的PAE寻址,那么最大可用内存不可超过4G.
- max_pfn = MAX_NONPAE_PFN;
- printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
- }
- #endif /* !CONFIG_HIGHMEM64G */
- #endif /* !CONFIG_HIGHMEM */
- }
10. 继续分配和预留内存空间。
- /*
- * Need to conclude brk, before memblock_x86_fill()
- * it could use memblock_find_in_range, could overlap with
- * brk area.
- */
- reserve_brk(); //将brk段保留在memblock.reserved里
-
- memblock.current_limit = get_max_mapped(); //对于x86_32系统来说,max_pfn_mapped在head_32.S里面被初始化。 详见http://blog.chinaunix.net/space.php?uid=1701789&do=blog&id=154125
- memblock_x86_fill();//将e820中的E820_RAM类型和E820_RESERVED_KERN类型的内存区域加入到memblock.memory中
-
- /* preallocate 4k for mptable mpc */
- early_reserve_e820_mpc_new();//在memblock中找到一块4K的空闲内存以供mptable使用。并更新memblock和e820反映这个变化。
-
- #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
- setup_bios_corruption_check();
- #endif
-
- printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
- max_pfn_mapped<<PAGE_SHIFT);
-
- reserve_trampoline_memory();
-
- #ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- * even before init_memory_mapping
- */
- acpi_reserve_wakeup_memory();//在memblock中保留(wakeup_code_end-wakeup_code_start)大小的空间为ACPI S3代码预留空间
- #endif
-
11. 继续初始化页表。页表在head_32.S里面已经进行了一部分的初始化。但是在head_32.S中,页表的初始化并没有覆盖所有的内存空间,而是只覆盖到_end+MAPPING_BEYOND_END为止。这里就对页表继续进行初始化
- init_gbpages();//这个函数的定义是空的。没有任何的操作。
-
- /* max_pfn_mapped is updated here */
- max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);//将0-max_low_pfn之间的页面映射到页表。
- max_pfn_mapped = max_low_pfn_mapped;
-
- #ifdef CONFIG_X86_64
- if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
- /* can we preseve max_low_pfn ?*/
- max_low_pfn = max_pfn;
- }
- #endif
- memblock.current_limit = get_max_mapped();
init_memory_mapping的定义如下。
- /*
- * Setup the direct mapping of the physical memory at PAGE_OFFSET.
- * This runs before bootmem is initialized and gets pages directly from
- * the physical memory. To access them they are temporarily mapped.
- */
- unsigned long __init_refok init_memory_mapping(unsigned long start,
- unsigned long end)
- {
- unsigned long page_size_mask = 0;
- unsigned long start_pfn, end_pfn;
- unsigned long ret = 0;
- unsigned long pos;
-
- struct map_range mr[NR_RANGE_MR];
- int nr_range, i;
- int use_pse, use_gbpages;
-
- printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
-
- #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
- /*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
- * This will simplify cpa(), which otherwise needs to support splitting
- * large pages into small in interrupt context, etc.
- */
- use_pse = use_gbpages = 0;
- #else
- use_pse = cpu_has_pse;
- use_gbpages = direct_gbpages;
- #endif
-
- /* Enable PSE if available */
- if (cpu_has_pse)//如果CPU支持PSE,就设置CR4中的PSE位
- set_in_cr4(X86_CR4_PSE);
-
- /* Enable PGE if available */
- if (cpu_has_pge) {//如果CPU支持PGE,就设置CR4中的PGE位
- set_in_cr4(X86_CR4_PGE);
- __supported_pte_mask |= _PAGE_GLOBAL;
- }
-
- if (use_gbpages)//如果使用1GB大小的页面,就设置PG_LEVEL_1G的位在page_size_mask里
- page_size_mask |= 1 << PG_LEVEL_1G;
- if (use_pse)//如果使用PSE那么就设置PG_LEVEL_2M位在page_size_mask中
- page_size_mask |= 1 << PG_LEVEL_2M;
-
- memset(mr, 0, sizeof(mr)); //清空mr
- nr_range = 0;
-
- /* head if not big page alignment ? */
- start_pfn = start >> PAGE_SHIFT;//按照start的地址计算start_pfn
- pos = start_pfn << PAGE_SHIFT; //pos就是start_pfn的起始地址
- #ifdef CONFIG_X86_32
- /*
- * Don't use a large page for the first 2/4MB of memory
- * because there are often fixed size MTRRs in there
- * and overlapping MTRRs into large pages can cause
- * slowdowns.
- */
- /*在只考虑32位寻址能力而不考虑PAE的X86系统上,PAGE_SHIFT=12, PMD_SHIFT=PUD_SHIFT=PGDIR_SHIFT=22,即名义上Linux使用4级页面转换机制,而实际上只使用2级页面转换机制*/
- if (pos == 0)
- end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);//end_pfn=(1<<PMD_SHIFT)>>PAGE_SHIFT。其含义是start_pfn和end_pfn代表了第一个PMD项所指向的页表所指向的内存区域0-4M。
- else
- end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);//如果start不是从第0个page作为起点,那么start_pfn和end_pfn就应该指向start_pfn为起始的那个PMD项所指向的整个页表所代表的4M空间。
- #else /* CONFIG_X86_64 */
- end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- #endif
- if (end_pfn > (end >> PAGE_SHIFT))
- end_pfn = end >> PAGE_SHIFT;
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);//保存start_pfn和end_pfn在mr中
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* big page (2M) range */
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT); //start_pfn指向上个range的end_pfn。理想状态下pos+PMD_SIZE-1不会造成对于PMD_SIZE的进位,所以start_pfn==end_pfn
- #ifdef CONFIG_X86_32
- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);//对于X86_32来说,这个range的end_pfn就是指向end地址的PMD的页面号
- #else /* CONFIG_X86_64 */
- end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
- << (PUD_SHIFT - PAGE_SHIFT);
- if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
- end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
- #endif
-
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));//将start_pfn,end_pfn加入mr,同时指出该区域如果可能的话使用2M的页
- pos = end_pfn << PAGE_SHIFT;
- }
-
- #ifdef CONFIG_X86_64
- /* big page (1G) range */
- start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
- << (PUD_SHIFT - PAGE_SHIFT);
- end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask &
- ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* tail is not big page (1G) alignment */
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
- pos = end_pfn << PAGE_SHIFT;
- }
- #endif
-
- /* tail is not big page (2M) alignment */
- start_pfn = pos>>PAGE_SHIFT;
- end_pfn = end>>PAGE_SHIFT;
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);//建立第三个memory range,将末尾未能按PMD_SIZE对齐的部分(即位于(end>>PMD_SHIFT)<<(PMD_SHIFT-PAGE_SHIFT)这个页面号之后、end之前的内存)用4K页面进行映射
-
- /* try to merge same page size and continuous */ //如果相邻的memory range地址连续并且使用相同大小的页面(例如三个memory range都使用4K的页面),那么就合并。
- for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
- unsigned long old_start;
- if (mr[i].end != mr[i+1].start ||
- mr[i].page_size_mask != mr[i+1].page_size_mask)
- continue;
- /* move it */
- old_start = mr[i].start;
- memmove(&mr[i], &mr[i+1],
- (nr_range - 1 - i) * sizeof(struct map_range));
- mr[i--].start = old_start;
- nr_range--;
- }
-
- //OK now, there is no overlap between every range in mr
- for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG " %010lx - %010lx page %s\n",
- mr[i].start, mr[i].end,
- (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
- (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
-
- /*
- * Find space for the kernel direct mapping tables.
- *
- * Later we should allocate these tables in the local node of the
- * memory mapped. Unfortunately this is done currently before the
- * nodes are discovered.
- */
- if (!after_bootmem)
- find_early_table_space(end, use_pse, use_gbpages); //寻找一块连续的空间能够存放所有PUD, PMD, PT表。全局变量e820_table_start用来描述这段内存的开始地址,e820_table_end用来描述这块内存已经使用了的最大地址。e820_table_top用来描述这段内存区域的结束地址。
-
- for (i = 0; i < nr_range; i++) //真正建立pgd, pt的戏码在这里呢。
- ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
- mr[i].page_size_mask);
-
- #ifdef CONFIG_X86_32
- early_ioremap_page_table_range_init();//在新pgd, pmd, pt的结构下刷新固定内存映射。
-
- load_cr3(swapper_pg_dir);//启用新的pgd, pmd, pt
- #endif
-
- #ifdef CONFIG_X86_64
- if (!after_bootmem && !start) {
- pud_t *pud;
- pmd_t *pmd;
-
- mmu_cr4_features = read_cr4();
-
- /*
- * _brk_end cannot change anymore, but it and _end may be
- * located on different 2M pages. cleanup_highmap(), however,
- * can only consider _end when it runs, so destroy any
- * mappings beyond _brk_end here.
- */
- pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
- pmd = pmd_offset(pud, _brk_end - 1);
- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
- pmd_clear(pmd);
- }
- #endif
- __flush_tlb_all();
-
- if (!after_bootmem && e820_table_end > e820_table_start)
- memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
- e820_table_end << PAGE_SHIFT, "PGTABLE");//将pgd, pud, pmd, pt所占据的空间加入memblock.reserve中
-
- if (!after_bootmem)
- early_memtest(start, end);
-
- return ret >> PAGE_SHIFT;
- }
再来看一下实际建立2级页表的过程。
- /*
- * This maps the physical memory to kernel virtual address space, a total
- * of max_low_pfn pages, by creating page tables starting from address
- * PAGE_OFFSET:
- */
- unsigned long __init
- kernel_physical_mapping_init(unsigned long start,
- unsigned long end,
- unsigned long page_size_mask)
- {
- int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
- unsigned long last_map_addr = end;
- unsigned long start_pfn, end_pfn;
- pgd_t *pgd_base = swapper_pg_dir;
- int pgd_idx, pmd_idx, pte_ofs;
- unsigned long pfn;
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
- unsigned pages_2m, pages_4k;
- int mapping_iter;
-
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = end >> PAGE_SHIFT;
-
- /*
- * First iteration will setup identity mapping using large/small pages
- * based on use_pse, with other attributes same as set by
- * the early code in head_32.S
- *
- * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
- * as desired for the kernel identity mapping.
- *
- * This two pass mechanism conforms to the TLB app note which says:
- *
- * "Software should not write to a paging-structure entry in a way
- * that would change, for any linear address, both the page size
- * and either the page frame or attributes."
- */
- mapping_iter = 1;
-
- if (!cpu_has_pse)
- use_pse = 0;
-
- repeat:
- pages_2m = pages_4k = 0;
- pfn = start_pfn;
//pgd, pmd, pte实现的是0xC0000000以上的虚拟地址转换成0x00000000的物理地址,所以当pgd, pmd, pte建立起来之后将不需要使用pa, va来进行虚拟地址和物理地址的转换。而对于现在的mapping建立,由于pfn是基于物理地址的,所以pgd_idx在计算pgd entry的时候需要加上PAGE_OFFSET.
- pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
- pgd = pgd_base + pgd_idx;
- //以下的循环建立了所有的pgd entry
- for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
- pmd = one_md_table_init(pgd);//在e820_table中获得一个页面作为pmd table.如果已经对相应pgd存在一个则返回该pmd的地址。
-
- if (pfn >= end_pfn)
- continue;
- #ifdef CONFIG_X86_PAE
- pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
- pmd += pmd_idx;
- #else
- pmd_idx = 0;
- #endif
- //以下循环为每一个pgd entry即pmd中的pmd entry建立page table.
- for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
- pmd++, pmd_idx++) {
- unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; //addr是给定pfn的起始地址
-
- /*
- * Map with big pages if possible, otherwise
- * create normal page tables:
- */
- if (use_pse) { //如果使用了2M的page
- unsigned int addr2;
- pgprot_t prot = PAGE_KERNEL_LARGE;//该page的属性是PAGE_KERNEL_LARGE
- /*
- * first pass will use the same initial
- * identity mapping attribute + _PAGE_PSE.
- */
- pgprot_t init_prot =
- __pgprot(PTE_IDENT_ATTR |
- _PAGE_PSE);
-
- addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
- PAGE_OFFSET + PAGE_SIZE-1;
//只要addr或者addr2其中之一落在kernel_text的内存空间里面,我们就把这个page设为PAGE_KERNEL_LARGE_EXEC。这里addr2是:假设该page是2M的,该2M空间的结束地址是addr2。
- if (is_kernel_text(addr) ||
- is_kernel_text(addr2))
- prot = PAGE_KERNEL_LARGE_EXEC;//只要开始地址或结束地址有一个落在kernel_text之内,就设置页面属性为PAGE_KERNEL_LARGE_EXEC.
-
- pages_2m++; //pages_2m是对于使用2M page的计数
- //将pmd和物理pfn相映射以设置pmd entry
- if (mapping_iter == 1)
- set_pmd(pmd, pfn_pmd(pfn, init_prot));
- else
- set_pmd(pmd, pfn_pmd(pfn, prot));
-
- pfn += PTRS_PER_PTE;
- continue; //在2M page的情况下我们就不需要page table了吗?地址转换就变成了pgd->pmd?
- }
- pte = one_page_table_init(pmd); //在e820_table里面获得一个页面作为page table
-
- pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
- pte += pte_ofs; //计算出给定pfn对应的page table entry的位置。
- //以下循环设置了page table中每一个page table entry.
- for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
- pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
- pgprot_t prot = PAGE_KERNEL;
- /*
- * first pass will use the same initial
- * identity mapping attribute.
- */
- pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
-
- if (is_kernel_text(addr))
- prot = PAGE_KERNEL_EXEC;
-
- pages_4k++;
//设置page table entry, 将pte与物理pfn相映射
- if (mapping_iter == 1) {
- set_pte(pte, pfn_pte(pfn, init_prot));
- last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
- } else
- set_pte(pte, pfn_pte(pfn, prot));
- }
- }
- }
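//这是我所不能理解的,为什么会两次对pte的属性进行设置,而不是一次完成呢?(答案其实就在函数开头的英文注释里:按照TLB应用说明,软件不应该在一次写入中同时改变某个线性地址的页面大小以及页框或属性。所以第一遍沿用head_32.S中的初始属性建立映射,只改变页面大小;刷新TLB之后,第二遍才设置最终需要的属性,例如NX, GLOBAL等。)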
- if (mapping_iter == 1) {
- /*
- * update direct mapping page count only in the first
- * iteration.
- */
- update_page_count(PG_LEVEL_2M, pages_2m);
- update_page_count(PG_LEVEL_4K, pages_4k);
-
- /*
- * local global flush tlb, which will flush the previous
- * mappings present in both small and large page TLB's.
- */
- __flush_tlb_all();
-
- /*
- * Second iteration will set the actual desired PTE attributes.
- */
- mapping_iter = 2;
- goto repeat;
- }
- return last_map_addr;
- }
12. 接下来是一些杂七杂八的初始化:
- reserve_initrd();//如果需要的话,将initrd移动位置。
-
- reserve_crashkernel();//如果在启动命令行指定了crashkernel大小,就在memblock.memory中划分一块内存作为crashkernel。
-
- vsmp_init(); //这个函数只在x64系统中使用。暂时不需要看。
-
- io_delay_init(); //这个函数检查BIOS中的dmi,如果DMI_BOARD_VENDOR, DMI_BOARD_NAME匹配,就调用dmi_io_delay_0xed_port。只有HP的特定机型会匹配DMI_BOARD_VENDOR和DMI_BOARD_NAME
-
- /*
- * Parse the ACPI tables for possible boot-time SMP configuration.
- */
- acpi_boot_table_init();//初始化acpi table,最终的结果放在全局变量里initial_tables
-
- early_acpi_boot_init(); //Process the Multiple APIC Description Table (MADT)
13. initmem_init(). initmem_init的作用就在于建立了mem_section的概念。
- #ifndef CONFIG_NEED_MULTIPLE_NODES
- void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
- {
- #ifdef CONFIG_HIGHMEM
- highstart_pfn = highend_pfn = max_pfn;
- if (max_pfn > max_low_pfn)
- highstart_pfn = max_low_pfn;
- memblock_x86_register_active_regions(0, 0, highend_pfn);//建立early_node_map, 将0-highend_pfn的空间(0-最大实际物理内存页码)放入到early_node_map里,并且其nodeid是0。实际上内核按照当前memblock.memory的map状况将0-highend_pfn之间的memory range放入到early_node_map。
- sparse_memory_present_with_active_regions(0);//将放入early_node_map中并且nodeid是0的memory region的page放入另外一个mem_section的数组中。这里引入了mem_section的概念,其引入这个概念的目的可能是内存分配更加高效。kernel中一个mem_section包含了物理地址相连的固定数目个page。在x86系统中,如果没有PAE的状况下, 一个mem_section可以包含16384个page。所以从一个给定地址,我们可以得到它所在的page,也能得到它所在的mem_section的下标。而sparse_memory_present_with_active_regions的作用就是为给定node所拥有的物理page建立mem_sections.
- printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
- pages_to_mb(highend_pfn - highstart_pfn));
- num_physpages = highend_pfn;
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; //high_memory就是指向highmemory起始的虚拟地址。
- #else
- memblock_x86_register_active_regions(0, 0, max_low_pfn);
- sparse_memory_present_with_active_regions(0);
- num_physpages = max_low_pfn;
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
- #endif
- #ifdef CONFIG_FLATMEM
- max_mapnr = num_physpages;
- #endif
- __vmalloc_start_set = true;
-
- printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
- pages_to_mb(max_low_pfn));
-
- setup_bootmem_allocator();//将after_bootmem置1,标志我们已经完成了bootmem初始化.
- }
- #endif /* !CONFIG_NEED_MULTIPLE_NODES */
14. 预留DMA空间
- memblock_find_dma_reserve();// 0-16M的物理内存空间是DMA可以使用的空间,这个函数意义是找出0-16M空间中有多大的内存已经被memblock.reserved保留而无法分配给后续的DMA操作。
- dma32_reserve_bootmem(); //在x86_32上是空函数
15. paging_init进行kmap初始化,sparse memory初始化和zone初始化。
- x86_init.paging.pagetable_setup_start(swapper_pg_dir); //call native_pagetable_setup_start.实际上是把max_low_pfn以上的可能存在的pte进行清空
- paging_init();
- x86_init.paging.pagetable_setup_done(swapper_pg_dir); //call native_pagetable_setup_done。实际是空函数。
在paging_init以前,已经建立的页表包括从0xC0000000开始到max_low_pfn所对应的虚拟地址为止的线性映射,以及FIXMAP。而在paging_init里,对kmap进行了页表映射。
- void __init paging_init(void)
- {
- pagetable_init();//这个函数相对简单,其作用是在swapper_pg_dir所指向的pgd中建立从PKMAP_BASE到PKMAP_BASE+PAGE_SIZE*LAST_PKMAP页映射所需要的pgd, pud, pmd, pte。PKMAP_BASE到PKMAP_BASE+PAGE_SIZE*LAST_PKMAP所指向的空间是在FIXMAP之下的LAST_PKMAP个页面空间。其为kmap建立一个固定的内存映射区域,为kernel访问HighMemory(即无法被线性映射的那部分物理内存)服务。
-
- __flush_tlb_all();
-
- kmap_init();//根据enum fixed_addresses中的FIX_KMAP_BEGIN和FIX_KMAP_END的定义在FIXMAP中为KMAP建立一个映射区域,该区域所能映射的page个数以及每个page所使用的目的在enum km_type中定义。每个CPU核心都有一套kmap page相对应。
- /*#ifdef CONFIG_X86_32
- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
- #ifdef CONFIG_PCI_MMCONFIG
-
- enum km_type {
- KM_BOUNCE_READ,
- KM_SKB_SUNRPC_DATA,
- KM_SKB_DATA_SOFTIRQ,
- KM_USER0,
- KM_USER1,
- KM_BH_IRQ,
- KM_SOFTIRQ0,
- KM_SOFTIRQ1,
- KM_TYPE_NR
- };
- */
-
- /*
- * NOTE: at this point the bootmem allocator is fully available.
- */
- sparse_init(); //查了一下,sparse memory和热插拔memory相关,一般使用在server和SMP上,在这种环境下,memory dimm的地址是不连续的,并且在插拔dimm的时候也不会存在相同memory地址的变化,所以在kernel里面必须有能力处理这种非连续性的内存地址。sparse memory的作用就在这里。这部分先略过不看
- zone_sizes_init();//初始化zone
- }
再看一下zone_sizes_init到底做了点什么:
- static void __init zone_sizes_init(void)
- {
- unsigned long max_zone_pfns[MAX_NR_ZONES];
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
- max_zone_pfns[ZONE_DMA] =
- virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; //dma zone is 0-16M
- max_zone_pfns[ZONE_NORMAL] = max_low_pfn; //zone normal is 16M-max_low_pfn
- #ifdef CONFIG_HIGHMEM
- max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; //HIGHMEM Zone is max_low_pfn -highend_pfn
- #endif
-
- free_area_init_nodes(max_zone_pfns);//按照以上对于max_zone_pfns的设定进行zone的初始化
- }
好了,不得不再看看free_area_init_nodes在干些什么:
- /**
- * free_area_init_nodes - Initialise all pg_data_t and zone data
- * @max_zone_pfn: an array of max PFNs for each zone
- *
- * This will call free_area_init_node() for each active node in the system.
- * Using the page ranges provided by add_active_range(), the size of each
- * zone in each node and their holes is calculated. If the maximum PFN
- * between two adjacent zones match, it is assumed that the zone is empty.
- * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed
- * that arch_max_dma32_pfn has no pages. It is also assumed that a zone
- * starts where the previous one ended. For example, ZONE_DMA32 starts
- * at arch_max_dma_pfn.
- */
- void __init free_area_init_nodes(unsigned long *max_zone_pfn)
- {
- unsigned long nid;
- int i;
-
- /* Sort early_node_map as initialisation assumes it is sorted */
- sort_node_map();
-
- /* Record where the zone boundaries are */
- /*以下的代码对于全局变量arch_zone_lowest_possible_pfn[]
- 和arch_zone_highest_possible_pfn进行设置,以确定每个zone的lowest_pfn和highest_pfn.依次初始化设定ZONE_DMA, ZONE_Normal, ZONE_HIGHMEM的lowest pfn和highest pfn.并且对于ZONE_MOVABLE的lowest pfn和highest pfn设为0.
- */
- memset(arch_zone_lowest_possible_pfn, 0,
- sizeof(arch_zone_lowest_possible_pfn));
- memset(arch_zone_highest_possible_pfn, 0,
- sizeof(arch_zone_highest_possible_pfn));
- arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();//find lowest pfn of physical memory can be used. here it is 0
- arch_zone_highest_possible_pfn[0] = max_zone_pfn[0]; //it refer to MAX_DMA_ADDRESS
- for (i = 1; i < MAX_NR_ZONES; i++) {
- if (i == ZONE_MOVABLE)
- continue;
- arch_zone_lowest_possible_pfn[i] =
- arch_zone_highest_possible_pfn[i-1]; //previous higest pfn is the lowest pfn in the next region
- arch_zone_highest_possible_pfn[i] =
- max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);//highest pfn is max_zone_pfn[i]
- }
- arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
- arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0; //now arch_zone[ZONE_MOVABLE] is empty
-
- /* Find the PFNs that ZONE_MOVABLE begins at in each node */
- /* 为每一个cpu node建立zone_movable_pfn。ZONE_MOVABLE是在kernel启动时由命令行传入的参数,意义在于指明内核空间中哪些page是可以移动的,其他的内核page则称为kernel core,是不可以移动的。find_zone_movable_pfns_for_nodes的作用就是按照early_node_map根据每个node的不同内存分布计算出每一个node中movable page的数量
- */
- memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
- find_zone_movable_pfns_for_nodes(zone_movable_pfn); //
-
- /* Print out the zone ranges */
- printk("Zone PFN ranges:\n");
- for (i = 0; i < MAX_NR_ZONES; i++) {
- if (i == ZONE_MOVABLE)
- continue;
- printk(" %-8s ", zone_names[i]);
- if (arch_zone_lowest_possible_pfn[i] ==
- arch_zone_highest_possible_pfn[i])
- printk("empty\n");
- else
- printk("%0#10lx -> %0#10lx\n",
- arch_zone_lowest_possible_pfn[i],
- arch_zone_highest_possible_pfn[i]);
- }
-
- /* Print out the PFNs ZONE_MOVABLE begins at in each node */
- printk("Movable zone start PFN for each node\n");
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (zone_movable_pfn[i])
- printk(" Node %d: %lu\n", i, zone_movable_pfn[i]);
- }
-
- /* Print out the early_node_map[] */
- printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
- for (i = 0; i < nr_nodemap_entries; i++)
- printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
- early_node_map[i].start_pfn,
- early_node_map[i].end_pfn);
-
- /* Initialise every node */
- mminit_verify_pageflags_layout();
- setup_nr_node_ids();
/*以下的for是按照early_node_map和每一个zone的内存分配建立全局变量node_data[]
*/
- for_each_online_node(nid) {
- pg_data_t *pgdat = NODE_DATA(nid);//对于NUMA来说每一个node都有一个pg_data_t结构描述该node对应的内存分配状况,在单CPU的环境下,只有一个node存在
- free_area_init_node(nid, NULL,
- find_min_pfn_for_node(nid), NULL);//对于每个node,初始化其memory分配。具体如何分配看以下的代码
-
- /* Any memory on that node */
- if (pgdat->node_present_pages)
- node_set_state(nid, N_HIGH_MEMORY);
- check_for_regular_memory(pgdat);
- }
- }
- void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
- unsigned long node_start_pfn, unsigned long *zholes_size)
- {
- pg_data_t *pgdat = NODE_DATA(nid);
-
- pgdat->node_id = nid;
- pgdat->node_start_pfn = node_start_pfn;//该node的开始页面号
- calculate_node_totalpages(pgdat, zones_size, zholes_size);//计算对于该node来说有多少pages可用。分别设置了pgdat->node_spanned_pages为总共算上hole有多少page,pgdat->node_present_pages为不计hole有多少实际可用的page
-
- alloc_node_mem_map(pgdat);//为node在memblock.memory或者bootmem中分配一块大小为pgdat->node_spanned_pages大小的内存,并把内存的起始地址放置在pgdat->node_mem_map中。该分配的原则是,如果kernel编译过程中没有使用bootmem,则在memblock.memory中找寻一块内存区域并且该内存区域应该归该node所有,即early_node_map[]的nid是该node.
- #ifdef CONFIG_FLAT_NODE_MEM_MAP
- printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
- nid, (unsigned long)pgdat,
- (unsigned long)pgdat->node_mem_map);
- #endif
-
- free_area_init_core(pgdat, zones_size, zholes_size);
- }