在前面我们讨论了内存节点node_mem_map成员的初始化。了解到此成员其实是以struct page为单位的数组,数组大小跟此节点内存大小相关,那么这些描述符是何时初始化
呢,当前主要是在mem_init函数进行设置。
start_kernel() --> mm_init --> mem_init()
需要注意的是mem_init是跟体系结构相关的函数,比如针对ARM64的实现函数:
/* * mem_init() marks the free areas in the mem_map and tells us how much memory * is free. This is done after various parts of the system have claimed their * memory after the kernel image. */ void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE;
这一步是设置最大的pfn号,超出这个最大的pfn是无效的。
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
#ifndef CONFIG_SPARSEMEM_VMEMMAP free_unused_memmap(); #endif
下面这一步非常非常非常非常非常重要,其把启动过程中所有空闲内存释放到伙伴系统中去。 /* this will put all unused low memory onto the freelists */ free_all_bootmem();
mem_init_print_info(NULL); 打印一些信息。
#define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 #define MLG(b, t) b, t, ((t) - (b)) >> 30 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
pr_notice("Virtual kernel memory layout:\n"); #ifdef CONFIG_KASAN pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(KASAN_SHADOW_START, KASAN_SHADOW_END)); #endif pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(MODULES_VADDR, MODULES_END)); pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(VMALLOC_START, VMALLOC_END)); pr_notice(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(_text, _etext)); pr_notice(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__start_rodata, __init_begin)); pr_notice(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__init_begin, __init_end)); pr_notice(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(_sdata, _edata)); pr_notice(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__bss_start, __bss_stop)); pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n", MLK(FIXADDR_START, FIXADDR_TOP)); pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(PCI_IO_START, PCI_IO_END)); #ifdef CONFIG_SPARSEMEM_VMEMMAP pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n", MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE)); pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n", MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), (unsigned long)virt_to_page(high_memory))); #endif pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(__phys_to_virt(memblock_start_of_DRAM()), (unsigned long)high_memory));
#undef MLK #undef MLM #undef MLK_ROUNDUP
/* * Check boundaries twice: Some fundamental inconsistencies can be * detected at build time already. */ #ifdef CONFIG_COMPAT BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64); #endif
/* * Make sure we chose the upper bound of sizeof(struct page) * correctly. */ BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT));
if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* * On a machine this small we won't get anywhere without * overcommit, so turn it on by default. */ sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; } }
unsigned long __init free_all_bootmem(void) { unsigned long total_pages = 0; bootmem_data_t *bdata;
reset_all_zones_managed_pages();
list_for_each_entry(bdata, &bdata_list, list) total_pages += free_all_bootmem_core(bdata);
totalram_pages += total_pages;
return total_pages; }
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { struct page *page; unsigned long *map, start, end, pages, cur, count = 0;
if (!bdata->node_bootmem_map) return 0;
map = bdata->node_bootmem_map; start = bdata->node_min_pfn; end = bdata->node_low_pfn;
bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, start, end);
while (start < end) { unsigned long idx, vec; unsigned shift;
idx = start - bdata->node_min_pfn; shift = idx & (BITS_PER_LONG - 1); /* * vec holds at most BITS_PER_LONG map bits, * bit 0 corresponds to start. */ vec = ~map[idx / BITS_PER_LONG];
if (shift) { vec >>= shift; if (end - start >= BITS_PER_LONG) vec |= ~map[idx / BITS_PER_LONG + 1] << (BITS_PER_LONG - shift); } /* * If we have a properly aligned and fully unreserved * BITS_PER_LONG block of pages in front of us, free * it in one go. */ if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) { int order = ilog2(BITS_PER_LONG);
__free_pages_bootmem(pfn_to_page(start), start, order); count += BITS_PER_LONG; start += BITS_PER_LONG; } else { cur = start;
start = ALIGN(start + 1, BITS_PER_LONG); while (vec && cur != start) { if (vec & 1) { page = pfn_to_page(cur); __free_pages_bootmem(page, cur, 0); count++; } vec >>= 1; ++cur; } } }
cur = bdata->node_min_pfn; page = virt_to_page(bdata->node_bootmem_map); pages = bdata->node_low_pfn - bdata->node_min_pfn; pages = bootmem_bootmap_pages(pages); count += pages; while (pages--) __free_pages_bootmem(page++, cur++, 0); bdata->node_bootmem_map = NULL;
bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
return count; }
void __init __free_pages_bootmem(struct page *page, unsigned long pfn, unsigned int order) { if (early_page_uninitialised(pfn)) return; return __free_pages_boot_core(page, order); }
static void __init __free_pages_boot_core(struct page *page, unsigned int order) { unsigned int nr_pages = 1 << order; struct page *p = page; unsigned int loop;
prefetchw(p); for (loop = 0; loop < (nr_pages - 1); loop++, p++) { prefetchw(p + 1); __ClearPageReserved(p); set_page_count(p, 0); } __ClearPageReserved(p); set_page_count(p, 0);
page_zone(page)->managed_pages += nr_pages; set_page_refcounted(page); __free_pages(page, order); }
下面就是通用流程处理了,即对页释放处理。
void __free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) { if (order == 0) free_hot_cold_page(page, false); else __free_pages_ok(page, order); } }
EXPORT_SYMBOL(__free_pages);