1 paging_init的主要工作
如名字所说的,paging_init要进行页面管理初始化。 众所周知,Buddy System是采用页管理的。
include/linux/mm_types.h
/*
 * Quoted from include/linux/mm_types.h (Linux ~3.8 era):
 * struct page is the per-physical-page descriptor used by the buddy
 * allocator, the page cache, and the slab allocators. The heavy use of
 * anonymous unions keeps the descriptor small: SLUB/SLOB overlay their
 * bookkeeping on fields that page-cache/anonymous pages use for other
 * purposes, so each field group costs memory only once.
 * The leading numbers are the original line numbers from the cited file.
 */
41 struct page {
42 /* First double word block */
43 unsigned long flags; /* Atomic flags, some possibly
44 * updated asynchronously */
45 struct address_space *mapping; /* If low bit clear, points to
46 * inode address_space, or NULL.
47 * If page mapped as anonymous
48 * memory, low bit is set, and
49 * it points to anon_vma object:
50 * see PAGE_MAPPING_ANON below.
51 */
52 /* Second double word */
53 struct {
54 union {
55 pgoff_t index; /* Our offset within mapping. */
56 void *freelist; /* slub/slob first free object */
57 bool pfmemalloc; /* If set by the page allocator,
58 * ALLOC_NO_WATERMARKS was set
59 * and the low watermark was not
60 * met implying that the system
61 * is under some pressure. The
62 * caller should try ensure
63 * this page is only used to
64 * free other pages.
65 */
66 };
67
68 union {
69 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
70 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
71 /* Used for cmpxchg_double in slub */
72 unsigned long counters;
73 #else
74 /*
75 * Keep _count separate from slub cmpxchg_double data.
76 * As the rest of the double word is protected by
77 * slab_lock but _count is not.
78 */
79 unsigned counters;
80 #endif
81
82 struct {
83
84 union {
85 /*
86 * Count of ptes mapped in
87 * mms, to show when page is
88 * mapped & limit reverse map
89 * searches.
90 *
91 * Used also for tail pages
92 * refcounting instead of
93 * _count. Tail pages cannot
94 * be mapped and keeping the
95 * tail page _count zero at
96 * all times guarantees
97 * get_page_unless_zero() will
98 * never succeed on tail
99 * pages.
100 */
101 atomic_t _mapcount;
102
103 struct { /* SLUB */
104 unsigned inuse:16;
105 unsigned objects:15;
106 unsigned frozen:1;
107 };
108 int units; /* SLOB */
109 };
110 atomic_t _count; /* Usage count, see below. */
111 };
112 };
113 };
114
115 /* Third double word block */
116 union {
117 struct list_head lru; /* Pageout list, eg. active_list
118 * protected by zone->lru_lock !
119 */
120 struct { /* slub per cpu partial pages */
121 struct page *next; /* Next partial slab */
122 #ifdef CONFIG_64BIT
123 int pages; /* Nr of partial slabs left */
124 int pobjects; /* Approximate # of objects */
125 #else
126 short int pages;
127 short int pobjects;
128 #endif
129 };
130
131 struct list_head list; /* slobs list of pages */
132 struct slab *slab_page; /* slab fields */
133 };
134
135 /* Remainder is not double word aligned */
136 union {
137 unsigned long private; /* Mapping-private opaque data:
138 * usually used for buffer_heads
139 * if PagePrivate set; used for
140 * swp_entry_t if PageSwapCache;
141 * indicates order in the buddy
142 * system if PG_buddy is set.
143 */
144 #if USE_SPLIT_PTLOCKS
145 spinlock_t ptl;
146 #endif
147 struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
148 struct page *first_page; /* Compound tail pages */
149 };
150
151 /*
152 * On machines where all RAM is mapped into kernel address space,
153 * we can simply calculate the virtual address. On machines with
154 * highmem some memory is mapped into kernel virtual memory
155 * dynamically, so we need a place to store that address.
156 * Note that this field could be 16 bits on x86 ... ;)
157 *
158 * Architectures with slow multiplication can define
159 * WANT_PAGE_VIRTUAL in asm/page.h
160 */
161 #if defined(WANT_PAGE_VIRTUAL)
162 void *virtual; /* Kernel virtual address (NULL if
163 not kmapped, ie. highmem) */
164 #endif /* WANT_PAGE_VIRTUAL */
165 #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
166 unsigned long debug_flags; /* Use atomic bitops on this */
167 #endif
168
169 #ifdef CONFIG_KMEMCHECK
170 /*
171 * kmemcheck wants to track the status of each byte in a page; this
172 * is a pointer to such a status block. NULL if not tracked.
173 */
174 void *shadow;
175 #endif
176
177 #ifdef LAST_NID_NOT_IN_PAGE_FLAGS
178 int _last_nid;
179 #endif
180 }
为了减少 page 描述符本身占用的内存数量,struct page 中大量采用了 union:不同用途(页缓存、匿名页、SLUB/SLOB 等)的字段互相复用同一块空间。
paging_init 的一项核心工作,是(经由其中调用的 bootmem_init)为每一页物理内存创建一个 page 数据结构,并完成初始化。
2 page 指针与 PFN 的相互转换
include/asm-generic/memory_model.h
/*
 * Quoted from include/asm-generic/memory_model.h: under
 * CONFIG_SPARSEMEM_VMEMMAP the struct page array (memmap) is virtually
 * contiguous starting at vmemmap, so PFN <-> struct page * conversion
 * reduces to plain pointer arithmetic.
 */
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
/* memmap is virtually contiguous. */
#define __pfn_to_page(pfn) (vmemmap + (pfn))
#define __page_to_pfn(page) (unsigned long)((page) - vmemmap)
#define page_to_pfn __page_to_pfn
#define pfn_to_page __pfn_to_page
在arm64的编译中定义了CONFIG_SPARSEMEM_VMEMMAP,page_to_pfn和pfn_to_page非常简单了。
arch/arm64/include/asm/pgtable.h
/*
 * Quoted from arch/arm64/include/asm/pgtable.h: vmemmap is the virtual
 * base address of the struct page array, placed SZ_64K above
 * VMALLOC_END so a 64K guard gap separates it from the vmalloc area.
 */
#define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K)
#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
vmemmap 定义的是 struct page 数组(memmap)的起始虚拟地址;它与 VMALLOC_END 之间特意留有 64K 的空隙,作为隔离带,防止 vmalloc 区的越界访问波及该数组。
3. paging_init实现
arch/arm64/mm/mmu.c
/*
 * Quoted from arch/arm64/mm/mmu.c: top-level page-table / page-descriptor
 * initialization. Sequence visible below: cap memblock allocations to the
 * range the initial swapper_pg_dir mapping can address, set up page
 * protection attributes, map all memory banks, flush caches/TLB, allocate
 * the zero page, run bootmem_init(), then park TTBR0 on the reserved
 * (zero) table so the identity mapping cannot speculatively fetch stale
 * entries. Leading numbers are the original source line numbers.
 */
316 void __init paging_init(void)
317 {
318 void *zero_page;
319
320 /*
321 * Maximum PGDIR_SIZE addressable via the initial direct kernel
322 * mapping in swapper_pg_dir.
323 */
324 memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
325
326 init_mem_pgprot();
327 map_mem();
328
329 /*
330 * Finally flush the caches and tlb to ensure that we're in a
331 * consistent state.
332 */
333 flush_cache_all();
334 flush_tlb_all();
335
336 /* allocate the zero page. */
337 zero_page = early_alloc(PAGE_SIZE);
338
339 bootmem_init();
340
341 empty_zero_page = virt_to_page(zero_page);
342 __flush_dcache_page(empty_zero_page);
343
344 /*
345 * TTBR0 is only used for the identity mapping at this stage. Make it
346 * point to zero page to avoid speculatively fetching new entries.
347 */
348 cpu_set_reserved_ttbr0();
349 flush_tlb_all();
350 }
324行: 设置 memblock 分配器的地址上限——只允许在 swapper_pg_dir 初始直接映射能寻址的 PGDIR_SIZE 范围内分配内存(见代码中 320~323 行的注释)。
326行: 设置保护权限(PTE)
327行: 建立虚拟地址到物理地址的映射表
339行 : bootmem_init 为每个物理页面建立一个 struct page 描述符,并初始化各个 Zone。
/*
 * Quoted from arch/arm64/mm/mmu.c: walk every memblock "memory" region
 * and install the kernel linear mapping for it — each region's physical
 * range [base, base+size) is mapped at __phys_to_virt(base) via
 * create_mapping(). The start >= end guard skips empty/degenerate
 * regions. Leading numbers are the original source line numbers.
 */
296 static void __init map_mem(void)
297 {
298 struct memblock_region *reg;
299
300 /* map all the memory banks */
301 for_each_memblock(memory, reg) {
302 phys_addr_t start = reg->base;
303 phys_addr_t end = start + reg->size;
304
305 if (start >= end)
306 break;
307
308 create_mapping(start, __phys_to_virt(start), end - start);
309 }
310 }
map_mem为每个内存区域建立虚拟到物理地址的映射表
/*
 * Quoted from arch/arm64/mm/mmu.c: build kernel page-table entries for a
 * physical range. It rejects virtual addresses below VMALLOC_START
 * (outside the kernel range), page-aligns the start and rounds the
 * length up to cover the requested span, then walks the PGD entries with
 * pgd_addr_end(), letting alloc_init_pud() fill in each lower level.
 *
 * NOTE(review): the quoted pr_warning format string previously ended in
 * "\ n" — a transcription error splitting the newline escape. Restored
 * to "\n" to match the actual kernel source.
 * Leading numbers are the original source line numbers.
 */
227 /*
228 * Create the page directory entries and any necessary page tables for the
229 * mapping specified by 'md'.
230 */
231 static void __init create_mapping(phys_addr_t phys, unsigned long virt,
232 phys_addr_t size)
233 {
234 unsigned long addr, length, end, next;
235 pgd_t *pgd;
236
237 if (virt < VMALLOC_START) {
238 pr_warning("BUG: not creating mapping for 0x%016llx at 0x%016lx - outside kernel range\n",
239 phys, virt);
240 return;
241 }
242
243 addr = virt & PAGE_MASK;
244 length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
245
246 pgd = pgd_offset_k(addr);
247 end = addr + length;
248 do {
249 next = pgd_addr_end(addr, end);
250 alloc_init_pud(pgd, addr, next, phys);
251 phys += next - addr;
252 } while (pgd++, addr = next, addr != end);
253 }