目录
ARM32 页表映射
使用单层段映射:
内存中有一个段映射表,4096 个表项,每个表项4B ,占用 4K *4 =16K (可以当作PGD ,地址位数[31~20] ),可以寻址1M 空间;PTE 有256 项,地址位数[19~12] ,对应空间大小 256 * 4K([11~0] 对应页4K) = 1M;
cpu 访问内存,虚拟地址高12位用于段(section)索引找到对应表项,每个表项提供12位的物理段基地址,以及相应的标志位,如读、写等权限标志位。这个12位物理段基地址和虚拟地址低20位拼接得到32位物理地址。
采用页表映射,段映射表就变成一级映射表,称PGD,其表项提供的不再是物理段地址,而是二级页表的基地址;根据32 位虚拟地址的高12 位,确定PGD 一级页表的索引,确定对应的页表项,而页表项存储的是二级页表的地址,再根据32位虚拟地址的[19~12] 8 位确定在二级页表的索引,得到对应的二级页表中的页表项,这个页表项中找到20位的物理页面地址,再跟虚拟地址中低12位[11~0] 一起确定最终32 位物理地址。这个arm32中由mmu 硬件完成。
arm32 PGD 项填充与PUD 创建
include/asm/pgtable.h
#define PGDIR_SHIFT 21
#define PMD_SHIFT 21
#define PGDIR_SIZE (1UL << PGDIR_SHIFT )
kernel/msm-4.19/arch/arm/mm/mmu.c
/*
 * Quoted from kernel/msm-4.19/arch/arm/mm/mmu.c.
 * Build kernel page-table mappings for the region described by @md:
 * walk the PGD entries covering [md->virtual, md->virtual + md->length)
 * and delegate each PGDIR_SIZE-sized chunk to alloc_init_pud(), which
 * fills in the lower levels using @alloc to get page-table memory.
 * @ng selects non-global (nG) mappings.
 */
static void __init __create_mapping(struct mm_struct *mm, struct map_desc *md,
912 void *(*alloc)(unsigned long sz),
913 bool ng)
914{
915 unsigned long addr, length, end;
916 phys_addr_t phys;
917 const struct mem_type *type;
918 pgd_t *pgd;
919
920 type = &mem_types[md->type];
921
922#ifndef CONFIG_ARM_LPAE
923 /*
924 * Catch 36-bit addresses
925 */
926 if (md->pfn >= 0x100000) {
927 create_36bit_mapping(mm, md, type, ng);
928 return;
929 }
930#endif
931
932 addr = md->virtual & PAGE_MASK; /* page-align the virtual start */
933 phys = __pfn_to_phys(md->pfn); /* pfn -> physical address */
934 length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); /* round length up to a whole number of pages */
935
936 if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
937 pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n",
938 (long long)__pfn_to_phys(md->pfn), addr);
939 return;
940 }
941
942 pgd = pgd_offset(mm, addr); /* PGD entry for the first address */
943 end = addr + length;
944 do {
945 unsigned long next = pgd_addr_end(addr, end); /* step at most PGDIR_SIZE, clamped to end */
946
947 alloc_init_pud(pgd, addr, next, phys, type, alloc, ng);
948
949 phys += next - addr;
950 addr = next;
951 } while (pgd++, addr != end);
952}
addr = md->virtual & PAGE_MASK;
phys = __pfn_to_phys(md->pfn); 物理页帧号到物理地址
length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); 按页大小(4K)向上对齐
pgd = pgd_offset(mm, addr); 从init_mm.pgd 获取swapper_pg_dir ,即PGD 基地址。
unsigned long next = pgd_addr_end(addr, end); 以PGDIR_SIZE 为步长
alloc_init_pud(pgd, addr, next, phys, type, alloc, ng); 初始化PGD页表项和下一级PUD
arch/arm/kernel/head.S
#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)
#define PG_DIR_SIZE 0x4000
.globl swapper_pg_dir
.equ swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE
arch/arm/Makefile
textofs-y := 0x00008000
TEXT_OFFSET := $(textofs-y)
这里由于配置arm32 用户空间跟内核空间为3G :1G
内核空间虚拟地址0xC000_0000 (3G) 开始 ,KERNEL_RAM_VADDR 为0xC0008000
PG_DIR_SIZE 这里0x4000 刚好等于16K ,也就是4096 个PGD 页表项对应的空间
swapper_pg_dir PGD页基址 就是 0xC000_4000
为何PGDIR_SHIFT 为21 不为20 呢?这是因为linux 默认是PGD [31~21],一共2048 项;但是ARM32硬件结构中PGD是从[31~20],所以就不一致。在early_pte_alloc 创建pte 即填充pte 页表项时,创建了两个 512 项的PTE 页面,其中一个512 就是为了兼容PMD少的一位,给了mmu 硬件使用,另外512 是给linux 系统使用,从而可以标记对应页面的软件属性,比如只读、读写、脏(dirty)等。
start_kernel->mm_init->mem_init
pr_notice("Virtual kernel memory layout:\n");
这里会将编译链接生成的kernel 镜像对应内存空间布局打印出来,地址是由arch/arm/kernel/vmlinux.ld.S 控制;kernel 镜像由_text和_end 来确定范围,_text 每次编译总是确定的,对应的编译链接宏System.map 里面可以查看。
arm32系统将物理地址[0:760M]内存线性映射到[3G:3G+760M]虚拟地址上。线性物理地址跟虚拟地址差PAGE_OFFSET = 3G
__virt_to_phys(x) {
x-PAGE_OFFSET+PHYS_OFFSET
}
__phys_to_virt(x){
x+PAGE_OFFSET-PHYS_OFFSET
}
高端内存是从[760M:1024M] ,即vmalloc_min = arm_lowmem_limit = 0x2f80_0000=760M
VMALLOC_START和VMALLOC_END 对应[0xf000_0000:0xff00_0000],大小240M
vmalloc_min到VMALLOC_START对应[0xEF80_0000:0xf000_0000] 共8M 用于捕获越界访问
高端内存用于vmalloc 、fixmap 、I/O设备 、外设映射等
Arm64 内存
加载解析fdt 存储信息
start_kernel()-》setup_arch()-》setup_machine_fdt()-》 early_init_dt_scan_memory()
通过fdt 获取存储信息,base_address和size,再通过early_init_dt_add_memory_arch(base, size); 加入memblock 子系统。
kaslr和页表映射
kernel-4.19/arch/arm64/mm/init.c
659#define MLK(b, t) b, t, ((t) - (b)) >> 10
660#define MLM(b, t) b, t, ((t) - (b)) >> 20
661#define MLG(b, t) b, t, ((t) - (b)) >> 30
662#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
663
664 pr_notice("Virtual kernel memory layout:\n");
665#ifdef CONFIG_KASAN
666 pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n",
667 MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
668#endif
669 pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n",
670 MLM(MODULES_VADDR, MODULES_END));
671 pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n",
672 MLG(VMALLOC_START, VMALLOC_END));
673 pr_notice(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n",
674 MLK_ROUNDUP(_text, _etext));
675 pr_notice(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n",
676 MLK_ROUNDUP(__start_rodata, __init_begin));
677 pr_notice(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n",
678 MLK_ROUNDUP(__init_begin, __init_end));
679 pr_notice(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
680 MLK_ROUNDUP(_sdata, _edata));
681 pr_notice(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n",
682 MLK_ROUNDUP(__bss_start, __bss_stop));
683 pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n",
684 MLK(FIXADDR_START, FIXADDR_TOP));
685 pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n",
686 MLM(PCI_IO_START, PCI_IO_END));
687#ifdef CONFIG_SPARSEMEM_VMEMMAP
688 pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n",
689 MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
690 pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n",
691 MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
692 (unsigned long)virt_to_page(high_memory)));
693#endif
694 pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n",
695 MLM(__phys_to_virt(memblock_start_of_DRAM()),
696 (unsigned long)high_memory));
kernel-4.19/arch/arm64/include/asm/pgtable.h
/*
27 * VMALLOC range.
28 *
29 * VMALLOC_START: beginning of the kernel vmalloc space
30 * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space
31 * and fixed mappings
32 */
33#define VMALLOC_START (MODULES_END)
34#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
35
36#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
37
38#define FIRST_USER_ADDRESS 0UL
这里VMALLOC_START 跟 KIMAGE_VADDR 一样
kernel-4.19/arch/arm64/kernel/head.S
/*
377 * Map the kernel image (starting with PHYS_OFFSET).
378 */
379 adrp x0, swapper_pg_dir
380 mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
381 add x5, x5, x23 // add KASLR displacement
382 mov x4, PTRS_PER_PGD
383 adrp x6, _end // runtime __pa(_end)
384 adrp x3, _text // runtime __pa(_text)
385 sub x6, x6, x3 // _end - _text
386 add x6, x6, x5 // runtime __va(_end)
map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
map_memory 是将虚拟地址x5 ,长度x6 映射到物理地址x3 开始的位置;其中x0 页表地址,x1 第一个页表项,通常x1=x0+PAGE_SIZE;x4 表示对应页表等级由多少项。这里是将内核代码段.text 从虚拟地址KIMAGE_VADDR + TEXT_OFFSET+KASLR 偏移 对应虚拟地址映射到物理地址_text 对应连续物理地址。
vmlinux 对应的是编译链接地址;coredump 及内核堆栈对应的是运行时地址;
支持kaslr之前,kernel加载到system RAM的某个位置,它之前的内存kernel是无法管理的,所以一般将kernel加载到system RAM的 起始位置+TEXT_OFFSET(0x080000)处,因为kaslr修改成可以随意加载到system RAM的任何位置,只要满足对齐要求就可以;
支持kaslr之前,kernel image是映射到线性映射区域的(4.15 之前),因为kaslr才修改成映射到vmalloc区域;
为了支持kaslr,内核要编译成PIE(Position Independent Executable),才能重定位到任意加载地址
这样 .text 起始位置跟 VMALLOC 区起始地址有一个偏移
add_link = addr_run - (VMALLOC_START - .text_start) + TEXT_OFFSET ???
add_link 是addr2line 使用,addr_run 是虚拟地址,运行时堆栈地址。 .text_start 是load物理地址???
MTK 平台:
static inline void show_kaslr(void) 55{ 56 u64 const kaslr_offset = aee_get_kimage_vaddr() - KIMAGE_VADDR; 57 58 pr_notice("Kernel Offset: 0x%llx from 0x%lx\n", 59 kaslr_offset, KIMAGE_VADDR); 60 pr_notice("PHYS_OFFSET: 0x%llx\n", PHYS_OFFSET); 61 aee_rr_rec_kaslr_offset(kaslr_offset); 62}
aee_get_kimage_vaddr 从coredump 里面读取kimage_vaddr 对应地址
9#if defined(KIMAGE_VADDR)
90 machdesc_p->kimage_vaddr = KIMAGE_VADDR;
91#endif
92#if defined(TEXT_OFFSET)
93 machdesc_p->kimage_vaddr += TEXT_OFFSET;
94#endif
kernel-4.19/arch/arm64/include/asm/memory.h
35#define PCI_IO_SIZE SZ_16M
36
37/*
38 * Log2 of the upper bound of the size of a struct page. Used for sizing
39 * the vmemmap region only, does not affect actual memory footprint.
40 * We don't use sizeof(struct page) directly since taking its size here
41 * requires its definition to be available at this point in the inclusion
42 * chain, and it may not be a power of 2 in the first place.
43 */
44#define STRUCT_PAGE_MAX_SHIFT 6
45
46/*
47 * VMEMMAP_SIZE - allows the whole linear region to be covered by
48 * a struct page array
49 */
50#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT))
51
52/*
53 * PAGE_OFFSET - the virtual address of the start of the linear map (top
54 * (VA_BITS - 1))
55 * KIMAGE_VADDR - the virtual address of the start of the kernel image
56 * VA_BITS - the maximum number of bits for virtual addresses.
57 * VA_START - the first kernel virtual address.
58 */
59#define VA_BITS (CONFIG_ARM64_VA_BITS)
60#define VA_START (UL(0xffffffffffffffff) - \
61 (UL(1) << VA_BITS) + 1)
62#define PAGE_OFFSET (UL(0xffffffffffffffff) - \
63 (UL(1) << (VA_BITS - 1)) + 1)
64#define KIMAGE_VADDR (MODULES_END)
65#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
66#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
67#define MODULES_VSIZE (SZ_128M)
68#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE)
69#define PCI_IO_END (VMEMMAP_START - SZ_2M)
70#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
71#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
72
73#define KERNEL_START _text
74#define KERNEL_END _end
内核虚拟地址起点:VA_START = 0xffff_0000_0000_0000
PAGE_OFFSET = 0xffff_8000_0000_0000 (VA_BITS=48 时,即内核地址空间的高一半,线性映射区起点)
PAGE_OFFSET - the virtual address of the start of the linear map (top(VA_BITS - 1))
KIMAGE_VADDR - the virtual address of the start of the kernel image
这里MODULES_VSIZE = 128M = 0x8000000
kernel-4.19/arch/arm64/Makefile
90# The byte offset of the kernel image in RAM from the start of RAM.
91ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y)
92TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \
93 int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \
94 rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}")
95else
96TEXT_OFFSET := 0x00080000
这里CONFIG_ARM64_PAGE_SHIFT 页大小 12 位
User space 地址mmu转换示例:
基本概念介绍:
task_struct->mm
如果是用户进程,指向当前的进程地址空间。
如果是内核线程,为空(内核线程没有进程地址空间)。
task_struct->active_mm
如果是用户进程,mm与active_mm相同,都指向进程的地址空间。
如果是内核线程,指向被借用的用户进程的地址空间(mm)。
user space各个process 保存自己独立的pgd,存放在task_struct->mm->pgd里面,每次做context switch时,会把next_task的pgd物理地址存放到TTBR0_EL1里面,从而实现不同process不同的地址空间。
TTBR1_EL1 对应内核pgd (swapper_pg_dir)
cr3寄存器的加载
cr3寄存器的加载是在进程调度的时候更新的,具体如下
schedule()->context_switch()->switch_mm()->load_cr3(next->pgd)
load_cr3加载的是mm_struct->pgd,即线性地址,而实际上加载到cr3寄存器的是实际的物理地址write_cr3(__pa(pgdir));在装载cr3寄存器时将线性地址通过__pa转换成了物理地址,所以cr3寄存器装的是实实在在的物理地址。正在使用的页目录的物理地址存在cr3控制寄存器中
假设页表映射层级是4,即配置CONFIG_ARM64_PGTABLE_LEVELS=4。地址宽度是48,即配置CONFIG_ARM64_VA_BITS=48,页大小4K,每个页表项占 8字节
PGD [47,39] 512*512G=256T
PUD [38,30] 512G
PMD [29,21] 512*2M = 1G
PTE [20,12] 4K/8=512 项,512*4K = 2M
PAGE_SHIFT [11~0]
当bit[63] 为1 表明是内核空间地址,页表的基地址寄存器用TTBR1_EL1 (Translation Table Base Register 1);如果为0 ,表示用户空间地址,使用TTBR0 ;寄存器保存了PGD页表基地址(Table base address)
PTE 页表项含有最终的物理地址[47~12],再跟虚拟地址[11~0]合并成最终的物理地址。
48位 , 3级页表,页4K 大小
创建内核页表
start_kernel -> setup_arch ->paging_init ->map_mem -> __map_memblock ->create_mapping
PTE 2M * 8 =16 M
PMD 64 G
PGD 64*8 = 512 G
kernel-4.19/arch/arm64/include/asm/pgtable-hwdef.h
*/ 16#ifndef __ASM_PGTABLE_HWDEF_H 17#define __ASM_PGTABLE_HWDEF_H 18 19#include <asm/memory.h> 20 21/* 22 * Number of page-table levels required to address 'va_bits' wide 23 * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) 24 * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence: 25 * 26 * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3)) 27 * 28 * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d)) 29 * 30 * We cannot include linux/kernel.h which defines DIV_ROUND_UP here 31 * due to build issues. So we open code DIV_ROUND_UP here: 32 * 33 * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3)) 34 * 35 * which gets simplified as : 36 */ 37#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) 38 39/* 40 * Size mapped by an entry at level n ( 0 <= n <= 3) 41 * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits 42 * in the final page. The maximum number of translation levels supported by 43 * the architecture is 4. Hence, starting at at level n, we have further 44 * ((4 - n) - 1) levels of translation excluding the offset within the page. 45 * So, the total number of bits mapped by an entry at level n is : 46 * 47 * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT 48 * 49 * Rearranging it a bit we get : 50 * (4 - n) * (PAGE_SHIFT - 3) + 3 51 */ 52#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3) 53 54#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) 55 56/* 57 * PMD_SHIFT determines the size a level 2 page table entry can map. 58 */ 59#if CONFIG_PGTABLE_LEVELS > 2 60#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) 61#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) 62#define PMD_MASK (~(PMD_SIZE-1)) 63#define PTRS_PER_PMD PTRS_PER_PTE 64#endif 65 66/* 67 * PUD_SHIFT determines the size a level 1 page table entry can map. 
68 */ 69#if CONFIG_PGTABLE_LEVELS > 3 70#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) 71#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) 72#define PUD_MASK (~(PUD_SIZE-1)) 73#define PTRS_PER_PUD PTRS_PER_PTE 74#endif 75 76/* 77 * PGDIR_SHIFT determines the size a top-level page table entry can map 78 * (depending on the configuration, this level can be 0, 1 or 2). 79 */ 80#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) 81#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) 82#define PGDIR_MASK (~(PGDIR_SIZE-1)) 83#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) 84 85/* 86 * Section address mask and size definitions. 87 */ 88#define SECTION_SHIFT PMD_SHIFT 89#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) 90#define SECTION_MASK (~(SECTION_SIZE-1))
当配置CONFIG_PGTABLE_LEVELS为4 ,则为4级页表
PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(0) = 39 ,表示VA 中除了本级页表地址,还有39 位表示其它级地址
PTRS_PER_PGD = (1 << (VA_BITS - PGDIR_SHIFT)) = 1<<9
虚拟地址到物理地址转换
virt_to_phys和phys_to_virt
内核虚拟地址起点:VA_START = 0xffff_0000_0000_0000
PAGE_OFFSET = 0xffff_8000_0000_0000 (VA_BITS=48)
PAGE_OFFSET - the virtual address of the start of the linear map (top(VA_BITS - 1))
对于48位虚拟地址,从PAGE_OFFSET 开始的往大地址的区域是线性区域,跟物理地址就是一个PHYS_OFFSET 偏差;如果不是线性区域,这个时候是 kimage_voffset 偏移;
- #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
- #define __is_lm_address(addr) (!!((addr) & BIT(VA_BITS - 1)))
- #define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
- #define __kimg_to_phys(addr) ((addr) - kimage_voffset)
- #define __virt_to_phys_nodebug(x) ({ \
- phys_addr_t __x = (phys_addr_t)(x); \
- __is_lm_address(__x) ? __lm_to_phys(__x) : \__kimg_to_phys(__x); \
- #define __virt_to_phys(x) __virt_to_phys_nodebug(x)
- static inline phys_addr_t virt_to_phys(const volatile void *x)
- {
- return __virt_to_phys((unsigned long)(x));
- }
kimage_voffset 的获取:
- #define KERNEL_START _text
- #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
- ENTRY(kimage_vaddr)
- .quad _text - TEXT_OFFSET
- /*
- * The following fragment of code is executed with the MMU enabled.
- *
- * x0 = __PHYS_OFFSET
- */
- __primary_switched:
- ldr_l x4, kimage_vaddr // Save the offset between /* 2 */
- sub x4, x4, x0 // the kernel virtual and /* 3 */
- str_l x4, kimage_voffset, x5 // physical mappings /* 4 */
- b start_kernel
- __primary_switch:
- bl __enable_mmu
- ldr x8, =__primary_switched
- adrp x0, __PHYS_OFFSET /* 1 */
- br x8
__primary_switch 这里获取的是MMU 没有打开时的_text 链接的地址(相对VMALLOC有一个偏移),加载地址跟链接地址一样;
__primary_switched x4 获取是运行时_text 运行虚拟地址,这个时候运行地址跟加载地址不一样,从而x4-x0 就是运行虚拟地址跟加载地址的一个偏移存入kimage_voffset。
fixmap & 页表
这里是start_kernel 之前使用fixmap 映射dtb 等,那个时候也填充了页表
device tree 简介 - 灰信网(软件开发博客聚合)
memblock
linux早期内存管理:memblock完全介绍_加油2019的博客-CSDN博客